I have a python file which I have called Pre_Processing_File.py, this file has the function pre_Processing which loads in a text file and creates 3 data frames; userListing_DF,PrivAcc,allAccountsDF, this function then returns the 3 DFs.
What I want to do is create another script and import the 3 DFs from the pre_Processing.py file, I have created a script called call_DFs which I have also included below. The error I’m getting with this is: UnboundLocalError: local variable ‘userListing_DF’ referenced before assignment. I’m not sure where to define all 3 DFs within the script
Pre_Processing_File.py
def pre_Processing(userListing_DF,PrivAcc,allAccountsDF): ## Import Libraries import pandas as pd import numpy as np from pandas import DataFrame dir1 = filedialog.askopenfile(mode="r", initialdir="/", title="select the first file", filetypes=(("Text files", "*.txt"), ("all files", "*.*"))) dir2 = filedialog.askopenfile(mode="rb", initialdir="/", title="Select the second file", filetypes=(("Excel files", "*.xlsx"), ("all files", "*.*"))) ###################################################################################################################### ###################################################################################################################### ## Read in the single long column, and then form a group indicator by seeing where the value is '[User]'. Then separate the ## column labels and values, with a str.split and join back to your DataFrame. Finally pivot to your desired shape. userListing_DF = pd.read_csv(dir1, sep='n', header=None) userListing_DF['Group'] = userListing_DF[0].eq('[User]').cumsum() # This creates groups, groups everything until the next 'User' occurs userListing_DF = userListing_DF[userListing_DF[0].ne('[User]')] # No longer need these rows userListing_DF = pd.concat([userListing_DF, userListing_DF[0].str.split('=', expand=True).rename(columns={0: 'col', 1: 'val'})], axis=1) # This performs a string slipt to seperate the headers with the User data userListing_DF = userListing_DF.pivot(index='Group', columns='col', values='val').rename_axis(columns=None) ###################################################################################################################### ###################################################################################################################### ## Only keep some columns and remove the rest userListing_DF = userListing_DF[['uid','last_name','first_name','role']] #uid, last, first, role ## Copy data from the userListing_DF dataframe PrivilegedAccounts_DF=pd.concat([userListing_DF],axis=1) ## Remove the multiIndex - cannot use below query with multi index you will get the following error PrivilegedAccounts_DF.columns = PrivilegedAccounts_DF.columns.map(''.join) ## We cam also remove the MultiIndex for the below DF. If we don't the Index will stay and there will be other formatting ## problems when exporting to excel userListing_DF.columns = userListing_DF.columns.map(''.join) ## Fill blank fields in the first name column with Nan PrivilegedAccounts_DF = PrivilegedAccounts_DF.fillna( 'blank' ) ## Create a function - If values in the first name column match the search criteria return True, else return false (Create new ## column called PrivilegedAccess to store information) with open(r'DesktopTestPrivScriptdesignator_Priv_Accounts_List.txt') as f: priv_List = f.readlines() priv_List = [x.strip() for x in priv_List] priv_List = list(filter(lambda x: x, priv_List)) Search_for_These_values = priv_List pattern = '|'.join(Search_for_These_values) PrivilegedAccounts_DF['PrivilegedAccess'] = PrivilegedAccounts_DF['first_name'].str.contains(pattern).astype(str) ## Create a function - Create a "Calc" column - If first name is blank then value will be "Privileged", If value is Diagnostics ## then enter "Diagnostics", If value is SYS then enter "SYS", If value is service ## account then enter "service account" def label_columnCalc(row): if row['first_name'] in Search_for_These_values: return 'Privileged' else: return row['first_name'] PrivilegedAccounts_DF['Calc'] = PrivilegedAccounts_DF.apply(lambda row: label_columnCalc(row), axis=1) ###################################################################################################################### ###################################################################################################################### # create a dataframe of last quarters Privileged Access Review Excel file lastQuarterReview = pd.read_excel(dir2) # Now look to see if any new User ID's exist since last quarter PrivAcc = PrivilegedAccounts_DF PrivAcc = PrivAcc.assign(lastQuarterReview=PrivAcc.uid.isin(lastQuarterReview.uid).astype(str)) # Rename the column PrivAcc.rename(columns={'lastQuarterReview': 'Did UID exist last quarter'}, inplace=True) ###################################################################################################################### ###################################################################################################################### # Change the boolean values in the Did UID exist last quarter to "Existed last Previous Quarter" or "New User" booleandf = PrivAcc.select_dtypes(include=[bool]) booleanDictionary = {True: 'True', False: 'False'} for column in booleandf: PrivAcc[column] = PrivAcc[column].map(booleanDictionary) def label_column_Did_UID_exist_last_quarter(row): if row['Did UID exist last quarter'] == 'True': return 'Existed last Quarter' else: return 'New User' PrivAcc['Did UID exist last quarter'] = PrivAcc.apply(lambda row: label_column_Did_UID_exist_last_quarter(row), axis=1) ###################################################################################################################### ###################################################################################################################### # Create a new tab that will display all Privileged and Non - Privileged Accounts allAccountsDF = PrivAcc ###################################################################################################################### ###################################################################################################################### ## Place a filter on Privileged Acces column to only show values that are equaled to true PrivAcc= PrivAcc.loc[PrivAcc['PrivilegedAccess'] == 'True'] ## Create a blank column called Approved/Revoked PrivAcc["Approved/Revoke"] = '' PrivAcc["Comments"] = '' ######################################################################################################### return userListing_DF,PrivAcc,allAccountsDF
call_DFs.py
import pandas as pd from Pre_Processing_File import pre_Processing def call_DFs(): userListing_DF,PrivAcc,allAccountsDF = pre_Processing(userListing_DF,PrivAcc,allAccountsDF) ## Create an excel file with multiple sheets # Create a Pandas Excel writer using XlsxWriter as the engine. writer = pd.ExcelWriter(r'DesktopTestPrivExportsDFs.xlsx', engine='openpyxl') # Write each dataframe to a different worksheet. userListing_DF.to_excel(writer, sheet_name='User Listing',index=False) PrivAcc.to_excel(writer, sheet_name='Privileged Accounts',index=False) allAccountsDF.to_excel(writer, sheet_name='All Accounts',index=False) # Close the Pandas Excel writer and output the Excel file. #writer.save() call_DFs()
Advertisement
Answer
def pre_Processing(userListing_DF,PrivAcc,allAccountsDF):
should just be
def pre_Processing():
since it’s returning those three variables and doesn’t require them as arguments.
Then, when calling that, just
userListing_DF,PrivAcc,allAccountsDF = pre_Processing()