I have a python file which I have called Pre_Processing_File.py, this file has the function pre_Processing which loads in a text file and creates 3 data frames; userListing_DF,PrivAcc,allAccountsDF, this function then returns the 3 DFs.
What I want to do is create another script and import the 3 DFs from the pre_Processing.py file, I have created a script called call_DFs which I have also included below. The error I’m getting with this is: UnboundLocalError: local variable ‘userListing_DF’ referenced before assignment. I’m not sure where to define all 3 DFs within the script
Pre_Processing_File.py
def pre_Processing(userListing_DF,PrivAcc,allAccountsDF):
## Import Libraries
import pandas as pd
import numpy as np
from pandas import DataFrame
dir1 = filedialog.askopenfile(mode="r", initialdir="/", title="select the first file",
filetypes=(("Text files", "*.txt"), ("all files", "*.*")))
dir2 = filedialog.askopenfile(mode="rb", initialdir="/", title="Select the second file",
filetypes=(("Excel files", "*.xlsx"), ("all files", "*.*")))
######################################################################################################################
######################################################################################################################
## Read in the single long column, and then form a group indicator by seeing where the value is '[User]'. Then separate the
## column labels and values, with a str.split and join back to your DataFrame. Finally pivot to your desired shape.
userListing_DF = pd.read_csv(dir1, sep='n', header=None)
userListing_DF['Group'] = userListing_DF[0].eq('[User]').cumsum() # This creates groups, groups everything until the next 'User' occurs
userListing_DF = userListing_DF[userListing_DF[0].ne('[User]')] # No longer need these rows
userListing_DF = pd.concat([userListing_DF, userListing_DF[0].str.split('=', expand=True).rename(columns={0: 'col', 1: 'val'})],
axis=1) # This performs a string slipt to seperate the headers with the User data
userListing_DF = userListing_DF.pivot(index='Group', columns='col', values='val').rename_axis(columns=None)
######################################################################################################################
######################################################################################################################
## Only keep some columns and remove the rest
userListing_DF = userListing_DF[['uid','last_name','first_name','role']]
#uid, last, first, role
## Copy data from the userListing_DF dataframe
PrivilegedAccounts_DF=pd.concat([userListing_DF],axis=1)
## Remove the multiIndex - cannot use below query with multi index you will get the following error
PrivilegedAccounts_DF.columns = PrivilegedAccounts_DF.columns.map(''.join)
## We cam also remove the MultiIndex for the below DF. If we don't the Index will stay and there will be other formatting
## problems when exporting to excel
userListing_DF.columns = userListing_DF.columns.map(''.join)
## Fill blank fields in the first name column with Nan
PrivilegedAccounts_DF = PrivilegedAccounts_DF.fillna( 'blank' )
## Create a function - If values in the first name column match the search criteria return True, else return false (Create new
## column called PrivilegedAccess to store information)
with open(r'DesktopTestPrivScriptdesignator_Priv_Accounts_List.txt') as f:
priv_List = f.readlines()
priv_List = [x.strip() for x in priv_List]
priv_List = list(filter(lambda x: x, priv_List))
Search_for_These_values = priv_List
pattern = '|'.join(Search_for_These_values)
PrivilegedAccounts_DF['PrivilegedAccess'] = PrivilegedAccounts_DF['first_name'].str.contains(pattern).astype(str)
## Create a function - Create a "Calc" column - If first name is blank then value will be "Privileged", If value is Diagnostics
## then enter "Diagnostics", If value is SYS then enter "SYS", If value is service
## account then enter "service account"
def label_columnCalc(row):
if row['first_name'] in Search_for_These_values:
return 'Privileged'
else:
return row['first_name']
PrivilegedAccounts_DF['Calc'] = PrivilegedAccounts_DF.apply(lambda row: label_columnCalc(row), axis=1)
######################################################################################################################
######################################################################################################################
# create a dataframe of last quarters Privileged Access Review Excel file
lastQuarterReview = pd.read_excel(dir2)
# Now look to see if any new User ID's exist since last quarter
PrivAcc = PrivilegedAccounts_DF
PrivAcc = PrivAcc.assign(lastQuarterReview=PrivAcc.uid.isin(lastQuarterReview.uid).astype(str))
# Rename the column
PrivAcc.rename(columns={'lastQuarterReview': 'Did UID exist last quarter'}, inplace=True)
######################################################################################################################
######################################################################################################################
# Change the boolean values in the Did UID exist last quarter to "Existed last Previous Quarter" or "New User"
booleandf = PrivAcc.select_dtypes(include=[bool])
booleanDictionary = {True: 'True', False: 'False'}
for column in booleandf:
PrivAcc[column] = PrivAcc[column].map(booleanDictionary)
def label_column_Did_UID_exist_last_quarter(row):
if row['Did UID exist last quarter'] == 'True':
return 'Existed last Quarter'
else:
return 'New User'
PrivAcc['Did UID exist last quarter'] = PrivAcc.apply(lambda row: label_column_Did_UID_exist_last_quarter(row), axis=1)
######################################################################################################################
######################################################################################################################
# Create a new tab that will display all Privileged and Non - Privileged Accounts
allAccountsDF = PrivAcc
######################################################################################################################
######################################################################################################################
## Place a filter on Privileged Acces column to only show values that are equaled to true
PrivAcc= PrivAcc.loc[PrivAcc['PrivilegedAccess'] == 'True']
## Create a blank column called Approved/Revoked
PrivAcc["Approved/Revoke"] = ''
PrivAcc["Comments"] = ''
#########################################################################################################
return userListing_DF,PrivAcc,allAccountsDF
call_DFs.py
import pandas as pd
from Pre_Processing_File import pre_Processing
def call_DFs():
userListing_DF,PrivAcc,allAccountsDF = pre_Processing(userListing_DF,PrivAcc,allAccountsDF)
## Create an excel file with multiple sheets
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter(r'DesktopTestPrivExportsDFs.xlsx', engine='openpyxl')
# Write each dataframe to a different worksheet.
userListing_DF.to_excel(writer, sheet_name='User Listing',index=False)
PrivAcc.to_excel(writer, sheet_name='Privileged Accounts',index=False)
allAccountsDF.to_excel(writer, sheet_name='All Accounts',index=False)
# Close the Pandas Excel writer and output the Excel file.
#writer.save()
call_DFs()
Advertisement
Answer
def pre_Processing(userListing_DF,PrivAcc,allAccountsDF):
should just be
def pre_Processing():
since it’s returning those three variables and doesn’t require them as arguments.
Then, when calling that, just
userListing_DF,PrivAcc,allAccountsDF = pre_Processing()