I have a python file which I have called Pre_Processing_File.py, this file has the function pre_Processing which loads in a text file and creates 3 data frames; userListing_DF,PrivAcc,allAccountsDF, this function then returns the 3 DFs.
What I want to do is create another script and import the 3 DFs from the pre_Processing.py file, I have created a script called call_DFs which I have also included below. The error I’m getting with this is: UnboundLocalError: local variable ‘userListing_DF’ referenced before assignment. I’m not sure where to define all 3 DFs within the script
Pre_Processing_File.py
JavaScript
x
136
136
1
def pre_Processing(userListing_DF,PrivAcc,allAccountsDF):
2
3
4
## Import Libraries
5
import pandas as pd
6
import numpy as np
7
from pandas import DataFrame
8
9
dir1 = filedialog.askopenfile(mode="r", initialdir="/", title="select the first file",
10
filetypes=(("Text files", "*.txt"), ("all files", "*.*")))
11
12
13
dir2 = filedialog.askopenfile(mode="rb", initialdir="/", title="Select the second file",
14
filetypes=(("Excel files", "*.xlsx"), ("all files", "*.*")))
15
16
17
######################################################################################################################
18
######################################################################################################################
19
## Read in the single long column, and then form a group indicator by seeing where the value is '[User]'. Then separate the
20
## column labels and values, with a str.split and join back to your DataFrame. Finally pivot to your desired shape.
21
22
userListing_DF = pd.read_csv(dir1, sep='n', header=None)
23
24
userListing_DF['Group'] = userListing_DF[0].eq('[User]').cumsum() # This creates groups, groups everything until the next 'User' occurs
25
userListing_DF = userListing_DF[userListing_DF[0].ne('[User]')] # No longer need these rows
26
27
userListing_DF = pd.concat([userListing_DF, userListing_DF[0].str.split('=', expand=True).rename(columns={0: 'col', 1: 'val'})],
28
axis=1) # This performs a string slipt to seperate the headers with the User data
29
30
userListing_DF = userListing_DF.pivot(index='Group', columns='col', values='val').rename_axis(columns=None)
31
32
######################################################################################################################
33
######################################################################################################################
34
35
## Only keep some columns and remove the rest
36
userListing_DF = userListing_DF[['uid','last_name','first_name','role']]
37
38
#uid, last, first, role
39
## Copy data from the userListing_DF dataframe
40
PrivilegedAccounts_DF=pd.concat([userListing_DF],axis=1)
41
42
43
## Remove the multiIndex - cannot use below query with multi index you will get the following error
44
PrivilegedAccounts_DF.columns = PrivilegedAccounts_DF.columns.map(''.join)
45
46
## We cam also remove the MultiIndex for the below DF. If we don't the Index will stay and there will be other formatting
47
## problems when exporting to excel
48
userListing_DF.columns = userListing_DF.columns.map(''.join)
49
50
## Fill blank fields in the first name column with Nan
51
PrivilegedAccounts_DF = PrivilegedAccounts_DF.fillna( 'blank' )
52
53
54
## Create a function - If values in the first name column match the search criteria return True, else return false (Create new
55
## column called PrivilegedAccess to store information)
56
57
with open(r'DesktopTestPrivScriptdesignator_Priv_Accounts_List.txt') as f:
58
priv_List = f.readlines()
59
priv_List = [x.strip() for x in priv_List]
60
priv_List = list(filter(lambda x: x, priv_List))
61
62
63
64
Search_for_These_values = priv_List
65
pattern = '|'.join(Search_for_These_values)
66
67
PrivilegedAccounts_DF['PrivilegedAccess'] = PrivilegedAccounts_DF['first_name'].str.contains(pattern).astype(str)
68
69
## Create a function - Create a "Calc" column - If first name is blank then value will be "Privileged", If value is Diagnostics
70
## then enter "Diagnostics", If value is SYS then enter "SYS", If value is service
71
## account then enter "service account"
72
73
def label_columnCalc(row):
74
if row['first_name'] in Search_for_These_values:
75
return 'Privileged'
76
else:
77
return row['first_name']
78
79
PrivilegedAccounts_DF['Calc'] = PrivilegedAccounts_DF.apply(lambda row: label_columnCalc(row), axis=1)
80
81
######################################################################################################################
82
######################################################################################################################
83
84
# create a dataframe of last quarters Privileged Access Review Excel file
85
lastQuarterReview = pd.read_excel(dir2)
86
87
88
# Now look to see if any new User ID's exist since last quarter
89
PrivAcc = PrivilegedAccounts_DF
90
PrivAcc = PrivAcc.assign(lastQuarterReview=PrivAcc.uid.isin(lastQuarterReview.uid).astype(str))
91
92
# Rename the column
93
PrivAcc.rename(columns={'lastQuarterReview': 'Did UID exist last quarter'}, inplace=True)
94
95
######################################################################################################################
96
######################################################################################################################
97
98
# Change the boolean values in the Did UID exist last quarter to "Existed last Previous Quarter" or "New User"
99
booleandf = PrivAcc.select_dtypes(include=[bool])
100
booleanDictionary = {True: 'True', False: 'False'}
101
102
for column in booleandf:
103
PrivAcc[column] = PrivAcc[column].map(booleanDictionary)
104
105
106
def label_column_Did_UID_exist_last_quarter(row):
107
if row['Did UID exist last quarter'] == 'True':
108
return 'Existed last Quarter'
109
else:
110
return 'New User'
111
112
PrivAcc['Did UID exist last quarter'] = PrivAcc.apply(lambda row: label_column_Did_UID_exist_last_quarter(row), axis=1)
113
114
######################################################################################################################
115
######################################################################################################################
116
117
# Create a new tab that will display all Privileged and Non - Privileged Accounts
118
allAccountsDF = PrivAcc
119
120
######################################################################################################################
121
######################################################################################################################
122
123
124
## Place a filter on Privileged Acces column to only show values that are equaled to true
125
PrivAcc= PrivAcc.loc[PrivAcc['PrivilegedAccess'] == 'True']
126
127
## Create a blank column called Approved/Revoked
128
PrivAcc["Approved/Revoke"] = ''
129
PrivAcc["Comments"] = ''
130
131
#########################################################################################################
132
133
134
return userListing_DF,PrivAcc,allAccountsDF
135
136
call_DFs.py
JavaScript
1
29
29
1
import pandas as pd
2
3
from Pre_Processing_File import pre_Processing
4
5
6
7
def call_DFs():
8
9
userListing_DF,PrivAcc,allAccountsDF = pre_Processing(userListing_DF,PrivAcc,allAccountsDF)
10
11
12
13
## Create an excel file with multiple sheets
14
15
# Create a Pandas Excel writer using XlsxWriter as the engine.
16
writer = pd.ExcelWriter(r'DesktopTestPrivExportsDFs.xlsx', engine='openpyxl')
17
18
# Write each dataframe to a different worksheet.
19
userListing_DF.to_excel(writer, sheet_name='User Listing',index=False)
20
PrivAcc.to_excel(writer, sheet_name='Privileged Accounts',index=False)
21
allAccountsDF.to_excel(writer, sheet_name='All Accounts',index=False)
22
23
24
# Close the Pandas Excel writer and output the Excel file.
25
#writer.save()
26
27
28
call_DFs()
29
Advertisement
Answer
JavaScript
1
2
1
def pre_Processing(userListing_DF,PrivAcc,allAccountsDF):
2
should just be
JavaScript
1
2
1
def pre_Processing():
2
since it’s returning those three variables and doesn’t require them as arguments.
Then, when calling that, just
JavaScript
1
2
1
userListing_DF,PrivAcc,allAccountsDF = pre_Processing()
2