Skip to content
Advertisement

import 2 dataframes from a function in a different python file

I have a python file which I have called Pre_Processing_File.py, this file has the function pre_Processing which loads in a text file and creates 3 data frames; userListing_DF,PrivAcc,allAccountsDF, this function then returns the 3 DFs.

What I want to do is create another script and import the 3 DFs from the pre_Processing.py file, I have created a script called call_DFs which I have also included below. The error I’m getting with this is: UnboundLocalError: local variable ‘userListing_DF’ referenced before assignment. I’m not sure where to define all 3 DFs within the script

Pre_Processing_File.py

    def pre_Processing(userListing_DF,PrivAcc,allAccountsDF):
        
        
        ## Import Libraries
        import pandas as pd
        import numpy as np
        from pandas import DataFrame
        
        dir1 = filedialog.askopenfile(mode="r", initialdir="/", title="select the first file",
                                      filetypes=(("Text files", "*.txt"), ("all files", "*.*")))
    
    
        dir2 = filedialog.askopenfile(mode="rb", initialdir="/", title="Select the second file",
                                      filetypes=(("Excel files", "*.xlsx"), ("all files", "*.*")))
        
        
        ######################################################################################################################
        ######################################################################################################################
        ## Read in the single long column, and then form a group indicator by seeing where the value is '[User]'. Then separate the 
        ## column labels and values, with a str.split and join back to your DataFrame. Finally pivot to your desired shape.
        
        userListing_DF = pd.read_csv(dir1, sep='n', header=None)
    
        userListing_DF['Group'] = userListing_DF[0].eq('[User]').cumsum() # This creates groups, groups everything until the next 'User' occurs
        userListing_DF = userListing_DF[userListing_DF[0].ne('[User]')]  # No longer need these rows
    
        userListing_DF = pd.concat([userListing_DF, userListing_DF[0].str.split('=', expand=True).rename(columns={0: 'col', 1: 'val'})],
                                   axis=1) # This performs a string slipt to seperate the headers with the User data
    
        userListing_DF = userListing_DF.pivot(index='Group', columns='col', values='val').rename_axis(columns=None)
        
        ######################################################################################################################
        ######################################################################################################################
        
        ## Only keep some columns and remove the rest
        userListing_DF = userListing_DF[['uid','last_name','first_name','role']]
        
        #uid, last, first, role
        ## Copy data from the userListing_DF dataframe
        PrivilegedAccounts_DF=pd.concat([userListing_DF],axis=1)
    
        
        ## Remove the multiIndex - cannot use below query with multi index you will get the following error
        PrivilegedAccounts_DF.columns = PrivilegedAccounts_DF.columns.map(''.join)
    
        ## We cam also remove the MultiIndex for the below DF. If we don't the Index will stay and there will be other formatting 
        ## problems when exporting to excel
        userListing_DF.columns = userListing_DF.columns.map(''.join)
        
        ## Fill blank fields in the first name column with Nan
        PrivilegedAccounts_DF = PrivilegedAccounts_DF.fillna( 'blank' )
        
        
        ## Create a function - If values in the first name column match the search criteria return True, else return false (Create new 
        ##                     column called PrivilegedAccess to store information)
    
        with open(r'DesktopTestPrivScriptdesignator_Priv_Accounts_List.txt') as f:
            priv_List = f.readlines()
            priv_List = [x.strip() for x in priv_List] 
            priv_List = list(filter(lambda x: x, priv_List))
            
            
    
        Search_for_These_values = priv_List
        pattern = '|'.join(Search_for_These_values)
    
        PrivilegedAccounts_DF['PrivilegedAccess'] = PrivilegedAccounts_DF['first_name'].str.contains(pattern).astype(str)
        
        ## Create a function - Create a "Calc" column - If first name is blank then value will be "Privileged", If value is Diagnostics
        ##                                              then enter "Diagnostics", If value is SYS then enter "SYS", If value is service 
        ##                                              account then enter "service account"
    
        def label_columnCalc(row):
            if row['first_name'] in Search_for_These_values:
                return 'Privileged'
            else:
                return row['first_name']
    
        PrivilegedAccounts_DF['Calc'] = PrivilegedAccounts_DF.apply(lambda row: label_columnCalc(row), axis=1)
        
        ######################################################################################################################
        ######################################################################################################################    
        
        # create a dataframe of last quarters Privileged Access Review Excel file
        lastQuarterReview = pd.read_excel(dir2)
        
        
        # Now look to see if any new User ID's exist since last quarter
        PrivAcc = PrivilegedAccounts_DF
        PrivAcc = PrivAcc.assign(lastQuarterReview=PrivAcc.uid.isin(lastQuarterReview.uid).astype(str))
        
        # Rename the column 
        PrivAcc.rename(columns={'lastQuarterReview': 'Did UID exist last quarter'}, inplace=True)
        
        ######################################################################################################################
        ######################################################################################################################    
        
        # Change the boolean values in the Did UID exist last quarter to "Existed last Previous Quarter" or "New User"
        booleandf = PrivAcc.select_dtypes(include=[bool])
        booleanDictionary = {True: 'True', False: 'False'}
    
        for column in booleandf:
            PrivAcc[column] = PrivAcc[column].map(booleanDictionary)
            
        
        def label_column_Did_UID_exist_last_quarter(row):
            if row['Did UID exist last quarter'] == 'True':
                return 'Existed last Quarter'
            else:
                return 'New User'
            
        PrivAcc['Did UID exist last quarter'] = PrivAcc.apply(lambda row: label_column_Did_UID_exist_last_quarter(row), axis=1)
        
        ######################################################################################################################
        ######################################################################################################################
        
        # Create a new tab that will display all Privileged and Non - Privileged Accounts
        allAccountsDF = PrivAcc
        
        ######################################################################################################################
        ######################################################################################################################   
        
        
        ## Place a filter on Privileged Acces column to only show values that are equaled to true
        PrivAcc= PrivAcc.loc[PrivAcc['PrivilegedAccess'] == 'True']
        
        ## Create a blank column called Approved/Revoked
        PrivAcc["Approved/Revoke"] = ''
        PrivAcc["Comments"] = ''
        
        #########################################################################################################
     
        
        return userListing_DF,PrivAcc,allAccountsDF

call_DFs.py

    import pandas as pd
    
    from Pre_Processing_File import pre_Processing
    
    
    
    def call_DFs():
        
        userListing_DF,PrivAcc,allAccountsDF = pre_Processing(userListing_DF,PrivAcc,allAccountsDF)
        
        
        
        ## Create an excel file with multiple sheets
    
        # Create a Pandas Excel writer using XlsxWriter as the engine.
        writer = pd.ExcelWriter(r'DesktopTestPrivExportsDFs.xlsx', engine='openpyxl')
    
        # Write each dataframe to a different worksheet.
        userListing_DF.to_excel(writer, sheet_name='User Listing',index=False)
        PrivAcc.to_excel(writer, sheet_name='Privileged Accounts',index=False)
        allAccountsDF.to_excel(writer, sheet_name='All Accounts',index=False)
    
    
        # Close the Pandas Excel writer and output the Excel file.
        #writer.save()
    
    
    call_DFs()

Advertisement

Answer

def pre_Processing(userListing_DF,PrivAcc,allAccountsDF):

should just be

def pre_Processing():

since it’s returning those three variables and doesn’t require them as arguments.

Then, when calling that, just

userListing_DF,PrivAcc,allAccountsDF = pre_Processing()
User contributions licensed under: CC BY-SA
10 People found this is helpful
Advertisement