Hello, I have three different DataFrames whose values I want to add together.
Below are the DataFrames and the code I used to add all of their values together, but I ran into an error. I would appreciate any help. Thank you very much.
df1 = pd.read_csv('/content/drive/MyDrive/노후교량/DZ_Middle_G/20km_hr_lane1_acc_middle_girder.csv') df1.head() output: TIME DZ_middle_G_L1_20km 0 0.001 1.021e-003 1 0.002 1.597e-003 2 0.003 1.564e-003 3 0.004 1.031e-003 4 0.005 3.022e-004 df2 = pd.read_csv('/content/drive/MyDrive/노후교량/DZ_Middle_G/20km_hr_lane2_acc_middle_girder.csv') df2.head() output: TIME DZ_middle_G_L2_20km 0 0.001 -6.168e-005 1 0.002 -9.240e-005 2 0.003 -7.781e-005 3 0.004 -1.962e-005 4 0.005 6.591e-005 df3 = pd.read_csv('/content/drive/MyDrive/노후교량/DZ_Middle_G/25km_hr_lane1_acc_middle_girder.csv') df3.head() output: TIME DZ_middle_G_L1_25km 0 0.001 1.277e-003 1 0.002 1.996e-003 2 0.003 1.955e-003 3 0.004 1.288e-003 4 0.005 3.777e-004
I used this code to add the values in the dataframes:
# Shift df2's TIME by 1.000 and df3's TIME by 2.000, then outer-merge both
# shifted frames onto df1.
# NOTE(review): `df['TIME'] + <float>` raises TypeError when TIME holds
# strings -- presumably the source of the reported error; confirm with
# df2.dtypes / df3.dtypes.
df = df1.merge(df2.assign(TIME=df2['TIME']+1.000), how='outer') .merge(df3.assign(TIME=df3['TIME']+2.000), how='outer')
# Add the lane-1 20 km column into the lane-2 20 km column; NaN in the
# addend is treated as 0.
df['DZ_middle_G_L2_20km'] += df['DZ_middle_G_L1_20km'].fillna(0)
# Add that updated lane-2 column into the 25 km column, again treating NaN
# in the addend as 0.
df['DZ_middle_G_L1_25km'] += df['DZ_middle_G_L2_20km'].fillna(0)
However I got this Error:
TypeError: can only concatenate str (not "float") to str
Here is an example of what I want the result to look like:
Final DataFrame:
TIME  df1         df2          df3
0     1.021e-003
1     1.597e-003
2     1.564e-003
3     1.031e-003  -6.168e-005
4     3.022e-004  -9.240e-005
5                 -7.781e-005
6                 -1.962e-005  1.277e-003
7                 6.591e-005   1.996e-003
8                              1.955e-003
9                              1.288e-003
10                             3.777e-004
The result should look like this, except that the values in each row are added together (or subtracted).
Advertisement
Answer
Here is the pandas output:
9995 9.996 1.630000e-09 -4.715000e-06 -1.146000e-02 -1.146471e-02 9996 9.997 1.612000e-09 -4.701000e-06 -1.129000e-02 -1.129470e-02 9997 9.998 1.589000e-09 -4.671000e-06 -1.192000e-02 -1.192467e-02 9998 9.999 1.561000e-09 -4.624000e-06 -1.191000e-02 -1.191462e-02 9999 10.000 1.527000e-09 -4.560000e-06 -1.023000e-02 -1.023456e-02 10000 10.001 NaN -4.481000e-06 -6.724000e-03 -6.728481e-03 10001 10.002 NaN -4.386000e-06 3.137000e-03 3.132614e-03 10002 10.003 NaN -4.277000e-06 4.288000e-03 4.283723e-03 10003 10.004 NaN -4.152000e-06 3.145000e-03 3.140848e-03 10004 10.005 NaN -4.014000e-06 1.354000e-03 1.349986e-03 10005 10.006 NaN -3.862000e-06 4.362000e-04 4.323380e-04
Here is the csv output:
TIME,20km_hr_lane1,20km_hr_lane2,25km_hr_lane1 00.001,0.001021,no_data,no_data 00.002,0.001597,no_data,no_data 00.003,0.001564,no_data,no_data ... 02.997,0.0001386,no_data,no_data 02.998,0.0004103,no_data,no_data 02.999,0.0006737,no_data,no_data 03.000,0.0009299,-0.00006168,no_data 03.001,0.001174,-0.00009240,no_data 03.002,0.001395,-0.00007781,no_data ... 05.997,-0.00004084,-0.0006072,no_data 05.998,-0.00004350,-0.000005450,no_data 05.999,-0.00004683,0.0005951,no_data 06.000,-0.00005086,0.001167,0.001277 06.001,-0.00005560,0.001686,0.001996 06.002,-0.00006101,0.002133,0.001955 ... 09.998,1.589E-9,-0.000004671,-0.01192 09.999,1.561E-9,-0.000004624,-0.01191 10.000,1.527E-9,-0.000004560,-0.01023 10.001,no_data,-0.000004481,-0.006724 10.002,no_data,-0.000004386,0.003137 10.003,no_data,-0.000004277,0.004288 ... 12.997,no_data,7.729E-10,3.074E-7 12.998,no_data,7.643E-10,3.045E-7 12.999,no_data,7.532E-10,3.006E-7 13.000,no_data,no_data,2.957E-7 13.001,no_data,no_data,2.899E-7 13.002,no_data,no_data,2.832E-7
Here is the code:
"""Overlay several time-series CSV files on one shared time axis and sum them.

Each input file is read as a header line followed by ``TIME,value`` rows.
Every file is shifted by its entry in ``offset_list``; the aligned values are
written to ``output_filename`` (with ``no_data`` where a file has no sample)
and then loaded into a DataFrame that gets an extra ``sum`` column.
"""

# io, os and redirect_stdout are kept from the original import list even
# though the rewritten script no longer needs them.
from contextlib import redirect_stdout
from decimal import Decimal, InvalidOperation
import io
import os

import pandas as pd

# --- configuration -------------------------------------------------------
file_list = [
    "20km_hr_lane1_acc_middle_girder.csv",
    "20km_hr_lane2_acc_middle_girder.csv",
    "25km_hr_lane1_acc_middle_girder.csv",
]  # TODO CHANGE THIS: input filenames as a list of strings
step_value = "0.001"  # TODO CHANGE THIS: time step as a string
output_filename = "output.csv"  # TODO CHANGE THIS: output filename

# Placeholder written for a (time, file) pair that has no sample.
NO_DATA = "no_data"


def retProper(temp3):
    """Return ``temp3`` as a Decimal rounded to the precision of ``step_value``.

    The number of decimal places is the length of the text after the first
    "." in ``step_value`` (0 when there is no ".").
    """
    parts = step_value.split(".")
    decimals = len(parts[1]) if len(parts) > 1 else 0
    return round(Decimal(temp3), decimals)


min_limit = retProper(0.001)  # TODO CHANGE THIS: first time value
max_limit = retProper(10)  # TODO CHANGE THIS: last time value
# TODO CHANGE THIS: one start offset per entry of file_list
offset_list = [retProper(0.001), retProper(3), retProper(6)]


def retPrintable(temp4):
    """Return ``temp4`` as text, zero-padded to the width of ``max_limit``."""
    return str(temp4).zfill(len(str(max_limit)))


def long_substr(data):
    """Return the longest substring shared by every string in ``data``.

    Returns ``''`` when ``data`` has fewer than two items or its first item
    is empty.  Among equally long candidates the first one found (scanning
    the first string left to right) wins.
    """
    substr = ""
    if len(data) > 1 and len(data[0]) > 0:
        first = data[0]
        for i in range(len(first)):
            for j in range(len(first) - i + 1):
                candidate = first[i:i + j]
                if j > len(substr) and all(candidate in x for x in data):
                    substr = candidate
    return substr


def build_time_grid(start, stop, step):
    """Return ``start, start + step, ...`` for every value below ``stop``.

    Raises:
        ValueError: if ``step`` is not positive (the loop could not end).
    """
    if step <= 0:
        raise ValueError("step must be positive")
    grid = []
    current = start
    # "<" rather than "==" so a stop value that is not an exact multiple of
    # the step still terminates the loop.
    while current < stop:
        grid.append(current)
        current += step
    return grid


def read_series(path):
    """Return the second column of the CSV at ``path`` as a list of Decimals.

    The first line is skipped as a header.  Reading stops at the first line
    that has no second column or whose second column is not a number.
    """
    values = []
    with open(path) as file_in:
        next(file_in, None)  # skip the header; tolerate an empty file
        for line in file_in:
            try:
                values.append(Decimal(line.split(",")[1].strip()))
            except (IndexError, InvalidOperation):
                break
    return values


def series_cell(series, offset, current_time, step, first_time):
    """Return the text for one series at ``current_time``.

    A series that starts at ``offset`` contributes its sample number
    ``(current_time - offset + step - first_time) / step``; ``NO_DATA`` is
    returned before the offset or when that sample does not exist.

    Raises:
        ValueError: if the shifted time is not a whole number of steps.
    """
    if current_time < offset:
        return NO_DATA
    index, remainder = divmod(current_time - offset + step - first_time, step)
    if remainder:
        raise ValueError(
            f"offset {offset} is not aligned with the {step} time step"
        )
    index = int(index)
    if 0 <= index < len(series):
        return str(series[index])
    return NO_DATA


def build_row(current_time, all_series, step):
    """Return one CSV row (no trailing newline) for ``current_time``."""
    cells = [retPrintable(current_time)]
    for series, offset in zip(all_series, offset_list):
        cells.append(
            series_cell(series, offset, current_time, step, min_limit)
        )
    return ",".join(cells)


def main():
    """Write the aligned CSV, load it, add the ``sum`` column, and print it.

    Returns:
        The resulting DataFrame.

    Raises:
        ValueError: if ``offset_list`` and ``file_list`` differ in length.
    """
    if len(offset_list) != len(file_list):
        raise ValueError("offset_list needs exactly one entry per input file")

    step = retProper(Decimal(step_value))
    grid = build_time_grid(min_limit, max(offset_list) + max_limit, step)
    all_series = [read_series(name) for name in file_list]

    # Column names are the filenames with their longest shared part removed;
    # built as a new list so the configured file_list is left untouched.
    shared = long_substr(file_list)
    columns = [name.replace(shared, "") for name in file_list]

    # Mode "w" truncates an existing file, so no separate delete is needed.
    with open(output_filename, "w") as out:
        out.write("TIME," + ",".join(columns) + "\n")
        for current_time in grid:
            out.write(build_row(current_time, all_series, step) + "\n")

    df = pd.read_csv(output_filename, na_values=[NO_DATA])
    value_columns = list(df.columns[1:])
    df[value_columns] = df[value_columns].astype("float64")
    df["sum"] = df[value_columns].sum(axis=1)
    print(df)
    return df


if __name__ == "__main__":
    df = main()