Skip to content
Advertisement

Concatenate strings (‘not float’)

Hello I have 3 different dataframes whose values I want to add.

I have the following dataframes below and the code I used to add the values all together but I encountered an error. I would appreciate any help, thank you very much.

df1 = pd.read_csv('/content/drive/MyDrive/노후교량/DZ_Middle_G/20km_hr_lane1_acc_middle_girder.csv')
df1.head()

output:

        TIME    DZ_middle_G_L1_20km
0       0.001   1.021e-003
1       0.002   1.597e-003
2       0.003   1.564e-003
3       0.004   1.031e-003
4       0.005   3.022e-004


df2 = pd.read_csv('/content/drive/MyDrive/노후교량/DZ_Middle_G/20km_hr_lane2_acc_middle_girder.csv')
df2.head()

output:

    TIME    DZ_middle_G_L2_20km
0   0.001   -6.168e-005
1   0.002   -9.240e-005
2   0.003   -7.781e-005
3   0.004   -1.962e-005
4   0.005    6.591e-005


df3 = pd.read_csv('/content/drive/MyDrive/노후교량/DZ_Middle_G/25km_hr_lane1_acc_middle_girder.csv')
df3.head()

output:

    TIME    DZ_middle_G_L1_25km
0   0.001   1.277e-003
1   0.002   1.996e-003
2   0.003   1.955e-003
3   0.004   1.288e-003
4   0.005   3.777e-004

I used this code to add the values in the dataframes:

# Outer-merge the three frames after adding 1.0 to df2's TIME and 2.0 to df3's.
# The trailing backslash continues the statement onto the next line; without
# it the second `.merge(...)` line is a syntax error.
df = df1.merge(df2.assign(TIME=df2['TIME']+1.000), how='outer') \
    .merge(df3.assign(TIME=df3['TIME']+2.000), how='outer')
# Add the previous column (missing values treated as 0) into the next one.
df['DZ_middle_G_L2_20km'] += df['DZ_middle_G_L1_20km'].fillna(0)
df['DZ_middle_G_L1_25km'] += df['DZ_middle_G_L2_20km'].fillna(0)

However I got this Error:

TypeError: can only concatenate str (not "float") to str    

Here is an example of how I want the result to look:

Final DataFrame:

TIME df1              df2              df3
0    1.021e-003
1    1.597e-003
2    1.564e-003
3    1.031e-003       -6.168e-005
4    3.022e-004       -9.240e-005
5                     -7.781e-005
6                     -1.962e-005      1.277e-003
7                      6.591e-005      1.996e-003
8                                      1.955e-003
9                                      1.288e-003
10                                     3.777e-004                   

The result should look like this, but with the overlapping values added together (or subtracted).

Advertisement

Answer

Here is the pandas output:

9995    9.996   1.630000e-09  -4.715000e-06  -1.146000e-02 -1.146471e-02
9996    9.997   1.612000e-09  -4.701000e-06  -1.129000e-02 -1.129470e-02
9997    9.998   1.589000e-09  -4.671000e-06  -1.192000e-02 -1.192467e-02
9998    9.999   1.561000e-09  -4.624000e-06  -1.191000e-02 -1.191462e-02
9999   10.000   1.527000e-09  -4.560000e-06  -1.023000e-02 -1.023456e-02
10000  10.001            NaN  -4.481000e-06  -6.724000e-03 -6.728481e-03
10001  10.002            NaN  -4.386000e-06   3.137000e-03  3.132614e-03
10002  10.003            NaN  -4.277000e-06   4.288000e-03  4.283723e-03
10003  10.004            NaN  -4.152000e-06   3.145000e-03  3.140848e-03
10004  10.005            NaN  -4.014000e-06   1.354000e-03  1.349986e-03
10005  10.006            NaN  -3.862000e-06   4.362000e-04  4.323380e-04

Here is the csv output:

TIME,20km_hr_lane1,20km_hr_lane2,25km_hr_lane1
00.001,0.001021,no_data,no_data
00.002,0.001597,no_data,no_data
00.003,0.001564,no_data,no_data
...
02.997,0.0001386,no_data,no_data
02.998,0.0004103,no_data,no_data
02.999,0.0006737,no_data,no_data
03.000,0.0009299,-0.00006168,no_data
03.001,0.001174,-0.00009240,no_data
03.002,0.001395,-0.00007781,no_data
...
05.997,-0.00004084,-0.0006072,no_data
05.998,-0.00004350,-0.000005450,no_data
05.999,-0.00004683,0.0005951,no_data
06.000,-0.00005086,0.001167,0.001277
06.001,-0.00005560,0.001686,0.001996
06.002,-0.00006101,0.002133,0.001955
...
09.998,1.589E-9,-0.000004671,-0.01192
09.999,1.561E-9,-0.000004624,-0.01191
10.000,1.527E-9,-0.000004560,-0.01023
10.001,no_data,-0.000004481,-0.006724
10.002,no_data,-0.000004386,0.003137
10.003,no_data,-0.000004277,0.004288
...
12.997,no_data,7.729E-10,3.074E-7
12.998,no_data,7.643E-10,3.045E-7
12.999,no_data,7.532E-10,3.006E-7
13.000,no_data,no_data,2.957E-7
13.001,no_data,no_data,2.899E-7
13.002,no_data,no_data,2.832E-7

Here is the code:

from decimal import Decimal
import io
from contextlib import redirect_stdout
import os
import pandas as pd



# ---- User settings: every value below is a TODO to adapt to your data ----

# TODO CHANGE THIS: output filename (str).
output_filename = "output.csv"

# TODO CHANGE THIS: step value, given as a string. The number of digits after
# the "." is what retProper() uses as its rounding precision.
step_value = "0.001"

# TODO CHANGE THIS: input filenames (list of str).
file_list = [
    "20km_hr_lane1_acc_middle_girder.csv",
    "20km_hr_lane2_acc_middle_girder.csv",
    "25km_hr_lane1_acc_middle_girder.csv",
]

def retProper(temp3):
    """Convert ``temp3`` to a ``Decimal`` rounded to the precision of ``step_value``.

    The precision is the number of characters after the first "." in the
    module-level ``step_value`` string (e.g. "0.001" -> 3 places).

    Args:
        temp3: Any value accepted by the ``Decimal`` constructor
            (int, float, str, Decimal).

    Returns:
        Decimal: ``temp3`` rounded to that many decimal places.
    """
    try:
        decimal_places = len(step_value.split(".")[1])
    except (IndexError, AttributeError):
        # No "." in step_value (or it is not a string): round to whole units.
        decimal_places = 0
    return round(Decimal(temp3), decimal_places)

# TODO CHANGE THIS: offsets as a list of Decimal values; retEligible() looks
# them up by file position, so keep one entry per file in file_list.
offset_list = [retProper(raw_offset) for raw_offset in (0.001, 3, 6)]

# TODO CHANGE THIS: maximum time, as a Decimal.
max_limit = retProper(10)

# TODO CHANGE THIS: minimum time, as a Decimal.
min_limit = retProper(0.001)

def retPrintable(temp4):
    """Return ``str(temp4)`` left-padded with zeros to the width of ``str(max_limit)``."""
    target_width = len(str(max_limit))
    text = str(temp4)
    return text.zfill(target_width)

# Build the shared time axis temp2: every step from min_limit up to, but not
# including, max(offset_list) + max_limit.
temp2 = []
temp10 = retProper(Decimal(step_value))  # step between consecutive time points
if temp10 <= 0:
    raise ValueError("step_value must be positive, got %r" % (step_value,))
_axis_end = max(offset_list) + max_limit
temp1 = min_limit
# Use "<" rather than stopping on exact equality: an equality test never
# fires if the end value is skipped or min_limit already lies past it, which
# would loop forever.
while temp1 < _axis_end:
    temp2.append(temp1)
    temp1 = temp1 + temp10

# Load the second column of each input file into a module-level list named
# temp7_<index> (temp7_0, temp7_1, ...); retEligible() looks the lists up by
# that name.
for temp6 in range(len(file_list)):
    # Register the list before opening the file, so the name exists even if
    # reading stops early.
    _values = globals()["temp7_" + str(temp6)] = []
    with open(file_list[temp6]) as file_in:
        next(file_in, None)  # skip the header row (tolerates an empty file)
        for line in file_in:
            try:
                _values.append(Decimal(line.split(',')[1].strip()))
            except (IndexError, ArithmeticError):
                # Row without a second column, or a cell Decimal cannot parse
                # (decimal.InvalidOperation is an ArithmeticError): stop
                # reading this file at the first such row.
                break

def retEligible(current_index,temp11):
    """Print one CSV cell (value plus trailing comma, no newline) for a file.

    Reads the module-level ``offset_list``, ``temp2`` (time axis), ``temp10``
    (time step) and the per-file value list ``temp7_<temp11>``.

    Args:
        current_index: Time point of the row being written (an element of
            ``temp2``).
        temp11: Position of the file in ``file_list`` / ``offset_list``.

    Prints ``"no_data,"`` when ``current_index`` is below the file's offset or
    when the computed position is past the end of the file's values;
    otherwise prints ``"<value>,"``.

    Raises:
        ValueError: If ``current_index - offset + temp10`` is not an element
            of ``temp2`` (raised by ``list.index``).
    """
    minimum_index = offset_list[temp11]
    if current_index < minimum_index:
        print("no_data,",end='')
        return
    # Direct lookup of the per-file list instead of exec() on a built string.
    series = globals()["temp7_"+str(temp11)]
    # Shift the time back by the file's offset, then find its row position.
    position = temp2.index(current_index-minimum_index+temp10)
    try:
        value = series[position]
    except IndexError:
        print("no_data,",end='')
    else:
        print(str(value)+',',end='')

# Render every CSV row in memory. retEligible() emits its cell with print(),
# so stdout is redirected into a string buffer while the rows are produced.
with io.StringIO() as buf, redirect_stdout(buf):
    for time_point in temp2:
        print(retPrintable(time_point)+",",end='')
        for file_index in range(len(file_list)):
            retEligible(time_point,file_index)
        print("\n",end='')  # end of row
    output = buf.getvalue()

# Best-effort removal of a previous output file; a missing file is fine.
try:
    os.remove(output_filename)
except OSError:
    pass

def long_substr(data):
    """Return the longest substring of ``data[0]`` contained in every item.

    Returns ``''`` when ``data`` has fewer than two items or ``data[0]`` is
    empty. When several substrings share the maximum length, the one that
    starts earliest in ``data[0]`` is returned.
    """
    substr = ''
    if len(data) > 1 and len(data[0]) > 0:
        for i in range(len(data[0])):
            for j in range(len(data[0])-i+1):
                if j > len(substr) and all(data[0][i:i+j] in x for x in data):
                    substr = data[0][i:i+j]
    return substr

# Column labels: each filename with the text shared by all filenames removed.
# A new list is built so file_list itself is left unmodified.
temp14 = long_substr(file_list)
temp15 = [name.replace(temp14,"") for name in file_list]

with open(output_filename, "w") as f:
    f.write("TIME,"+','.join(temp15)+"\n")
    for row in output.splitlines():
        # Every rendered row ends with a trailing comma; drop it.
        f.write(row[:-1]+"\n")

# Read the generated CSV back; "no_data" cells are parsed as NaN.
df = pd.read_csv(output_filename, na_values = ["no_data"])
# Every column except the first holds values.
value_columns = list(df.columns[1:])
df[value_columns] = df[value_columns].astype('float64')
# Row-wise total across the value columns, stored in a new 'sum' column.
df['sum'] = df[value_columns].sum(axis=1)
print(df)
User contributions licensed under: CC BY-SA
6 People found this is helpful
Advertisement