I try to concat some dataframe – 30 dataframe of 24h data – that been created automatically with some csv, but sometimes csv doesn’t exist, so the dataframe wasn’t created (df1, fd2, df4,df8,df9,…).
And so I want to create weekly dataframe with 7 concatenated df, but the function is in error when some df is missing. But I want the weekly df created with a empty period if some is missing.
dfweek1 = pd.concat([df1, df2, df3, df4, df5, df6, df7])
dfweek2 = pd.concat([df8, df9, df10, df11, df12, df13, df14])
dfweek3 = pd.concat([df15, df16, df17, df18, df19, df20, df21])
dfweek4 = pd.concat([df22, df23, df24, df25, df26, df27, df28])
When I execute line by line for testing :
dfweek2 = pd.concat([df9, df10, df11, df12, df13, df14, df15])
Traceback (most recent call last):
File "/tmp/ipykernel_40958/2223069184.py", line 1, in <cell line: 1>
dfweek2 = pd.concat([df9, df10, df11, df12, df13, df14, df15])
NameError: name 'df14' is not defined
df14 is missing, so dfweek2 is not created.
######################
EDIT : here is all my code for more comprehensive, maybe (probably) I didn’t choose the best way… Ignore all the interpolating things, that for later (maybe).
######################
from datetime import date, timedelta, datetime
import pandas as pd
import argparse
import locale
locale.setlocale(locale.LC_TIME,'')
def get_arguments():
parser = argparse.ArgumentParser()
parser.add_argument("--date", help="Start Date in YYYY-MM-DD format", required=False, default=None)
return parser.parse_args()
args = get_arguments()
random_date = args.date
#print(args.date)
startdate=pd.to_datetime(args.date,format="%Y-%m-%d").strftime("%Y-%m-%d")
enddate = pd.to_datetime(startdate) + pd.DateOffset(months=1)
enddate = pd.to_datetime(enddate,format="%Y-%m-%d").strftime("%Y-%m-%d")
filedate = startdate
file=('consommation_horaire_'+str(filedate)+'.csv')
x = 1
df = {}
#ts = {}
#upsampled = {}
#interpol = {}
#recupération d'un mois de fichiers de 24h
while filedate != enddate :
try :
file=('consommation_horaire_'+str(filedate)+'.csv')
#globals()[f"df{x}"] = pd.read_csv(file, sep=",", on_bad_lines='skip', header=0, parse_dates=["date"], names =["date","kwh"])
globals()[f"df{x}"] = pd.read_csv(file, sep=",", on_bad_lines='skip', header=0, names =["date","kwh"])
globals()[f"df{x}"]['date'] = pd.to_datetime(globals()[f"df{x}"]['date'], format='%d/%m/%Y %H:%M')
globals()[f"df{x}"] = globals()[f"df{x}"].set_index('date')
#globals()[f"ts{x}"] = globals()[f"df{x}"]['kwh']
#globals()[f"upsampled{x}"] = globals()[f"ts{x}"].resample('60s')
#globals()[f"interpol{x}"] = globals()[f"upsampled{x}"].interpolate(method='quadratic', order=2)
#print(globals()[f"ts{x}"].head(1))
x += 1
filedate = pd.to_datetime(filedate,format="%Y-%m-%d") + timedelta(days=1)
filedate = pd.to_datetime(filedate,format="%Y-%m-%d").strftime("%Y-%m-%d")
except FileNotFoundError :
print('fichier ' +file+ ' manquant')
x += 1
filedate = pd.to_datetime(filedate,format="%Y-%m-%d") + timedelta(days=1)
filedate = pd.to_datetime(filedate,format="%Y-%m-%d").strftime("%Y-%m-%d")
pass
dfweek1 = pd.concat([df1, df2, df3, df4, df5, df6, df7])
dfweek2 = pd.concat([df8, df9, df10, df11, df12, df13, df14])
dfweek3 = pd.concat([df15, df16, df17, df18, df19, df20, df21])
dfweek4 = pd.concat([df22, df23, df24, df25, df26, df27, df28])
Advertisement
Answer
I find something that’s working, even if I think that’s not the best way, here is my code, that a little long :
synthesis : The syntax globals()[f”df{x}”] is pretty useful to create new datarame automatically in loop, “df” is the name of the dataframe and “x” is its number (see below), but you have to create a dictionary first with df = {}.
Note : unlike my first post, in this code I replaced the ‘arg’ def part with a constant value ‘date’, during my development.
#########
date='2021-04-01'
startdate=pd.to_datetime(date,format="%Y-%m-%d").strftime("%Y-%m-%d")
enddate = pd.to_datetime(startdate) + pd.DateOffset(months=1)
enddate = pd.to_datetime(enddate,format="%Y-%m-%d").strftime("%Y-%m-%d")
#creating dictionaries to generate alldataframe
filedate = startdate
x = 1
df = {}
while filedate != enddate :
try :
file=('consommation_horaire_'+str(filedate)+'.csv')
globals()[f"df{x}"] = pd.read_csv(file, sep=",", on_bad_lines='skip', header=0, names =["date","kwh"])
globals()[f"df{x}"]['date'] = pd.to_datetime(globals()[f"df{x}"]['date'], format='%d/%m/%Y %H:%M')
globals()[f"df{x}"] = globals()[f"df{x}"].set_index('date')
filedate = pd.to_datetime(filedate,format="%Y-%m-%d") + timedelta(days=1)
filedate = pd.to_datetime(filedate,format="%Y-%m-%d").strftime("%Y-%m-%d")
x += 1
except FileNotFoundError :
print('fichier ' +file+ ' manquant')
x += 1
filedate = pd.to_datetime(filedate,format="%Y-%m-%d") + timedelta(days=1)
filedate = pd.to_datetime(filedate,format="%Y-%m-%d").strftime("%Y-%m-%d")
pass
#creating an empty dataframe or completing it if doesn't exist or incomplete
df = {}
days=pd.to_datetime(date,format="%Y-%m-%d").days_in_month
for x in range(1,days):
try:
if globals()[f"df{x}"].shape[0] < 48 :
#filling empty lines
testdate=globals()[f"df{x}"].index[0].strftime("%Y-%m-%d")
testdate1 = pd.to_datetime(testdate,format="%Y-%m-%d %H:%M:%S")
testdate2 = pd.to_datetime(testdate,format="%Y-%m-%d %H:%M:%S")+ timedelta(hours=23.5)
fillindex=pd.date_range(start=testdate1, end=testdate2, freq='30min')
globals()[f"df{x}"]=globals()[f"df{x}"].reindex(fillindex)
else:
pass
except :
globals()[f"df{x}"] = pd.DataFrame(index=range(48),columns=range(1))
globals()[f"df{x}"].columns = ['kwh']
testdate=pd.to_datetime(date,format="%Y-%m-%d %H:%M:%S")+ timedelta(days=x-1)
testdate1 = pd.to_datetime(testdate,format="%Y-%m-%d %H:%M:%S")
testdate2 = pd.to_datetime(testdate,format="%Y-%m-%d %H:%M:%S")+ timedelta(hours=23.5)
fillindex=pd.date_range(start=testdate1, end=testdate2, freq='30min')
globals()[f"df{x}"].index.names = ['date']
globals()[f"df{x}"].index = pd.DatetimeIndex(fillindex)
pass