How is it possible to achieve the following at the same time in python 3:
- Serialize column names and numerical data as a binary file
- Reopen the file and append additional numerical data
For example with the following data:
import numpy as np

# Column names that should be stored alongside the numerical data.
columns = ['a', 'b', 'c']

# Initial 10x3 block: 30 evenly spaced values from 0 to 1.
data = np.linspace(0, 1, num=10*3).reshape((10, 3))

# Second 10x3 block (values from 2 to 3) to be appended to the file later.
data_for_appending = np.linspace(2, 3, num=10*3).reshape((10, 3))
My approach with numpy
This approach allows saving data and appending additional data. However, the column names are missing and loading requires several calls to np.load.
# storing the data
# Both arrays are written back to back into the same open file handle.
with open('out.npy', 'wb') as f:
    np.save(f, data)
    np.save(f, data_for_appending)

# loading the data
# One np.load call per array that was saved, in the same order.
with open('out.npy', 'rb') as f:
    data1 = np.load(f)
    data2 = np.load(f)
My approach with pandas
This approach saves the data and header. However, it does not seem possible to append data to the file in a separate call.
import pandas as pd

# Wrap the array in a DataFrame so the column names travel with the data.
df = pd.DataFrame(data, columns=columns)

# storing the data
df.to_pickle('out.pickle')

# loading the data
df2 = pd.read_pickle('out.pickle')
Advertisement
Answer
import pickle

import pandas as pd

# Pickle file used throughout this example (same name as in the question).
path = "out.pickle"

# Write first df to pickle
data = {
    "name": ["Joe", "Mike", "Tony", "Susan"],
    "course": ["Masters", "Doctorate", "Graduate", "Bachelors"],
    "age": [27, 23, 21, 19],
}
df = pd.DataFrame(data)
df.to_pickle(path)

# Create new row df
new_row = {"name": "Phil", "course": "Associates", "age": 30}
new_row_df = pd.DataFrame(new_row, index=[0])
print(f"{new_row_df}\n")

# read original df from pickle
pickled_df = pd.read_pickle(path)

# concat dfs
# NOTE: the new row is listed first, so it ends up at the top of the result;
# swap the list order to place it after the existing rows instead.
df_appended = pd.concat([new_row_df, pickled_df]).reset_index(drop=True)

# Dump concat df to pickle
# Mode "wb" overwrites the file, replacing the original frame with the combined one.
with open(path, "wb") as f:
    pickle.dump(df_appended, f)

# read concat df from pickle
df = pd.read_pickle(path)
print(df)
You can append to the file without reading it first, but the DataFrames won't be concatenated; they are stored as separate entries. You can of course read all the entries in a loop and concatenate them later, when it's time to read the file.
# Add new entries
# `path` (the pickle file) and `new_df` (the DataFrame to add) must already be defined.
# Mode "ab" writes an additional pickle record after the ones already in the file.
with open(path, "ab") as f:
    pickle.dump(new_df, f)

# When ready to read and concat.
with open(path, "rb") as f:
    entries = []
    # Each pickle.load call returns the next stored object; EOFError marks the end of the file.
    while True:
        try:
            entry = pickle.load(f)
        except EOFError:
            break
        entries.append(entry)

df = pd.concat(entries).reset_index(drop=True)
print(df)