My data is as following (this just extract but there are much more objects, some don’t have the additionalData)
{
"referenceSetCount":1,
"totalRowCount":4,
"referenceSets":[
{
"name":"table",
"rowCount":4,
"_links":{
"self":{
"href":"link"
}
},
"referenceDataItems":[
{
"col1":"5524",
"col2":"yyy",
"col3":1,
"additionalData":[
{
"col1":111,
"col2":"xxxx",
"col3":1,
"col4":"18"
},
{
"col1":222,
"col2":"2222",
"col3":1,
"col4":"1"
}
]
},
{
"col1":"26434",
"col2":"dfdshere",
"col3":2,
"additionalData":[
{
"col1":34522,
"col2":"fsfs",
"col3":2,
"col4":"18"
},
{
"col1":5444,
"col2":"gregrege",
"col3":2,
"col4":"2"
}
]
}
]
}
]
}
I’m trying to iterate with list comprehension to get dataframe of referenceDataItems and everything within that key, also additionalData if appears.
import os
import sys
import pandas as pd
import urllib.request, json
api_url = urllib.request.urlopen("link_to_my_data")
api_data = json.loads(api_url.read())
#nest loop to get referenceSets + nested additionalData
data_alt = [v for k, v in api_data.items() if k == 'referenceSets']
Expected result:
col1 col2 col3 col1 col2 col3 col4 col1 col2 col3 col4 5524 yyy 1 111 xxxx 1 18 222 2222 1 1 26434 dfdshere 2 34522 fsfs 2 18 5444 gregrege 2 2
Advertisement
Answer
I did some research and this almost got my desired data, needs little modification in COLUMNS_TO_DROP
COLUMNS_TO_DROP = ["additionalData"]
def expand_additional_data(items):
for item in items:
for av in item.get("additionalData", []):
item[av["col2a"]] = av["col4a"]
yield item
for ref_set in data["referenceSets"]:
table_name = ref_set["name"]
expanded = expand_additional_data(ref_set["referenceDataItems"])
df = pd.DataFrame(expanded)
df = df.drop(COLUMNS_TO_DROP, axis=1, errors="ignore")
print(df)