Background
I have a complex nested JSON object, which I am trying to unpack into a pandas df
in a very specific way.
JSON Object
this is an extract, containing randomized data of the JSON object, which shows examples of the hierarchy (inc. children) for 1x family (i.e. ‘Falconer Family’), however there is 100s of them in total and this extract just has 1x family, however the full JSON object has multiple –
{ "meta": { "columns": [{ "key": "value", "display_name": "Adjusted Value (No Div, USD)", "output_type": "Number", "currency": "USD" }, { "key": "time_weighted_return", "display_name": "Current Quarter TWR (USD)", "output_type": "Percent", "currency": "USD" }, { "key": "time_weighted_return_2", "display_name": "YTD TWR (USD)", "output_type": "Percent", "currency": "USD" }, { "key": "_custom_twr_audit_note_911328", "display_name": "TWR Audit Note", "output_type": "Word" } ], "groupings": [{ "key": "_custom_name_747205", "display_name": "* Reporting Client Name" }, { "key": "_custom_new_entity_group_453577", "display_name": "NEW Entity Group" }, { "key": "_custom_level_2_624287", "display_name": "* Level 2" }, { "key": "legal_entity", "display_name": "Legal Entity" } ] }, "data": { "type": "portfolio_views", "attributes": { "total": { "name": "Total", "columns": { "time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163 }, "children": [{ "name": "Falconer Family", "grouping": "_custom_name_747205", "columns": { "time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163 }, "children": [{ "name": "Wealth Bucket A", "grouping": "_custom_new_entity_group_453577", "columns": { "time_weighted_return": -0.045960317420568164, "time_weighted_return_2": -0.045960317420568164, "_custom_twr_audit_note_911328": null, "value": 13264448.506587159 }, "children": [{ "name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": 0.000003434094574039648, "time_weighted_return_2": 0.000003434094574039648, "_custom_twr_audit_note_911328": null, "value": 3337.99 }, "children": [{ "entity_id": 10604454, "name": "HUDJ Trust", "grouping": "legal_entity", "columns": { "time_weighted_return": 0.000003434094574039648, "time_weighted_return_2": 0.000003434094574039648, "_custom_twr_audit_note_911328": null, "value": 3337.99 }, "children": [] }] }, { "name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987 }, "children": [{ "entity_id": 10604454, "name": "HUDG Trust", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987 }, "children": [] }] }, { "name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004 }, "children": [{ "entity_id": 10604454, "name": "HKDJ Trust", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004 }, "children": [] }] }, { "name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006 }, "children": [{ "entity_id": 10604454, "name": "HUDW Trust", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006 }, "children": [] }] }, { "name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593 }, "children": [{ "entity_id": 10604454, "name": "HJDJ Trust", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593 }, "children": [] }] }, { "name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999 }, "children": [{ "entity_id": 10604454, "name": "HADJ Trust", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999 }, "children": [] }] } ] }, { "name": "Wealth Bucket B", "grouping": "_custom_new_entity_group_453577", "columns": { "time_weighted_return": -0.04769870075659244, "time_weighted_return_2": -0.04769870075659244, "_custom_twr_audit_note_911328": null, "value": 9868044.398519998 }, "children": [{ "name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": 0.000028632718065191298, "time_weighted_return_2": 0.000028632718065191298, "_custom_twr_audit_note_911328": null, "value": 10234.94 }, "children": [{ "entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": 0.0000282679297198829, "time_weighted_return_2": 0.0000282679297198829, "_custom_twr_audit_note_911328": null, "value": 244.28 }, "children": [] }, { "entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": 0.000049373572795108345, "time_weighted_return_2": 0.000049373572795108345, "_custom_twr_audit_note_911328": null, "value": 5081.08 }, "children": [] }, { "entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": { "time_weighted_return": 0.000006609603754315074, "time_weighted_return_2": 0.000006609603754315074, "_custom_twr_audit_note_911328": null, "value": 1523.62 }, "children": [] }, { "entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": { "time_weighted_return": 0.000010999769004760296, "time_weighted_return_2": 0.000010999769004760296, "_custom_twr_audit_note_911328": null, "value": 1828.9 }, "children": [] }, { "entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": { "time_weighted_return": 0.000006466673995619843, "time_weighted_return_2": 0.000006466673995619843, "_custom_twr_audit_note_911328": null, "value": 1557.06 }, "children": [] } ] }, { "name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.024645947842438676, "time_weighted_return_2": -0.024645947842438676, "_custom_twr_audit_note_911328": null, "value": 674052.31962 }, "children": [{ "entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.043304004172576405, "time_weighted_return_2": -0.043304004172576405, "_custom_twr_audit_note_911328": null, "value": 52800.96 }, "children": [] }, { "entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.022408434778798836, "time_weighted_return_2": -0.022408434778798836, "_custom_twr_audit_note_911328": null, "value": 599594.11962 }, "children": [] }, { "entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08 }, "children": [] }, { "entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08 }, "children": [] }, { "entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08 }, "children": [] } ] }, { "name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.03037038746301135, "time_weighted_return_2": -0.03037038746301135, "_custom_twr_audit_note_911328": null, "value": 114472.69744 }, "children": [{ "entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.030370390035505124, "time_weighted_return_2": -0.030370390035505124, "_custom_twr_audit_note_911328": null, "value": 114472.68744000001 }, "children": [] }, { "entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": 0, "time_weighted_return_2": 0, "_custom_twr_audit_note_911328": null, "value": 0.01 }, "children": [] } ] }, { "name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.06604362523792162, "time_weighted_return_2": -0.06604362523792162, "_custom_twr_audit_note_911328": null, "value": 5722529.229999997 }, "children": [{ "entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.06154960593668424, "time_weighted_return_2": -0.06154960593668424, "_custom_twr_audit_note_911328": null, "value": 1191838.9399999995 }, "children": [] }, { "entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.06750460387418267, "time_weighted_return_2": -0.06750460387418267, "_custom_twr_audit_note_911328": null, "value": 4416618.520000002 }, "children": [] }, { "entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 38190.33 }, "children": [] }, { "entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72 }, "children": [] }, { "entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72 }, "children": [] } ] }, { "name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.017118805423322003, "time_weighted_return_2": -0.017118805423322003, "_custom_twr_audit_note_911328": null, "value": 3148495.0914600003 }, "children": [{ "entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.015251157805867277, "time_weighted_return_2": -0.015251157805867277, "_custom_twr_audit_note_911328": null, "value": 800493.06146 }, "children": [] }, { "entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.01739609576880241, "time_weighted_return_2": -0.01739609576880241, "_custom_twr_audit_note_911328": null, "value": 2215511.2700000005 }, "children": [] }, { "entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.02085132265594647, "time_weighted_return_2": -0.02085132265594647, "_custom_twr_audit_note_911328": null, "value": 44031.21 }, "children": [] }, { "entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.02089393244695803, "time_weighted_return_2": -0.02089393244695803, "_custom_twr_audit_note_911328": null, "value": 44394.159999999996 }, "children": [] }, { "entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.020607507059866248, "time_weighted_return_2": -0.020607507059866248, "_custom_twr_audit_note_911328": null, "value": 44065.39000000001 }, "children": [] } ] }, { "name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": { "time_weighted_return": -0.0014710489231547497, "time_weighted_return_2": -0.0014710489231547497, "_custom_twr_audit_note_911328": null, "value": 198260.12 }, "children": [{ "entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.0014477244560456848, "time_weighted_return_2": -0.0014477244560456848, "_custom_twr_audit_note_911328": null, "value": 44612.33 }, "children": [] }, { "entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": { "time_weighted_return": -0.001477821083437858, "time_weighted_return_2": -0.001477821083437858, "_custom_twr_audit_note_911328": null, "value": 153647.78999999998 }, "children": [] } ] } ] } ] }] } }, "included": [] } }
Notes on JSON Object extract
data
– data in here can be ignored, these are aggregated values for underlying children.meta
–columns
– contains the column header values I want to use for each applicablechildren
‘column` key:pair values.groupings
– can be ignored.children
hierarchy – there are 4x levels ofchildren
which can be identified by theirname
as follows –- Family
name
(i.e., ‘Falconer Family’) - Wealth Bucket
name
(e.g., ‘Wealth Bucket A’) - Asset Class
name
(e.g., ‘Asset Class A’) - Fund
name
(e.g., ‘HUDJ Trust’)
- Family
Target Output
this is an extract of target df
structure I am trying to achieve –
portfolio | name | entity_id | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note |
---|---|---|---|---|---|---|
Falconer Family | Falconer Family | 23132492.90510712 | -0.046732301295604683 | -0.046732301295604683 | None | |
Falconer Family | Wealth Bucket A | 13264448.506587146 | -0.045960317420568164 | -0.045960317420568164 | None | |
Falconer Family | Asset Class A | 3337.99 | 0.000003434094574039648 | 0.000003434094574039648 | None | |
Falconer Family | HUDJ Trust | 10604454 | 3337.99 | 0.000003434094574039648 | 0.000003434094574039648 | None |
Falconer Family | Asset Class B | 1017004.7192636987 | -0.025871339096964152 | -0.025871339096964152 | None | |
Falconer Family | HUDG Trust | 10604454 | 1017004.7192636987 | -0.025871339096964152 | -0.025871339096964152 | None |
Falconer Family | Asset Class C | 231142.67772000004 | -0.030370376329670656 | -0.030370376329670656 | None | |
Falconer Family | HKDJ Trust | 10604454 | 231142.67772000004 | -0.030370376329670656 | -0.030370376329670656 | None |
Falconer Family | Asset Class D | 9791282.570000006 | -0.05382756475465478 | -0.05382756475465478 | None | |
Falconer Family | HUDW Trust | 10604454 | 9791282.570000006 | -0.05382756475465478 | -0.05382756475465478 | None |
Notes on Target Output
- Portfolio header – for every row, I would like to map the top-level
children
name
value [family name]. E.g., ‘Falconer Family. - Name header – this should simply be the
name
value from each respectivechildren
. - Entity ID – all 4th level
children
entity_id
value should be mapped to this column. - Data columns – regardless of level, all
children
have identicaltime_weighted_return
,time-weighted_return2
andvalue
columns which should be mapped respectively. - TWR Audit Note – these
children
_custom_twr_audit_note_911318
values are currently blank, but will be utilized in the future.
Current Output
My main issue is that you can see that I have only been able to tap into the 1st [Family] and 2nd [Wealth Bucket] children
level. This leaves me missing the 3rd [Asset Class] and 4th [Fund] –
portfolio | name | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note) | |
---|---|---|---|---|---|---|
0 | Falconer Family | Falconer Family | 2.313249e+07 | -0.046732 | -0.046732 | None |
1 | Falconer Family | Wealth Bucket A | 1.326445e+07 | -0.045960 | -0.045960 | None |
2 | Falconer Family | Wealth Bucket B | 9.868044e+06 | -0.047699 | -0.047699 | None |
Current code
This is a function which gets me the correct df
formatting, however my main issue is that I haven’t been able to find a solution to returning all children, but rather only the top-level –
# Function to read API response / JSON Object def response_writer(): with open('api_response_2022-02-13.json') as f: api_response = json.load(f) return api_response # Function to unpack JSON response into pandas dataframe. def unpack_response(): while True: try: api_response = response_writer() portfolio_views_children = api_response['data']['attributes']['total']['children'] portfolios = [] for portfolio in portfolio_views_children: entity_columns = [] # include portfolio itself within an iterable so the total is the header for entity in itertools.chain([portfolio], portfolio["children"]): entity_data = entity["columns"].copy() # don't mutate original response entity_data["portfolio"] = portfolio["name"] # from outer entity_data["name"] = entity["name"] entity_columns.append(entity_data) df = pd.DataFrame(entity_columns) portfolios.append(df) # combine dataframes df = pd.concat(portfolios) # reorder and rename column_ordering = {"portfolio": "portfolio", "name": "name"} column_ordering.update({c["key"]: c["display_name"] for c in api_response["meta"]["columns"]}) df = df[column_ordering.keys()] # beware: un-named cols will be dropped df = df.rename(columns=column_ordering) break except KeyError: print("-----------------------------------n","API TIMEOUT ERROR: TRY AGAIN...", "n-----------------------------------n") return df unpack_response()
Help
In short, I am looking for some advice on how I can tap into the remaining children
by enhancing the existing code. Whilst I have taken much time to fully explain my problem, please ask if anything isn’t clear. Please note that the JSON may have multiple families, so the solution / advice offered must observe this
Advertisement
Answer
jsonpath-ng
can parse even such a nested json object very easily. You can install this convenient library by the following command:
pip install --upgrade jsonpath-ng
Code:
import json import jsonpath_ng as jp import pandas as pd def unpack_response(r): # Create a dataframe from extracted data expr = jp.parse('$..children.[*]') data = [{'full_path': str(m.full_path), **m.value} for m in expr.find(r)] df = pd.json_normalize(data).sort_values('full_path', ignore_index=True) # Append a portfolio column df['portfolio'] = df.loc[df.full_path.str.contains(r'total.children.[d+]$'), 'name'] df['portfolio'].fillna(method='ffill', inplace=True) # Deal with columns trans = {'columns.' + c['key']: c['display_name'] for c in r['meta']['columns']} cols = ['full_path', 'portfolio', 'name', 'entity_id', 'Adjusted Value (No Div, USD)', 'Current Quarter TWR (USD)', 'YTD TWR (USD)', 'TWR Audit Note'] df = df.rename(columns=trans)[cols] return df # Load the sample data from file # with open('api_response_2022-02-13.json', 'r') as f: # api_response = json.load(f) # Load the sample data from string api_response = json.loads('{"meta": {"columns": [{"key": "value", "display_name": "Adjusted Value (No Div, USD)", "output_type": "Number", "currency": "USD"}, {"key": "time_weighted_return", "display_name": "Current Quarter TWR (USD)", "output_type": "Percent", "currency": "USD"}, {"key": "time_weighted_return_2", "display_name": "YTD TWR (USD)", "output_type": "Percent", "currency": "USD"}, {"key": "_custom_twr_audit_note_911328", "display_name": "TWR Audit Note", "output_type": "Word"}], "groupings": [{"key": "_custom_name_747205", "display_name": "* Reporting Client Name"}, {"key": "_custom_new_entity_group_453577", "display_name": "NEW Entity Group"}, {"key": "_custom_level_2_624287", "display_name": "* Level 2"}, {"key": "legal_entity", "display_name": "Legal Entity"}]}, "data": {"type": "portfolio_views", "attributes": {"total": {"name": "Total", "columns": {"time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163}, "children": [{"name": "Falconer Family", "grouping": "_custom_name_747205", "columns": {"time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163}, "children": [{"name": "Wealth Bucket A", "grouping": "_custom_new_entity_group_453577", "columns": {"time_weighted_return": -0.045960317420568164, "time_weighted_return_2": -0.045960317420568164, "_custom_twr_audit_note_911328": null, "value": 13264448.506587159}, "children": [{"name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": 3.434094574039648e-06, "time_weighted_return_2": 3.434094574039648e-06, "_custom_twr_audit_note_911328": null, "value": 3337.99}, "children": [{"entity_id": 10604454, "name": "HUDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": 3.434094574039648e-06, "time_weighted_return_2": 3.434094574039648e-06, "_custom_twr_audit_note_911328": null, "value": 3337.99}, "children": []}]}, {"name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987}, "children": [{"entity_id": 10604454, "name": "HUDG Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987}, "children": []}]}, {"name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004}, "children": [{"entity_id": 10604454, "name": "HKDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004}, "children": []}]}, {"name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006}, "children": [{"entity_id": 10604454, "name": "HUDW Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006}, "children": []}]}, {"name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593}, "children": [{"entity_id": 10604454, "name": "HJDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593}, "children": []}]}, {"name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999}, "children": [{"entity_id": 10604454, "name": "HADJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999}, "children": []}]}]}, {"name": "Wealth Bucket B", "grouping": "_custom_new_entity_group_453577", "columns": {"time_weighted_return": -0.04769870075659244, "time_weighted_return_2": -0.04769870075659244, "_custom_twr_audit_note_911328": null, "value": 9868044.398519998}, "children": [{"name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": 2.8632718065191298e-05, "time_weighted_return_2": 2.8632718065191298e-05, "_custom_twr_audit_note_911328": null, "value": 10234.94}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 2.82679297198829e-05, "time_weighted_return_2": 2.82679297198829e-05, "_custom_twr_audit_note_911328": null, "value": 244.28}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 4.9373572795108345e-05, "time_weighted_return_2": 4.9373572795108345e-05, "_custom_twr_audit_note_911328": null, "value": 5081.08}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": 6.609603754315074e-06, "time_weighted_return_2": 6.609603754315074e-06, "_custom_twr_audit_note_911328": null, "value": 1523.62}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": 1.0999769004760296e-05, "time_weighted_return_2": 1.0999769004760296e-05, "_custom_twr_audit_note_911328": null, "value": 1828.9}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": 6.466673995619843e-06, "time_weighted_return_2": 6.466673995619843e-06, "_custom_twr_audit_note_911328": null, "value": 1557.06}, "children": []}]}, {"name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.024645947842438676, "time_weighted_return_2": -0.024645947842438676, "_custom_twr_audit_note_911328": null, "value": 674052.31962}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.043304004172576405, "time_weighted_return_2": -0.043304004172576405, "_custom_twr_audit_note_911328": null, "value": 52800.96}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.022408434778798836, "time_weighted_return_2": -0.022408434778798836, "_custom_twr_audit_note_911328": null, "value": 599594.11962}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}]}, {"name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.03037038746301135, "time_weighted_return_2": -0.03037038746301135, "_custom_twr_audit_note_911328": null, "value": 114472.69744}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.030370390035505124, "time_weighted_return_2": -0.030370390035505124, "_custom_twr_audit_note_911328": null, "value": 114472.68744000001}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 0, "time_weighted_return_2": 0, "_custom_twr_audit_note_911328": null, "value": 0.01}, "children": []}]}, {"name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.06604362523792162, "time_weighted_return_2": -0.06604362523792162, "_custom_twr_audit_note_911328": null, "value": 5722529.229999997}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.06154960593668424, "time_weighted_return_2": -0.06154960593668424, "_custom_twr_audit_note_911328": null, "value": 1191838.9399999995}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.06750460387418267, "time_weighted_return_2": -0.06750460387418267, "_custom_twr_audit_note_911328": null, "value": 4416618.520000002}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 38190.33}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72}, "children": []}]}, {"name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.017118805423322003, "time_weighted_return_2": -0.017118805423322003, "_custom_twr_audit_note_911328": null, "value": 3148495.0914600003}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.015251157805867277, "time_weighted_return_2": -0.015251157805867277, "_custom_twr_audit_note_911328": null, "value": 800493.06146}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.01739609576880241, "time_weighted_return_2": -0.01739609576880241, "_custom_twr_audit_note_911328": null, "value": 2215511.2700000005}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.02085132265594647, "time_weighted_return_2": -0.02085132265594647, "_custom_twr_audit_note_911328": null, "value": 44031.21}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.02089393244695803, "time_weighted_return_2": -0.02089393244695803, "_custom_twr_audit_note_911328": null, "value": 44394.159999999996}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.020607507059866248, "time_weighted_return_2": -0.020607507059866248, "_custom_twr_audit_note_911328": null, "value": 44065.39000000001}, "children": []}]}, {"name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.0014710489231547497, "time_weighted_return_2": -0.0014710489231547497, "_custom_twr_audit_note_911328": null, "value": 198260.12}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.0014477244560456848, "time_weighted_return_2": -0.0014477244560456848, "_custom_twr_audit_note_911328": null, "value": 44612.33}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.001477821083437858, "time_weighted_return_2": -0.001477821083437858, "_custom_twr_audit_note_911328": null, "value": 153647.78999999998}, "children": []}]}]}]}]}}, "included": []}}') df = unpack_response(api_response)
Explanation:
Firstly, you can confirm the expected output by the following command:
print(df.iloc[:5:,1:])
portfolio | name | entity_id | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note |
---|---|---|---|---|---|---|
Falconer Family | Falconer Family | nan | 2.31325e+07 | -0.0467323 | -0.0467323 | |
Falconer Family | Wealth Bucket A | nan | 1.32644e+07 | -0.0459603 | -0.0459603 | |
Falconer Family | Asset Class A | nan | 3337.99 | 3.43409e-06 | 3.43409e-06 | |
Falconer Family | HUDJ Trust | 1.06045e+07 | 3337.99 | 3.43409e-06 | 3.43409e-06 | |
Falconer Family | Asset Class B | nan | 1.017e+06 | -0.0258713 | -0.0258713 |
Subsequently, you can see one of the wonderful features in jsonpath-ng
by the following command:
print(df.iloc[:10,:3])
full_path | portfolio | name |
---|---|---|
data.attributes.total.children.[0] | Falconer Family | Falconer Family |
data.attributes.total.children.[0].children.[0] | Falconer Family | Wealth Bucket A |
data.attributes.total.children.[0].children.[0].children.[0] | Falconer Family | Asset Class A |
data.attributes.total.children.[0].children.[0].children.[0].children.[0] | Falconer Family | HUDJ Trust |
data.attributes.total.children.[0].children.[0].children.[1] | Falconer Family | Asset Class B |
data.attributes.total.children.[0].children.[0].children.[1].children.[0] | Falconer Family | HUDG Trust |
data.attributes.total.children.[0].children.[0].children.[2] | Falconer Family | Asset Class C |
data.attributes.total.children.[0].children.[0].children.[2].children.[0] | Falconer Family | HKDJ Trust |
data.attributes.total.children.[0].children.[0].children.[3] | Falconer Family | Asset Class D |
data.attributes.total.children.[0].children.[0].children.[3].children.[0] | Falconer Family | HUDW Trust |
Thanks to the full_path
column, you can grasp the nesting level of the extracted data in each row instantaneously. Actually, I appended the correct portfolio
values by using these paths.
In terms of the code, the key point is the following line:
expr = jp.parse('$..children.[*]')
By the above expression, you can search the children
attributes at any level of the json object. README.rst tells you what each syntax stands for.
Syntax | Meaning |
---|---|
$ |
The root object |
jsonpath1 .. jsonpath2 |
All nodes matched by jsonpath2 that descend from any node matching jsonpath1 |
[*] |
any array index |
Speed:
I compared the speed between the above method with jsonpath-ng
and a nested-for-loop method shown below.
# Comparison:
Method | Duration | Speed ratio |
---|---|---|
jsonpath-ng |
9.72 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) | 5.7 (faster) |
Nested-for-loop | 55.4 ms ± 7.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | 1 |
# Code of the nested-for-loop method:
def unpack_response(r): df = pd.DataFrame() for _, r1 in pd.json_normalize(r, ['data', 'attributes', 'total', 'children']).iterrows(): r1['portfolio'] = r1['name'] df = df.append(r1) for _, r2 in pd.json_normalize(r1.children).iterrows(): df = df.append(r2) for _, r3 in pd.json_normalize(r2.children).iterrows(): df = df.append(r3).append(pd.json_normalize(r3.children)) df['portfolio'].fillna(method='ffill', inplace=True) trans = {'columns.' + c['key']: c['display_name'] for c in r['meta']['columns']} cols = ['portfolio', 'name', 'entity_id', 'Adjusted Value (No Div, USD)', 'Current Quarter TWR (USD)', 'YTD TWR (USD)', 'TWR Audit Note'] df = df.rename(columns=trans)[cols].reset_index(drop=True) return df