I am extracting an API where the json response I get is as follows
{
"prod": "Food",
"id": "F1",
"item":
[
{
"prod": "desserts",
"id": "d1",
"item":
[
{
"prod": "chocolates",
"id": "c1",
"item":
[
{
"itemdesc": "Kitkat",
"id": "kit"
},
{
"itemdesc": "Dairymilk",
"id": "Dai"
}
]
},
{
"prod": "icecream",
"id": "id2",
"item":
[
{
"itemdesc": "amul",
"id": "ice"
}
]
}
]
}
]
}
I want to convert the json into a pandas dataframe which will include all the hierarchy levels and the granular level records. I have 7 different type of food products(like dessert, Savoury, etc..) and each type of food products have furthermore children records with atleat 3 to maximum 6 hierarchy levels.
When I try to convert it into a dataframe using pandas.normalize() I am getting following result
prod id item
food f1 [{"prod": "desserts", "id": "d1", "item": ...]
and when I try to give record path it only gives one hierarchy level in accordance with the given record path.
But the reslut I want is to store all hierarchical records from the json response, which is as follows:
prod id item
food f1 [{"prod": "desserts", "id": "d1", "item": ...]
desserts d1 [{"prod": "chocolates", "id": "c1", "item": ...]
chocolates c1 [{"itemdesc": "Kitkat", "id": "kit"...]
NA kit Kitkat
NA Dai DairyMilk
icecream id2 [{"itemdesc": "amul", "id": "ice"...]
NA ice Amul
Apologies for adding the json response and output as image. I was not able to add the json response and output inside the question. Thank you in advance.
You can do it by defining a custom flattening function and a processing function:
import pandas as pd
def flatten_json(y):
out = []
def flatten(x, name=''):
if isinstance(x, dict):
for a in x:
flatten(x[a], name + a + '_')
elif isinstance(x, list):
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out.append((name[:-1], x))
flatten(y)
return dict(out)
import json
data = {
"prod": "Food",
"id": "F1",
"item": [
{
"prod": "desserts",
"id": "d1",
"item": [
{
"prod": "chocolates",
"id": "c1",
"item": [
{
"itemdesc": "Kitkat",
"id": "kit"
},
{
"itemdesc": "Dairymilk",
"id": "Dai"
}
]
},
{
"prod": "icecream",
"id": "id2",
"item": [
{
"itemdesc": "amul",
"id": "ice"
}
]
}
]
}
]
}
def process_json(data, parent_prod=None, parent_id=None):
rows = []
current_prod = data.get('prod', 'NA')
current_id = data.get('id', 'NA')
rows.append({
'prod': parent_prod if parent_prod else current_prod,
'id': current_id,
'item': json.dumps(data.get('item', [])) if 'item' in data else data.get('itemdesc', 'NA')
})
if 'item' in data:
for item in data['item']:
rows.extend(process_json(item, current_prod, current_id))
return rows
flattened_data = process_json(data)
df = pd.DataFrame(flattened_data)
print(df)
which will give you
prod id item
0 Food F1 [{"prod": "desserts", "id": "d1", "item": [{"p...
1 Food d1 [{"prod": "chocolates", "id": "c1", "item": [{...
2 desserts c1 [{"itemdesc": "Kitkat", "id": "kit"}, {"itemde...
3 chocolates kit Kitkat
4 chocolates Dai Dairymilk
5 desserts id2 [{"itemdesc": "amul", "id": "ice"}]
6 icecream ice amul