Converting a nested json three levels deep to dataframe

I have a json that is three levels deep. I want to flatten it into a dataframe that has five columns.

id	name	code	level	parent_id

So: The part I struggle with is that I can extract each nested item, but I can't find an elegant way to keep the "parent_id". There's got to be more elegant ways of doing this. Any pointers appreciated.

source = response.json()
print ('there are ' + str(len(source)) + ' records')    

df_L1 = df
df_L2 = json_normalize(df_L1['subLevelCategories'][0])   
df_L3 = json_normalize(df_L2['subLevelCategories'][0])   #store for later   !!!!!

df_L2_wrapper = df_L2['id']
df_L3_wrapper = df_L3['id']
df_L2_wrapper.name = 'parent_id'
df_L3_wrapper.name = 'parent_id'
    
df_L1 = df_L1.head(5)
df_L2 = df_L2.head(5)
df_L3 = df_L3.head(5)
df_L3_wrapper = df_L3_wrapper.head(5)
df_L2_wrapper = df_L2_wrapper.head(5)

# Build of df_L1
df_L1 = df_L1.drop(['subLevelCategories'], axis=1) 
df_L1['parentid']=0

# Build of df_L2
df_L2 = df_L2.drop(['name','code','level'], axis=1) 
 


# Rename the Series
df_L2 = json_normalize(df_L2['subLevelCategories'][0])
# Concatenate the DataFrame and the renamed Series
df_L2 = pd.concat([df_L2, df_L2_wrapper], axis=1)
df_L2 = df_L2.drop(['subLevelCategories'], axis=1) 
# //////    L2 is built. 

# Build of df_L3
df_L3 = df_L3.drop(['subLevelCategories'], axis=1) 
df_L3 = pd.concat([df_L3, df_L3_wrapper], axis=1)

df_combined = pd.concat([df_L1, df_L2, df_L3], ignore_index=True)

EDIT: The sample has been corrected by enclosing it with the '[' and ']'

source originates from

response = requests.get(url, headers=headers)
source = response.json()

The sample JSON is as follows:

[
{
  "id": 3372,
  "name": "Archive",
  "code": null,
  "level": 1,
  "subLevelCategories": [
    {
      "id": 16708,
      "name": ".....",
      "code": null,
      "level": 2,
      "subLevelCategories": [
        {
          "id": 16727,
          "name": ".........",
          "code": null,
          "level": 3,
          "subLevelCategories": null
        },
        {
          "id": 16726,
          "name": "........",
          "code": null,
          "level": 3,
          "subLevelCategories": null
        }
      ]
    },
    {
      "id": 16701,
      "name": ".......",
      "code": null,
      "level": 2,
      "subLevelCategories": [
        {
          "id": 16782,
          "name": "......",
          "code": null,
          "level": 3,
          "subLevelCategories": null
        },
        {
          "id": 16785,
          "name": "......",
          "code": null,
          "level": 3,
          "subLevelCategories": null
        }
      ]
    }
  ]
}
]

Solution

Used recursive function to handle the nested Json data,it stores the current id, name, code, level, and parent_id. The parent_id is passed down as a parameter so that we can track the parent-child relationship as we go deeper into the hierarchy.

EDIT : After Sample Json is added with '[]'

import pandas as pd
data = [
    {
        "id": 3372,
        "name": "Archive",
        "code": None,
        "level": 1,
        "subLevelCategories": [
            {
                "id": 16708,
                "name": ".....",
                "code": None,
                "level": 2,
                "subLevelCategories": [
                    {
                        "id": 16727,
                        "name": ".........",
                        "code": None,
                        "level": 3,
                        "subLevelCategories": None
                    },
                    {
                        "id": 16726,
                        "name": "........",
                        "code": None,
                        "level": 3,
                        "subLevelCategories": None
                    }
                ]
            },
            {
                "id": 16701,
                "name": ".......",
                "code": None,
                "level": 2,
                "subLevelCategories": [
                    {
                        "id": 16782,
                        "name": "......",
                        "code": None,
                        "level": 3,
                        "subLevelCategories": None
                    },
                    {
                        "id": 16785,
                        "name": "......",
                        "code": None,
                        "level": 3,
                        "subLevelCategories": None
                    }
                ]
            }
        ]
    }
]

def flatten_categories(data, parent_id=0):  # default parent_id for top-level categories
    flattened_data = []
    for item in data:
        flat_item = {
            'id': item['id'],
            'name': item['name'],
            'code': item['code'],
            'level': item['level'],
            'parent_id': parent_id
        }
        flattened_data.append(flat_item)
        if 'subLevelCategories' in item and item['subLevelCategories']:
            flattened_data.extend(flatten_categories(item['subLevelCategories'], parent_id=item['id']))
    
    return flattened_data

flattened_data = flatten_categories(data)
df = pd.DataFrame(flattened_data)
print(df)

Output

      id       name  code  level  parent_id
0   3372    Archive  None      1          0
1  16708      .....  None      2       3372
2  16727  .........  None      3      16708
3  16726   ........  None      3      16708
4  16701    .......  None      2       3372
5  16782     ......  None      3      16701
6  16785     ......  None      3      16701