pythonjsonpandasdataframeaggregate

how to write the json with subdocument column names to json


I have code as below to write a json from the below dictionary Looks like there is issue in my group by or aggregate. It is not generating the JSON as expected as give below.

import json

data = { "some_id": "123456",
 "some_email": "xyz.abc@acacceptance.com",
 "some_number" : 123456}
df = pd.DataFrame(data, index=[0])
#print(df)


for idx, row in df.iterrows():
  # convert each record to a dictionary
  record_dict = row.to_dict()
  json_data = json.dumps(record_dict, indent=1)
  #print(json_data)
  
phone_number = { "some_number" : 123456, "Contact" :{"phone_number": 45464464646,"contact?": "Y"}}
df_phonnumber = pd.DataFrame(phone_number)
#print(df_phonnumber)

merged_df = pd.merge(df, df_phonnumber, left_on='some_number', right_on='some_number', how='left')
#print(merged_df)

#single columns def_size and dep_name
d = (merged_df.groupby(['some_number','some_email','some_id']).apply(lambda x: x[['Contact']]
      .to_dict('r'))
      .reset_index(name='dont_contact'))

json_str = d.to_json(orient='records')

pretty_json = json.dumps(json.loads(json_str), indent=4)
print(pretty_json)

Expecting the JSON as below

{
        "some_number": 123456,
        "some_email": "xyz.abc@acacceptance.com",
        "some_id": "123456",
        "dont_contact": [
            "Phone_Number": "45464464646",
            "Contact?": "Y"
            ]
}

But I am getting the Json as below. What is missing?

{
        "some_number": 123456,
        "some_email": "xyz.abc@acacceptance.com",
        "some_id": "123456",
        "dont_contact": [
            {
                "Contact": "Y"
            },
            {
                "Contact": 45464464646
            }
        ]
    }

can some please help on this to get the json in the above format?


Solution

  • Load your nested second dataset via json_normalize() to get easy access to the individual attributes then use apply to construct a new column.

    import json
    import pandas as pd
    
    data = {
        "some_id": "123456",
        "some_email": "xyz.abc@acacceptance.com",
        "some_number" : 123456
    }
    
    phone_number = {
        "some_number" : 123456,
        "Contact" : {"phone_number": 45464464646, "contact?": "Y"}
    }
    
    df_data = pd.DataFrame(data, index=[0])
    df_phone_number = pd.json_normalize(phone_number)
    
    df_merged = pd.merge(df_data, df_phone_number, left_on='some_number', right_on='some_number', how='left')
    df_merged["Contact"] = df_merged.apply(lambda x: {"phone_number": x["Contact.phone_number"], "contact?": x["Contact.contact?"]}, axis=1)
    df_merged = df_merged.drop(columns=['Contact.phone_number', 'Contact.contact?'])
    
    json_str = df_merged.to_json(orient='records')
    pretty_json = json.dumps(json.loads(json_str), indent=4)
    print(pretty_json)
    

    Giving you:

    [
        {
            "some_id": "123456",
            "some_email": "xyz.abc@acacceptance.com",
            "some_number": 123456,
            "Contact": {
                "phone_number": 45464464646,
                "contact?": "Y"
            }
        }
    ]