jsonpandasdataframecsv

Trying to convert a 2x2 csv file of data into a nested JSON using Pandas


I have a csv file which has data like the following:

enter image description here

I'm trying to get that into a JSON format as shown below:

[
    {
        "customerInformation": {
            "customerNumber": "000202"
        },
        "itemInformation": [
            {
                "itemNumber": "1660100110857"
            },
            {
                "itemNumber": "510520144"
            }
        ]
    },
    {
        "customerInformation": {
            "customerNumber": "001040"
        },
        "itemInformation": [
            {
                "itemNumber": "0171100243"
            },
            {
                "itemNumber": "0171100288"
            },
            {
                "itemNumber": "017110021212"
            },
            {
                "itemNumber": "017110561010"
            },
            {
                "itemNumber": "2028G1006"
            },
            {
                "itemNumber": "2028G206"
            },
            {
                "itemNumber": "0669406015"
            },
            {
                "itemNumber": "181902000"
            },
            {
                "itemNumber": "0669401020"
            }
        ]
    }
]

I almost have it perfect, but I can't figure out how to insert the nested customerNumber under customerInformation. Here is what I have so far, but it's not the exact output I need.

import pandas as pd
import os
import sys
import json

directory = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')

dtype_dic = {'customerNumber' : str, 'itemNumber':str}

if os.path.isfile(directory + '/' + 'Price_Checker.csv'):
    print("Program Running.... please standby")
else:
    print("Required file Price_Checker.csv not found on user's Desktop")
    sys.exit()

df = pd.read_csv(directory + r'\\Price_Checker.csv', dtype=dtype_dic)


print(df)
result = []

for cust_id, cust_df in df.groupby('customerNumber'):
    cust_dict = {'customerInformation': cust_id}
    cust_dict['itemInformation'] = []
    for item_id, item_df in cust_df.groupby('itemNumber'):
        item_dict = {'itemNumber': item_id}
        cust_dict['itemInformation'].append(item_dict)
    result.append(cust_dict)

print(json.dumps(result, indent=4))

Thank you in advance for any assistance you can lend.


Solution

  • You can use nested list comprehension with groupby:

    directory = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')
    
    dtype_dic = {'customerNumber' : str, 'itemNumber':str}
    
    if os.path.isfile(directory + '/' + 'Price_Checker.csv'):
        print("Program Running.... please standby")
    else:
        print("Required file Price_Checker.csv not found on user's Desktop")
        sys.exit()
    
    df = pd.read_csv(directory + r'\\Price_Checker.csv', dtype=str)
    
    
    json_data = ([{'customerInformation':{'customerNumber':name},
                  'itemInformation':[{'itemNumber':item} for item in g]}
                   for name, g in df.groupby('customerNumber')['itemNumber']])
    
    # Save or print the JSON
    with open("output.json", "w") as f:
        json.dump(json_data, f, indent=2)
    
    # Optional: print to console
    print(json.dumps(json_data, indent=2))