I have a csv file which has data like the following:
I'm trying to get that into a JSON format as shown below:
[
{
"customerInformation": {
"customerNumber": "000202"
},
"itemInformation": [
{
"itemNumber": "1660100110857"
},
{
"itemNumber": "510520144"
}
]
},
{
"customerInformation": {
"customerNumber": "001040"
},
"itemInformation": [
{
"itemNumber": "0171100243"
},
{
"itemNumber": "0171100288"
},
{
"itemNumber": "017110021212"
},
{
"itemNumber": "017110561010"
},
{
"itemNumber": "2028G1006"
},
{
"itemNumber": "2028G206"
},
{
"itemNumber": "0669406015"
},
{
"itemNumber": "181902000"
},
{
"itemNumber": "0669401020"
}
]
}
]
I almost have it perfect, but I can't figure out how to insert the nested customerNumber under customerInformation. Here is what I have so far, but it's not the exact output I need.
import pandas as pd
import os
import sys
import json
directory = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')
dtype_dic = {'customerNumber' : str, 'itemNumber':str}
if os.path.isfile(directory + '/' + 'Price_Checker.csv'):
print("Program Running.... please standby")
else:
print("Required file Price_Checker.csv not found on user's Desktop")
sys.exit()
df = pd.read_csv(directory + r'\\Price_Checker.csv', dtype=dtype_dic)
print(df)
result = []
for cust_id, cust_df in df.groupby('customerNumber'):
cust_dict = {'customerInformation': cust_id}
cust_dict['itemInformation'] = []
for item_id, item_df in cust_df.groupby('itemNumber'):
item_dict = {'itemNumber': item_id}
cust_dict['itemInformation'].append(item_dict)
result.append(cust_dict)
print(json.dumps(result, indent=4))
Thank you in advance for any assistance you can lend.
You can use nested list comprehension with groupby
:
directory = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')
dtype_dic = {'customerNumber' : str, 'itemNumber':str}
if os.path.isfile(directory + '/' + 'Price_Checker.csv'):
print("Program Running.... please standby")
else:
print("Required file Price_Checker.csv not found on user's Desktop")
sys.exit()
df = pd.read_csv(directory + r'\\Price_Checker.csv', dtype=str)
json_data = ([{'customerInformation':{'customerNumber':name},
'itemInformation':[{'itemNumber':item} for item in g]}
for name, g in df.groupby('customerNumber')['itemNumber']])
# Save or print the JSON
with open("output.json", "w") as f:
json.dump(json_data, f, indent=2)
# Optional: print to console
print(json.dumps(json_data, indent=2))