I've data stored in pandas dataframe and I want to convert tat into a JSON format. Example data can be replicated using following code
data = {'Product':['A', 'B', 'A'],
'Zone':['E/A', 'A/N', 'E/A'],
'start':['08:00:00', '09:00:00', '12:00:00'],
'end':['12:30:00', '17:00:00', '17:40:00'],
'seq':['0, 1, 2 ,3 ,4','0, 1, 2 ,3 ,4', '0, 1, 2 ,3 ,4'],
'store':['Z',"'AS', 'S'", 'Z']
}
df = pd.DataFrame(data)
I've tried converting it into JSON format using following code
df_parsed = json.loads(df.to_json(orient="records"))
Output generated from above
[{'Product': 'A', 'Zone': 'E/A', 'start': '08:00:00', 'end': '17:40:00', 'seq': '0, 1, 2 ,3 ,4', 'store': 'Z'}, {'Product': 'B', 'Zone': 'A/N', 'start': '09:00:00', 'end': '17:00:00', 'seq': '0, 1, 2 ,3 ,4', 'store': 'AS'}, {'Product': 'A', 'Zone': 'E/A', 'start': '08:00:00', 'end': '17:40:00', 'seq': '0, 1, 2 ,3 ,4', 'store': 'Z'}]
Desired Result:
{
'A': {'Zone': 'E/A',
'tp': [{'start': [8, 0], 'end': [12, 0], 'seq': [0, 1, 2 ,3 ,4]},
{'start': [12, 30], 'end': [17, 40], 'seq': [0, 1, 2 ,3 ,4]}],
'store': ['Z']
},
'B': {'Zone': 'A/N',
'tp': [{'start': [9, 0], 'end': [17, 0], 'seq': [0, 1, 2 ,3 ,4]}],
'store': ['AS', 'S']
}
}
If a product belongs to same store the result for column start
, end
and seq
should be clubbed as shown in desired output. Also start time and end time should be represented like [9,0]
if value for time is "09:00:00"
only hour and minute needs to be represented so we can discard value of seconds from time
columns.
This will be complicated a bit. So you have to do it step by step:
def funct(row):
row['start'] = row['start'].str.split(':').str[0:2]
row['end'] = row['end'].str.split(':').str[0:2]
row['store'] = row['store'].str.replace("'", "").str.split(', ')
d = (row.groupby('Zone')[row.columns[1:]]
.apply(lambda x: x.to_dict(orient='record'))
.reset_index(name='tp').to_dict(orient='row'))
return d
di = df.groupby(['Product'])[df.columns[1:]].apply(funct).to_dict()
di:
{'A': [{'Zone': 'E/A',
'tp': [{'start': ['08', '00'],
'end': ['12', '30'],
'seq': '0, 1, 2 ,3 ,4',
'store': ['Z']},
{'start': ['12', '00'],
'end': ['17', '40'],
'seq': '0, 1, 2 ,3 ,4',
'store': ['Z']}]}],
'B': [{'Zone': 'A/N',
'tp': [{'start': ['09', '00'],
'end': ['17', '00'],
'seq': '0, 1, 2 ,3 ,4',
'store': ['AS', 'S']}]}]}
start
, end
column to a list form.Zone
and apply to_dict to rest of the columns.[{'start': ['08', '00'], 'end': ['12', '30'], 'seq': '0, 1, 2 ,3 ,4',
as tp
.Ultimately you need to convert your dataframe into this below format once you are able to do it the rest of the thing will become easy for you.
Zone tp
E/A [{'start': ['08', '00'], 'end': ['12', '30'], ...
A/N [{'start': ['09', '00'], 'end': ['17', '00'], ...
import pandas as pd
import ast
def funct(row):
y = row['start'].str.split(':').str[0:-1]
row['start'] = row['start'].str.split(':').str[0:2].apply(lambda x: list(map(int, x)))
row['end'] = row['end'].str.split(':').str[0:2].apply(lambda x: list(map(int, x)))
row['seq'] = row['seq'].apply(lambda x: list(map(int, ast.literal_eval(x))))
row['store'] = row['store'].str.replace("'", "")
d = (row.groupby('Zone')[row.columns[1:-1]]
.apply(lambda x: x.to_dict(orient='record'))
.reset_index(name='tp'))
######### For store create a different dataframe and then merge it to the other df ########
d1 = (row.groupby('Zone').agg({'store': pd.Series.unique}))
d1['store'] = d1['store'].str.split(",")
d_merged = (pd.merge(d,d1, on='Zone', how='left')).to_dict(orient='record')[0]
return d_merged
di = df.groupby(['Product'])[df.columns[1:]].apply(funct).to_dict()
di:
{'A': {'Zone': 'E/A',
'tp': [{'start': [8, 0], 'end': [12, 30], 'seq': [0, 1, 2, 3, 4]},
{'start': [12, 0], 'end': [17, 40], 'seq': [0, 1, 2, 3, 4]}],
'store': ['Z']},
'B': {'Zone': 'A/N',
'tp': [{'start': [9, 0], 'end': [17, 0], 'seq': [0, 1, 2, 3, 4]}],
'store': ['AS', ' S']}}