I have the following json data and i've been trying to flatten it out into a single row. I tried a few methods like explode() and json_normalize(data, max_level=3), flatten_json. The code does not give me the desired result. I am having trouble working with nested items. No matter what i used there is no change in the result I see. The columns that are nested are showing up as one column with a list as value. Since I am not an expert in this field, I am having a hard time figuring out what I am missing to flatted the JSON data.
import requests
import pandas as pd
import json
from flatten_json import flatten
response = requests.get(
ELASTICSEARCH_URL,
data = QUERY,
auth = (variables.get('username'), variables.get('password')),
verify = False,
headers = {'Content-Type': 'application/json'}
)
extracted_data = response.json()
required_records = extracted_data["hits"]["hits"][0]["_source"]["response"]["data"][CLAIMTYPE]
df = pd.json_normalize(required_records, max_level=2).fillna('')
#df = flatten(extracted_data)
#print(json.dumps(df, indent=4))
# df1 = df.explode('icdDiagnosisCodes')
# df2 = df1.explode('serviceProcedures')
#print("\nNumner of Records Extracted from MONGODB:\n", df.head(10).to_string())
# df2 = df.explode('icdDiagnosisCodes') # this is not working
df2 = pd.json_normalize(df['icdDiagnosisCodes'])
print("\nNumner of Records Extracted from MONGODB:\n", df2.head(10).to_string())
Data:
{
"providerCity": "SOME CITY",
"providerSpecialtyDescription": "PHYSICAL/OCCUPATIONAL THERAPY",
"updateDate": "YYYY-MM-DD",
"serviceDate": "YYYY-MM-DD",
"providerLastName": "XXXXXXXXXXX",
"gender": "F",
"city": "SOME CITY",
"healthPlanIdentifier": "POS CHOICE PLUS",
"ndcCodeDescription": "NO NDC",
"claimType": "Physician",
"providerName": "XXXX,XXX",
"ndcCode": "NONE",
"zip": "00000",
"providerZip": "00000",
"providerStateCode": "XX",
"providerNpi": "XXXXXXXXXXX",
"icdDiagnosisCodes": [
{
"icdDiagnosisCode": "M25551",
"icdDiagnosisDecimalCode": "M25.551",
"icdDiagnosisCodeDescription": "PAIN IN RIGHT HIP"
},
{
"icdDiagnosisCode": "M545",
"icdDiagnosisDecimalCode": "M54.5",
"icdDiagnosisCodeDescription": "LOW BACK PAIN"
}
],
"dateOfBirth": "YYYY-MM-DD",
"claimId": "ASDFGHJKLTUYBNCNDSDWEWRWDEW",
"memberIdentifier": "999999999",
"providerSpecialtyCode": "99",
"serviceProcedures": [
{
"typeOfServiceCode": "1",
"procedureCode": "97110",
"procedureCodeType": "CPT-4",
"quantityOfServices": "1",
"procedureCodeModifiers": [
{
"procedureCodeModifier": "GP",
"procedureCodeModifierDescription": "SERVICES DELIVERED UNDER AN OUTPATIENT PHYSICAL THERAPY PLAN OF CARE"
}
],
"toDate": "YYYY-MM-DD",
"placeOfService": "11",
"typeOfServiceDescription": "Medical/Surgical",
"fromDate": "YYYY-MM-DD",
"serviceDiagnoses": [
{
"diagnosisCode": "M25551",
"diagnosisCodeDescription": "PAIN IN RIGHT HIP"
},
{
"diagnosisCode": "M545",
"diagnosisCodeDescription": "LOW BACK PAIN"
}
],
"procedureCodeDescription": "THERAPEUTIC EXERCISES",
"lineNumber": "003",
"placeOfServiceDescription": "OFFICE"
},
{
"typeOfServiceCode": "1",
"procedureCode": "97140",
"procedureCodeType": "CPT-4",
"quantityOfServices": "1",
"procedureCodeModifiers": [
{
"procedureCodeModifier": "GP",
"procedureCodeModifierDescription": "SERVICES DELIVERED UNDER AN OUTPATIENT PHYSICAL THERAPY PLAN OF CARE"
}
],
"toDate": "YYYY-MM-DD",
"placeOfService": "00",
"typeOfServiceDescription": "Medical/Surgical",
"fromDate": "YYYY-MM-DD",
"serviceDiagnoses": [
{
"diagnosisCode": "M25551",
"diagnosisCodeDescription": "PAIN IN RIGHT HIP"
},
{
"diagnosisCode": "M545",
"diagnosisCodeDescription": "LOW BACK PAIN"
}
],
"procedureCodeDescription": "MANUAL THERAPY 1/> REGIONS",
"lineNumber": "001",
"placeOfServiceDescription": "OFFICE"
},
{
"typeOfServiceCode": "1",
"procedureCode": "97110",
"procedureCodeType": "CPT-4",
"quantityOfServices": "1",
"procedureCodeModifiers": [
{
"procedureCodeModifier": "GP",
"procedureCodeModifierDescription": "SERVICES DELIVERED UNDER AN OUTPATIENT PHYSICAL THERAPY PLAN OF CARE"
}
],
"toDate": "YYYY-MM-DD",
"placeOfService": "00",
"typeOfServiceDescription": "Medical/Surgical",
"fromDate": "YYYY-MM-DD",
"serviceDiagnoses": [
{
"diagnosisCode": "M25551",
"diagnosisCodeDescription": "PAIN IN RIGHT HIP"
},
{
"diagnosisCode": "M545",
"diagnosisCodeDescription": "LOW BACK PAIN"
}
],
"procedureCodeDescription": "THERAPEUTIC EXERCISES",
"lineNumber": "002",
"placeOfServiceDescription": "OFFICE"
}
],
"providerFirstName": "ANONYMOUS",
"adjudicationFlag": "Y",
"stateCode": "XX",
"icdCodeType": "10",
"claimStatus": "P",
"providerAddress1": "SOME ADDRESS"
}
Since I'm not really sure about what you want your end object to be, and ignoring the Pandas side, I've coded a recursive flattener for the type of dictionary you exhibited. It returns a flat dictionary, whose keys are constructed by concatenating the successive keys for nested dicts, and appending indexes for the successive items of lists:
def dict_flattener(my_dict, rootkey=None, sep="___"):
resp_dict = {}
for key, value in my_dict.items():
resp_key = key if rootkey is None else rootkey + sep + key
if isinstance(value, str):
resp_dict[resp_key] = value
elif isinstance(value, list):
for i, this_dict in enumerate(value):
resp_dict.update(dict_flattener(this_dict, rootkey=f"{resp_key}_{i}", sep=sep))
return resp_dict
Example (from an excerpt of your data) - Note: the alphabetical ordering is an effect of pprint, the order of entries is actually kept.
data = {
"providerCity": "SOME CITY",
"providerSpecialtyDescription": "PHYSICAL/OCCUPATIONAL THERAPY",
"updateDate": "YYYY-MM-DD",
"providerNpi": "XXXXXXXXXXX",
"icdDiagnosisCodes": [
{
"icdDiagnosisCode": "M25551",
"icdDiagnosisDecimalCode": "M25.551",
"icdDiagnosisCodeDescription": "PAIN IN RIGHT HIP"
},
{
"icdDiagnosisCode": "M545",
"icdDiagnosisDecimalCode": "M54.5",
"icdDiagnosisCodeDescription": "LOW BACK PAIN"
}
],
"dateOfBirth": "YYYY-MM-DD"
}
pprint(dict_flattener(data))
{'dateOfBirth': 'YYYY-MM-DD',
'icdDiagnosisCodes_0___icdDiagnosisCode': 'M25551',
'icdDiagnosisCodes_0___icdDiagnosisCodeDescription': 'PAIN IN RIGHT HIP',
'icdDiagnosisCodes_0___icdDiagnosisDecimalCode': 'M25.551',
'icdDiagnosisCodes_1___icdDiagnosisCode': 'M545',
'icdDiagnosisCodes_1___icdDiagnosisCodeDescription': 'LOW BACK PAIN',
'icdDiagnosisCodes_1___icdDiagnosisDecimalCode': 'M54.5',
'providerCity': 'SOME CITY',
'providerNpi': 'XXXXXXXXXXX',
'providerSpecialtyDescription': 'PHYSICAL/OCCUPATIONAL THERAPY',
'updateDate': 'YYYY-MM-DD'}