pythondataframepandas-explodejson-flattener

How to normalize a JSON Object that has nested arrays and lists in python pandas


I have the following json data and i've been trying to flatten it out into a single row. I tried a few methods like explode() and json_normalize(data, max_level=3), flatten_json. The code does not give me the desired result. I am having trouble working with nested items. No matter what i used there is no change in the result I see. The columns that are nested are showing up as one column with a list as value. Since I am not an expert in this field, I am having a hard time figuring out what I am missing to flatted the JSON data.

import requests
import pandas as pd
import json
from flatten_json import flatten

response = requests.get(
    ELASTICSEARCH_URL,
    data = QUERY,
    auth = (variables.get('username'), variables.get('password')),
    verify = False,
    headers = {'Content-Type': 'application/json'}
)
extracted_data = response.json()

required_records = extracted_data["hits"]["hits"][0]["_source"]["response"]["data"][CLAIMTYPE]

df = pd.json_normalize(required_records, max_level=2).fillna('')
#df = flatten(extracted_data)
#print(json.dumps(df, indent=4))

# df1 = df.explode('icdDiagnosisCodes')
# df2 = df1.explode('serviceProcedures')
#print("\nNumner of Records Extracted from MONGODB:\n", df.head(10).to_string())

# df2 = df.explode('icdDiagnosisCodes') # this is not working
df2 = pd.json_normalize(df['icdDiagnosisCodes'])
print("\nNumner of Records Extracted from MONGODB:\n", df2.head(10).to_string())

Data:

            {
              "providerCity": "SOME CITY",
              "providerSpecialtyDescription": "PHYSICAL/OCCUPATIONAL THERAPY",
              "updateDate": "YYYY-MM-DD",
              "serviceDate": "YYYY-MM-DD",
              "providerLastName": "XXXXXXXXXXX",
              "gender": "F",
              "city": "SOME CITY",
              "healthPlanIdentifier": "POS CHOICE PLUS",
              "ndcCodeDescription": "NO NDC",
              "claimType": "Physician",
              "providerName": "XXXX,XXX",
              "ndcCode": "NONE",
              "zip": "00000",
              "providerZip": "00000",
              "providerStateCode": "XX",
              "providerNpi": "XXXXXXXXXXX",
              "icdDiagnosisCodes": [
                {
                  "icdDiagnosisCode": "M25551",
                  "icdDiagnosisDecimalCode": "M25.551",
                  "icdDiagnosisCodeDescription": "PAIN IN RIGHT HIP"
                },
                {
                  "icdDiagnosisCode": "M545",
                  "icdDiagnosisDecimalCode": "M54.5",
                  "icdDiagnosisCodeDescription": "LOW BACK PAIN"
                }
              ],
              "dateOfBirth": "YYYY-MM-DD",
              "claimId": "ASDFGHJKLTUYBNCNDSDWEWRWDEW",
              "memberIdentifier": "999999999",
              "providerSpecialtyCode": "99",
              "serviceProcedures": [
                {
                  "typeOfServiceCode": "1",
                  "procedureCode": "97110",
                  "procedureCodeType": "CPT-4",
                  "quantityOfServices": "1",
                  "procedureCodeModifiers": [
                    {
                      "procedureCodeModifier": "GP",
                      "procedureCodeModifierDescription": "SERVICES DELIVERED UNDER AN OUTPATIENT PHYSICAL THERAPY PLAN OF CARE"
                    }
                  ],
                  "toDate": "YYYY-MM-DD",
                  "placeOfService": "11",
                  "typeOfServiceDescription": "Medical/Surgical",
                  "fromDate": "YYYY-MM-DD",
                  "serviceDiagnoses": [
                    {
                      "diagnosisCode": "M25551",
                      "diagnosisCodeDescription": "PAIN IN RIGHT HIP"
                    },
                    {
                      "diagnosisCode": "M545",
                      "diagnosisCodeDescription": "LOW BACK PAIN"
                    }
                  ],
                  "procedureCodeDescription": "THERAPEUTIC EXERCISES",
                  "lineNumber": "003",
                  "placeOfServiceDescription": "OFFICE"
                },
                {
                  "typeOfServiceCode": "1",
                  "procedureCode": "97140",
                  "procedureCodeType": "CPT-4",
                  "quantityOfServices": "1",
                  "procedureCodeModifiers": [
                    {
                      "procedureCodeModifier": "GP",
                      "procedureCodeModifierDescription": "SERVICES DELIVERED UNDER AN OUTPATIENT PHYSICAL THERAPY PLAN OF CARE"
                    }
                  ],
                  "toDate": "YYYY-MM-DD",
                  "placeOfService": "00",
                  "typeOfServiceDescription": "Medical/Surgical",
                  "fromDate": "YYYY-MM-DD",
                  "serviceDiagnoses": [
                    {
                      "diagnosisCode": "M25551",
                      "diagnosisCodeDescription": "PAIN IN RIGHT HIP"
                    },
                    {
                      "diagnosisCode": "M545",
                      "diagnosisCodeDescription": "LOW BACK PAIN"
                    }
                  ],
                  "procedureCodeDescription": "MANUAL THERAPY 1/> REGIONS",
                  "lineNumber": "001",
                  "placeOfServiceDescription": "OFFICE"
                },
                {
                  "typeOfServiceCode": "1",
                  "procedureCode": "97110",
                  "procedureCodeType": "CPT-4",
                  "quantityOfServices": "1",
                  "procedureCodeModifiers": [
                    {
                      "procedureCodeModifier": "GP",
                      "procedureCodeModifierDescription": "SERVICES DELIVERED UNDER AN OUTPATIENT PHYSICAL THERAPY PLAN OF CARE"
                    }
                  ],
                  "toDate": "YYYY-MM-DD",
                  "placeOfService": "00",
                  "typeOfServiceDescription": "Medical/Surgical",
                  "fromDate": "YYYY-MM-DD",
                  "serviceDiagnoses": [
                    {
                      "diagnosisCode": "M25551",
                      "diagnosisCodeDescription": "PAIN IN RIGHT HIP"
                    },
                    {
                      "diagnosisCode": "M545",
                      "diagnosisCodeDescription": "LOW BACK PAIN"
                    }
                  ],
                  "procedureCodeDescription": "THERAPEUTIC EXERCISES",
                  "lineNumber": "002",
                  "placeOfServiceDescription": "OFFICE"
                }
              ],
              "providerFirstName": "ANONYMOUS",
              "adjudicationFlag": "Y",
              "stateCode": "XX",
              "icdCodeType": "10",
              "claimStatus": "P",
              "providerAddress1": "SOME ADDRESS"
            }

Solution

  • Since I'm not really sure about what you want your end object to be, and ignoring the Pandas side, I've coded a recursive flattener for the type of dictionary you exhibited. It returns a flat dictionary, whose keys are constructed by concatenating the successive keys for nested dicts, and appending indexes for the successive items of lists:

    def dict_flattener(my_dict, rootkey=None, sep="___"):
        resp_dict = {}
        for key, value in my_dict.items():
            resp_key = key if rootkey is None else rootkey + sep + key
            if isinstance(value, str):
                resp_dict[resp_key] = value
            elif isinstance(value, list):
                for i, this_dict in enumerate(value):
                    resp_dict.update(dict_flattener(this_dict, rootkey=f"{resp_key}_{i}", sep=sep))
        return resp_dict
    

    Example (from an excerpt of your data) - Note: the alphabetical ordering is an effect of pprint, the order of entries is actually kept.

    data = {
                  "providerCity": "SOME CITY",
                  "providerSpecialtyDescription": "PHYSICAL/OCCUPATIONAL THERAPY",
                  "updateDate": "YYYY-MM-DD",
                  "providerNpi": "XXXXXXXXXXX",
                  "icdDiagnosisCodes": [
                    {
                      "icdDiagnosisCode": "M25551",
                      "icdDiagnosisDecimalCode": "M25.551",
                      "icdDiagnosisCodeDescription": "PAIN IN RIGHT HIP"
                    },
                    {
                      "icdDiagnosisCode": "M545",
                      "icdDiagnosisDecimalCode": "M54.5",
                      "icdDiagnosisCodeDescription": "LOW BACK PAIN"
                    }
                  ],
                  "dateOfBirth": "YYYY-MM-DD"
    }
    
    pprint(dict_flattener(data))
    
    {'dateOfBirth': 'YYYY-MM-DD',
     'icdDiagnosisCodes_0___icdDiagnosisCode': 'M25551',
     'icdDiagnosisCodes_0___icdDiagnosisCodeDescription': 'PAIN IN RIGHT HIP',
     'icdDiagnosisCodes_0___icdDiagnosisDecimalCode': 'M25.551',
     'icdDiagnosisCodes_1___icdDiagnosisCode': 'M545',
     'icdDiagnosisCodes_1___icdDiagnosisCodeDescription': 'LOW BACK PAIN',
     'icdDiagnosisCodes_1___icdDiagnosisDecimalCode': 'M54.5',
     'providerCity': 'SOME CITY',
     'providerNpi': 'XXXXXXXXXXX',
     'providerSpecialtyDescription': 'PHYSICAL/OCCUPATIONAL THERAPY',
     'updateDate': 'YYYY-MM-DD'}