python json api data-partitioning

Python JSON data parsing


I'm trying to get the average of "active" for each place under a specific area. So, say, the output would be ("Andaman and Nicobar Islands": 10, "Andhra Pradesh": 12). However, I get "TypeError: string indices must be integers" from the line "r[v["recovered"]].append(v["deceased"])".

import requests, json
from collections import defaultdict
from statistics import mean

# request and parse the data
response_API = requests.get("https://data.covid19india.org/state_district_wise.json").json()

def get_mean_active(response_API):
    """Collect "active" values grouped by "area" and build a dict of means.

    Every value of ``response_API`` is indexed with the string keys
    "area" and "active", so each value has to support string subscripts.
    """
    r = defaultdict(list)
    # Only the values are visited; the keys of response_API are not used.
    for v in response_API.values():
        r[v["area"]].append(v["active"])
    # NOTE(review): iterating a dict directly yields its keys only, so the
    # "k, v" unpacking below is applied to each key; r.items() yields pairs.
    return {k: mean(v) for k, v in r}


# Print the result of get_mean_active for a value nested below "districtData".
# NOTE(review): get_mean_active iterates .values() of whatever is passed here
# and indexes each of those values with string keys.
print(
    get_mean_active(
        response_API["Andaman and Nicobar Islands"]["districtData"]["top level here"]
    )
)

TypeError: string indices must be integers
PS C:\Users\e\AppData\Local\Programs\Python\Python39\test>  c:; cd 'c:\Users\e\AppData\Local\Programs\Python\Python39\test'; & 'C:\User  File "C:\Users\e\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 268, in run_path    return _run_module_code(code, init_globals, run_name,
  File "C:\Users\e\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 97, in _run_module_code
    _run_code(code, mod_globals, init_globals,
  File "C:\Users\e\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\Users\e\AppData\Local\Programs\Python\Python39\test\ex2.py", line 19, in <module>
    get_mean_active(
  File "c:\Users\e\AppData\Local\Programs\Python\Python39\test\ex2.py", line 14, in get_mean_active
    r[v["Andaman and Nicobar Islands"]].append(v["active"])
TypeError: string indices must be integers
PS C:\Users\e\AppData\Local\Programs\Python\Python39\test>

Solution

  • The argument you're passing is going too deep into the nested dictionaries. You want to pass the entire districtData dictionary to get_mean_active(), not a specific area. Then the function loops over all the areas.

    The loop should use .items(), not .values(), because the area name is the key of each dictionary element, not an element of the nested dictionary.

    And the dictionary comprehension at the end needs to use r.items().

    import requests, json
    from collections import defaultdict
    from statistics import mean
    
    # request and parse the data
    response_API = requests.get("https://data.covid19india.org/state_district_wise.json").json()
    
    def get_mean_active(response_API):
        """Return a dict mapping each area name to the mean of its "active" values.

        ``response_API`` is iterated with ``.items()``; each key is used as the
        area name and each value is indexed with the key "active".
        """
        active_by_area = {}
        for area, record in response_API.items():
            # Group under the area name so the mean is taken per area.
            active_by_area.setdefault(area, []).append(record["active"])
        return {area: mean(values) for area, values in active_by_area.items()}
    
    
    # Pass the whole districtData mapping so the function sees every area.
    print(get_mean_active(response_API["Andaman and Nicobar Islands"]["districtData"]))
    

    Result:

    {'Nicobars': 0, 'North and Middle Andaman': 0, 'South Andaman': 19, 'Unknown': -13}