I have a JSON file, and for some keys their value is an array. The JSON can go up to an unspecified depth. I would like to know how to extract all the keys whose value is an array, and also the JSON path to that array.
Example schema:
[
{
"field1": "x",
"field2": ["y", "z"],
"field3": [
{
"field4": "a",
"field5": ["b", "c"]
},
{
"field4": "d",
"field5": ["e", "f"]
}
],
"field6": "g"
}
]
From this, I want the keys field2, field3, field5
and their paths e.g. field2, field3, field3[0][field5], field3[1][field5]
I have got the following code which can identify all keys in a JSON file, but not the type of value it holds nor its path:
def get_keys(d):
    """Yield every dictionary key found anywhere inside a decoded JSON value.

    Dicts and lists are walked recursively, depth first, in their own
    iteration order. Any other value (string, number, bool, None) yields
    nothing. Keys are yielded once per occurrence, so duplicates are kept.

    Args:
        d: The value to walk, e.g. the result of ``json.loads``.

    Yields:
        Each dict key, parent keys before the keys nested under them.
    """
    if isinstance(d, dict):
        for k, v in d.items():
            yield k
            # Delegate straight to the sub-generator; wrapping it in list()
            # would copy every subtree's keys once per nesting level.
            yield from get_keys(v)
    elif isinstance(d, list):
        for o in d:
            yield from get_keys(o)
import json
# Sample document used to exercise find_lists: a top-level array holding one
# object whose values mix scalars, arrays of scalars, an array of objects, and
# a nested object that itself contains an array.
json_data = """
[{
"field1": "x",
"field2": ["y", "z"],
"field3": [
{
"field4": "a",
"field5": ["b", "c"]
},
{
"field4": "d",
"field5": ["e", "f"]
}
],
"field6": "g",
"field7": {
"field8": "h",
"field9": ["i","j"]
}
}]
"""
def _iter_list_keys(data, path, key):
    """Yield ``(key, path)`` for each list stored under a key at or below *data*.

    *key* is the dict key under which *data* was found, or ``None`` when
    *data* is an element of a list (or an unnamed root), in which case a
    list at this position is descended into but not reported.
    """
    if isinstance(data, dict):
        for child_key, value in data.items():
            child_path = f"{path}.{child_key}" if path else child_key
            yield from _iter_list_keys(value, child_path, child_key)
    elif isinstance(data, list):
        if key is not None:
            yield key, path
        for index, item in enumerate(data):
            # List elements have an index, not a key.
            yield from _iter_list_keys(item, f"{path}[{index}]", None)


def find_lists(data, path=""):
    """Print the key and path of every list-valued key in a decoded JSON value.

    Paths use ``.`` between keys and ``[i]`` for list positions, e.g.
    ``[0].field3[1].field5``. One line is printed per match, in depth-first
    document order, formatted as ``Key: <key> Path: <path>``.

    The key is tracked during the walk rather than re-parsed from the path,
    so keys that contain ``.`` are reported intact, and a list nested
    directly inside another list (which has no key) is not reported.

    Args:
        data: The value to walk, e.g. the result of ``json.loads``.
        path: Path prefix for *data*. When non-empty and *data* is itself a
            list, *data* is reported under the last dot-separated segment
            of this prefix.

    Returns:
        None. Results are written to standard output.
    """
    # The caller only supplies a path string for the root, so its key (if
    # any) has to be taken from the last segment of that prefix.
    root_key = path.split(".")[-1] if path else None
    for key, list_path in _iter_list_keys(data, path, root_key):
        # Plain names inside the braces keep this f-string valid on
        # interpreters older than 3.12, which reject reused quote characters.
        print(f"Key: {key} Path: {list_path}")
# Decode the sample document, then report each list-valued key with its path.
mydata = json.loads(json_data)
find_lists(data=mydata)
The output is:
Key: field2 Path: [0].field2
Key: field3 Path: [0].field3
Key: field5 Path: [0].field3[0].field5
Key: field5 Path: [0].field3[1].field5
Key: field9 Path: [0].field7.field9