Tags: python, json, recursion, flatten, denormalized

Denormalize/flatten list of nested objects into dot separated key value pairs


It would have been simpler if my nested objects were dictionaries, but they are lists of dictionaries. Example:

# Sample input: a list of dicts in which some values are lists of further dicts.
all_objs1 = [{
    'a': 1,
    'b': [{'ba': 2, 'bb': 3}, {'ba': 21, 'bb': 31}],
    'c': 4
}, {
    'a': 11,
    'b': [{'ba': 22, 'bb': 33, 'bc': [{'h': 1, 'e': 2}]}],
    'c': 44
}]

I expect the output in the following format:

[
  {'a': 1, 'b.ba': 2, 'b.bb': 3, 'c': 4},
  {'a': 1, 'b.ba': 21, 'b.bb': 31, 'c': 4},
  {'a': 11, 'b.ba': 22, 'b.bb': 33, 'bc.h': 1, 'bc.e': 2, 'c': 44},
]

Basically, the number of flattened objects generated will be equal to (obj * depth).

With my current code:

def flatten(obj, flattened_obj, last_key=''):
  """Copy the entries of ``obj`` into ``flattened_obj`` under dot-joined keys.

  Written for Python 2 (it uses ``dict.iteritems``). Nothing is returned; the
  result is accumulated in ``flattened_obj``, which is mutated in place.

  Args:
    obj: dict to flatten. Values that are lists are iterated and every
      element is passed back into ``flatten``.
    flattened_obj: dict that receives the flattened key/value pairs.
    last_key: key prefix accumulated from the enclosing levels; it is
      prepended verbatim to each key of ``obj``.
  """
  for k,v in obj.iteritems():
    if not isinstance(v, list):
      flattened_obj.update({last_key+k : v})
    else:
      # NOTE: last_key is changed for the remainder of this loop, not only
      # for this key, so keys visited afterwards may inherit the prefix.
      last_key += k + '.'
      for nest_obj in v:
        # Every element writes into the same flattened_obj, so elements that
        # share keys overwrite each other instead of producing separate rows.
        flatten(nest_obj, flattened_obj, last_key)
        # Runs once per list element, trimming the prefix after each one.
        last_key = remove_last_key(last_key)

def remove_last_key(key_path):
    """Strip the last component from a dot-terminated key prefix.

    The final character of ``key_path`` is ignored while searching for the
    previous '.'; everything after that dot is dropped. If no such dot is
    found at an index greater than zero, ``key_path`` is returned unchanged.
    """
    without_tail = key_path[:-1]
    cut = without_tail.rfind('.')
    return key_path if cut <= 0 else key_path[:cut + 1]

Output:


[
  {'a': 1, 'b.bb': 31, 'c': 4, 'b.ba': 21},
  {'a': 11, 'b.bc.e': 2, 'c': 44, 'b.bc.h': 1, 'b.bb': 33, 'b.ba': 22}
]

I am able to flatten the object (though not accurately), but I am not able to create a new object for each nested object. I cannot use the pandas library, as my app is deployed on App Engine.


Solution

  • code.py:

    from itertools import product
    from pprint import pprint as pp
    
    
    # Sample input consumed by main(): a list of dicts in which some values
    # are lists of further dicts.
    all_objs = [{
        "a": 1,
        "b": [{"ba": 2, "bb": 3}, {"ba": 21, "bb": 31}],
        "c": 4,
        # Optional second multi-element list; uncomment to try an object that
        # holds two such lists at the same level.
        #"d": [{"da": 2}, {"da": 5}],
    }, {
        "a": 11,
        "b": [{"ba": 22, "bb": 33, "bc": [{"h": 1, "e": 2}]}],
        "c": 44,
    }]
    
    
    def flatten_dict(obj, parent_key=None):
        """Denormalize one nested dict into a list of flat, dot-keyed dicts.

        Every key of ``obj`` is prefixed with ``parent_key`` (joined by ".").
        Values that are not lists are copied unchanged into every output row.
        A list value is expanded: each of its elements becomes an alternative,
        and the output is the Cartesian product of the alternatives of all
        list-valued keys, each merged with the non-list entries.

        Args:
            obj: dict to flatten.
            parent_key: dotted prefix of the enclosing levels, or None at the
                top level.

        Returns:
            A list of flat dicts. When ``obj`` has no (non-empty) list values
            the list holds exactly one dict.

        Notes:
            * An empty list contributes nothing: the key is omitted and the
              number of rows is unaffected.
            * A list element that is not a dict is stored as-is under the
              list's own dotted key.
        """
        base_dict = {}
        list_items = []  # (dotted key, non-empty list) pairs
        for key, val in obj.items():
            new_key = key if parent_key is None else ".".join((parent_key, key))
            if isinstance(val, list):
                if val:
                    # Keep the dotted key with the list so the recursion below
                    # never relies on a loop variable left over from here.
                    list_items.append((new_key, val))
            else:
                base_dict[new_key] = val
        if not list_items:
            return [base_dict]

        # For each list-valued key, collect every flat row its elements can
        # produce; a single element may itself expand to several rows.
        alternatives = []
        for new_key, elements in list_items:
            rows = []
            for element in elements:
                if isinstance(element, dict):
                    rows.extend(flatten_dict(element, parent_key=new_key))
                else:
                    rows.append({new_key: element})
            alternatives.append(rows)

        # One output row per combination of alternatives across all lists.
        ret = []
        for combination in product(*alternatives):
            new_dict = base_dict.copy()
            for partial in combination:
                new_dict.update(partial)
            ret.append(new_dict)
        return ret
    
    
    def main():
        """Flatten every object in ``all_objs`` and pretty-print all rows."""
        rows = [row for obj in all_objs for row in flatten_dict(obj)]
        pp(rows)
    
    
    # Run the demo only when this file is executed as a script, not on import.
    if __name__ == "__main__":
        main()
    

    Notes:

    Output:

    c:\Work\Dev\StackOverflow\q046341856>c:\Work\Dev\VEnvs\py35x64_test\Scripts\python.exe code.py
    [{'a': 1, 'b.ba': 2, 'b.bb': 3, 'c': 4},
     {'a': 1, 'b.ba': 21, 'b.bb': 31, 'c': 4},
     {'a': 11, 'b.ba': 22, 'b.bb': 33, 'b.bc.e': 2, 'b.bc.h': 1, 'c': 44}]
    

    @EDIT0:

    @EDIT1: