pythonlistdictionarynestedordereddict

How to sort all the nested dictionaries and lists inside a dictionary or list at once?


I am trying to develop the most efficient/comprehensive function with this aim:

Sorting every nested dictionary or list inside a dictionary or list.

Note: I used the collections.OrderedDict because I wanted to make it useful also for python versions before the 3.7, the ones that does not preserve order in dictionaries.

Based on the recursive function from this thread, which sorts only nested dictionaries, I'm trying to build a correspondant recursive function that sorts only nested lists, and then to combine them by using if cycles that identify if the object to be sorted is a dictionary or a list.

This is what I have developed:

from collections import OrderedDict

def recursively_order_dict(d):
    ordered_dict = OrderedDict()
    for key in sorted(d.keys()):
        val = d[key]
        if isinstance(val, dict):
            val = recursively_order_dict(val)
        if isinstance(val, list):
            val = recursively_order_list(val)    
        ordered_dict[key] = val
    return ordered_dict

def recursively_order_list(l):
    ordered_list = []
    for element in sorted(l):
        if isinstance(element, list):
            element = recursively_order_list(element)
        if isinstance(element, dict):
            element = recursively_order_dict(element)
        ordered_list.append(element)
    return ordered_list 

def order_all_dicts_and_lists_in_iterable(iterable1):        
    if isinstance(iterable1, dict):
        ordered_iterable = recursively_order_dict(iterable1)            
    if isinstance(iterable1, list):
        ordered_iterable = recursively_order_list(iterable1)                   
    else:        
        print("%s\n is nor a list nor a dictionary.\nIts type is %s." % (iterable1, type(iterable1)) ) 
        return   
    return ordered_iterable    

It works fine on many examples, but it does not by processing the dictionary dict_2

dict_2 = { 
        "key9":"value9",
        "key5":"value5",
        "key3":{
                "key3_1":"value3_1",
                "key3_5":"value3_5",
                "key3_2":[[],"value3_2_1",[] ],
                },
        "key2":"value2",
        "key8":{
                "key8_1":"value8_1",
                "key8_5":{
                            "key8_5_4":["value8_5_b", "value8_5_a", "value8_5_c"],
                            "key8_5_2":[{},{"key8_5_2_4_2":"value8_5_2_4_2", "key8_5_2_4_1":"value8_5_2_4_1", "key8_5_2_4_5":"value8_5_2_4_5"}, "value8_5_2_1",{}],
                            },
                "key8_2":"value8_2",
                },
        "key1":"value1",
     }

sorted_dict_2 = order_all_dicts_and_lists_in_iterable(dict_2)

and throws this error:

--------------------------------------------------------------------------- TypeError                                 Traceback (most recent call last) <ipython-input-12-9cbf4414127d> in <module>
----> 1 order_all_dicts_and_lists_in_iterable(dict_2)

<ipython-input-9-352b10801248> in order_all_dicts_and_lists_in_iterable(iterable1)
     26 
     27     if isinstance(iterable1, dict):
---> 28         ordered_iterable = recursively_order_dict(iterable1)
     29         if isinstance(iterable1, list):
     30             ordered_iterable = order_all_dicts_and_lists_in_iterable(ordered_iterable)

<ipython-input-9-352b10801248> in recursively_order_dict(d)
      6         val = d[key]
      7         if isinstance(val, dict):
----> 8             val = recursively_order_dict(val)
      9         if isinstance(val, list):
     10             val = recursively_order_list(val)

<ipython-input-9-352b10801248> in recursively_order_dict(d)
      8             val = recursively_order_dict(val)
      9         if isinstance(val, list):
---> 10             val = recursively_order_list(val)
     11         ordered_dict[key] = val
     12     return ordered_dict

<ipython-input-9-352b10801248> in recursively_order_list(l)
     14 def recursively_order_list(l):
     15     ordered_list = []
---> 16     for element in sorted(l):
     17         if isinstance(element, list):
     18             element = recursively_order_list(element)

TypeError: '<' not supported between instances of 'str' and 'list'

So it looks like Python cannot sort iterable made of strings/numbers and lists/dictionaries, because it does not know what to take from lists/dictionaries as a term of comparison.

How could I change my function in order to get lists/dictionaries just being put at the end/start of the sorted iterable, when compared to strings/numbers ?

In few words, how should I change my function to have it turn the above dict_2 into this (hand-edited) sorted_dict_2?

sorted_dict_2 = { 
            "key1":"value1",
            "key2":"value2",
            "key3":{
                    "key3_1":"value3_1",
                    "key3_2":[ [],[],"value3_2_1" ],
                    "key3_5":"value3_5",                    
                    },            
            "key5":"value5",           
            "key8":{
                    "key8_1":"value8_1",
                    "key8_2":"value8_2",
                    "key8_5":{                                
                                "key8_5_2":[
                                            {},
                                            {},
                                            "value8_5_2_1",
                                            {
                                            "key8_5_2_4_1":"value8_5_2_4_1",
                                            "key8_5_2_4_2":"value8_5_2_4_2",                                               
                                            "key8_5_2_4_5":"value8_5_2_4_5"
                                            },                                                                                        
                                            ],
                                "key8_5_4":["value8_5_a", "value8_5_b", "value8_5_c"],
                            },
                    
                    },
            "key9":"value9",
         }

Solution

  • So, basically, you need to make a key function that will make all containers compare less than anything else. A handy value is float('inf') for this. However, since we don't know if the thing we are sorting contains numbers or strings, we have to just transform everything into a tuple, and manually map the ordinal values for each string: map(ord, x)

    The following is an example if you want containers to move to the front (so negative inf...:

    from collections import OrderedDict
    
    def recursively_order_dict(d):
        ordered_dict = OrderedDict()
        for key in sorted(d.keys()):
            val = d[key]
            if isinstance(val, dict):
                val = recursively_order_dict(val)
            if isinstance(val, list):
                val = recursively_order_list(val)
            ordered_dict[key] = val
        return ordered_dict
    
    def _move_containers_to_end(x):
        if isinstance(x, (list, dict)):
            # to put at the end, use inf, at the start, -inf
            return (float('-inf'),)
        elif isinstance(x, str):
            return tuple(map(ord,  x))
        else: # assuming we only can get numbers at this point
            return (x,)
    
    def recursively_order_list(l):
        ordered_list = []
        for element in sorted(l, key=_move_containers_to_end):
            if isinstance(element, list):
                element = recursively_order_list(element)
            if isinstance(element, dict):
                element = recursively_order_dict(element)
            ordered_list.append(element)
        return ordered_list
    
    def order_all_dicts_and_lists_in_iterable(iterable1):
        if isinstance(iterable1, dict):
            ordered_iterable = recursively_order_dict(iterable1)
        elif isinstance(iterable1, list):
            ordered_iterable = recursively_orded_list(iterable1)
        else:
            print("%s\n is nor a list nor a dictionary.\nIts type is %s." % (iterable1, type(iterable1)) )
        return ordered_iterable
    

    The result of the above is:

    OrderedDict([('key1', 'value1'),
                 ('key2', 'value2'),
                 ('key3',
                  OrderedDict([('key3_1', 'value3_1'),
                               ('key3_2', [[], [], 'value3_2_1']),
                               ('key3_5', 'value3_5')])),
                 ('key5', 'value5'),
                 ('key8',
                  OrderedDict([('key8_1', 'value8_1'),
                               ('key8_2', 'value8_2'),
                               ('key8_5',
                                OrderedDict([('key8_5_2',
                                              [OrderedDict(),
                                               OrderedDict([('key8_5_2_4_1',
                                                             'value8_5_2_4_1'),
                                                            ('key8_5_2_4_2',
                                                             'value8_5_2_4_2'),
                                                            ('key8_5_2_4_5',
                                                             'value8_5_2_4_5')]),
                                               OrderedDict(),
                                               'value8_5_2_1']),
                                             ('key8_5_4',
                                              ['value8_5_a',
                                               'value8_5_b',
                                               'value8_5_c'])]))])),
                 ('key9', 'value9')])
    

    Note, you may want to do something like:

    import sys:
    if sys.version_info.minor < 7:
        OrderedMapping = dict
    else:
        from collections import OrderedDict as OrderedMapping
    

    Then use:

    ordered_dict = OrderedMapping()
    

    in recursively_order_dict