python csv reorganize

Reorganize Dictionary


I have a .csv file that I've read into Python using csv.DictReader, and the result looks something like this:

[
    {'id':'1', 'date':'12312004','var1':'234', 'var2':'456'},
    {'id':'1', 'date':'12312005','var1':'200', 'var2':'466'},
    {'id':'1', 'date':'12312006','var1':'760', 'var2':'656'},
    {'id':'1', 'date':'12312007','var1':'215', 'var2':'256'},
    {'id':'2', 'date':'5312006','var1':'234', 'var2':'456'},
    {'id':'2', 'date':'5312007','var1':'200', 'var2':'466'},
    {'id':'2', 'date':'5312008','var1':'760', 'var2':'656'},
    {'id':'2', 'date':'5312009','var1':'215', 'var2':'256'},
]

I'd like to reorganize it to:

{'1': {
    '12312004': {'var1': '234', 'var2': '456'},
    '12312005': {'var1': '200', 'var2': '466'},
    '12312006': {'var1': '760', 'var2': '656'},
    '12312007': {'var1': '215', 'var2': '256'}},
 '2': {
     '5312006': {'var1': '234', 'var2': '456'},
     '5312007': {'var1': '200', 'var2': '466'},
     '5312008': {'var1': '760', 'var2': '656'},
     '5312009': {'var1': '215', 'var2': '256'}}
}

Solution

  • Provided your input is a list, you can do this:

    # Build a nested mapping: id -> date -> remaining columns of that row.
    output = {}
    for row in input_list:
        # Work on a shallow copy so the rows in input_list keep their
        # 'id' and 'date' keys.
        entry = row.copy()
        id, date = entry.pop('id'), entry.pop('date')
        # setdefault() hands back the inner dict for this id, creating and
        # storing an empty one the first time the id is seen.
        by_date = output.setdefault(id, {})
        by_date[date] = entry
    

    The dict.setdefault() call returns the value stored under the key id; if there is no such key yet, the second argument (here an empty dictionary) is first stored in the output dictionary under that key and then returned instead.

    Demo:

    >>> input_list = [
    ...     {'id':'1', 'date':'12312004','var1':'234', 'var2':'456'},
    ...     {'id':'1', 'date':'12312005','var1':'200', 'var2':'466'},
    ...     {'id':'1', 'date':'12312006','var1':'760', 'var2':'656'},
    ...     {'id':'1', 'date':'12312007','var1':'215', 'var2':'256'},
    ...     {'id':'2', 'date':'5312006','var1':'234', 'var2':'456'},
    ...     {'id':'2', 'date':'5312007','var1':'200', 'var2':'466'},
    ...     {'id':'2', 'date':'5312008','var1':'760', 'var2':'656'},
    ...     {'id':'2', 'date':'5312009','var1':'215', 'var2':'256'},
    ... ]
    >>> output = {}
    >>> for entry in input_list:
    ...     entry = entry.copy()  # shallow copy, so the pops below leave the rows in input_list intact
    ...     id = entry.pop('id')  # remove 'id' from the copy; its value becomes the outer key
    ...     date = entry.pop('date')  # remove 'date' as well; its value becomes the inner key
    ...     output.setdefault(id, {})[date] = entry  # create the per-id dict on first use, then store what is left
    ... 
    >>> from pprint import pprint
    >>> pprint(output)
    {'1': {'12312004': {'var1': '234', 'var2': '456'},
           '12312005': {'var1': '200', 'var2': '466'},
           '12312006': {'var1': '760', 'var2': '656'},
           '12312007': {'var1': '215', 'var2': '256'}},
     '2': {'5312006': {'var1': '234', 'var2': '456'},
           '5312007': {'var1': '200', 'var2': '466'},
           '5312008': {'var1': '760', 'var2': '656'},
           '5312009': {'var1': '215', 'var2': '256'}}}