python python-3.x string regexp-replace jsonencoder

List elements should be separated by comma and space in json.dumps


I have a json which contains dictionaries, lists, integers etc.

# Sample document, pretty-printed with every list element on its own line.
json_str = '''
{
    "name": "John",
    "age": 30,
    "pets": [
        {
            "name": "Fluffy",
            "type": "cat",
            "toys": [
                "ball",
                "string",
                "box"
            ]
        },
        {
            "name": "Fido",
            "type": "dog",
            "toys": [
                "bone",
                "frisbee"
            ]
        }
    ]
}
'''

Currently, each element inside a list starts on a new line. I want each list element to start on the same line where the previous element ended. The end result should look something like this:

{
    "name": "John",
    "age": 30,
    "pets": [ {
          "name": "Fluffy",
          "type": "cat",
          "toys": [ "ball", "string", "box" ]
        }, {
           "name": "Fido",
           "type": "dog",
           "toys": [ "bone", "frisbee" ]
     } ]
}

Basically I want list elements to be separated by a comma and a whitespace. I'm using python json.dumps to write json to a file and want a solution in python

I've searched the internet and asked ChatGPT but couldn't find anything that works. One suggestion I found was to pass a custom encoder to json.dumps, but it doesn't work:

class ObjectEncoder(JSONEncoder):
    """JSON encoder that writes enums by name and plain objects by attribute."""

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o``.

        ``Enum`` members become their ``name``.  Any other object becomes a
        dict of its instance attributes, leaving out those whose value is
        ``None``.  The object itself is not modified.
        """
        if isinstance(o, Enum):
            return o.name

        # NOTE: the encoder serializes lists and tuples itself and never hands
        # them to default(), so this branch only runs when default() is called
        # directly; it cannot change how json.dumps lays out lists.
        if isinstance(o, list):
            return ", ".join(json.dumps(i, indent=self.indent) for i in o)

        # Skip attributes that are None.  Build a new dict instead of deleting
        # from o.__dict__, so serializing an object never strips its attributes.
        return {
            key: value
            for key, value in o.__dict__.items()
            if value is not None
        }

Solution

  • The suggestion made in this answer works. You'll have to patch the whole _make_iterencode though (we're basically editing only a few lines in _iterencode_list):

    import json
    from unittest.mock import patch
    
    def make_iterencode_custom(markers, _default, _encoder, _indent, _floatstr,
            _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
            ## HACK: hand-optimized bytecode; turn globals into locals
            ValueError=ValueError,
            dict=dict,
            float=float,
            id=id,
            int=int,
            isinstance=isinstance,
            list=list,
            str=str,
            tuple=tuple,
            _intstr=int.__repr__,
        ):
        """Build an ``_iterencode`` generator that writes lists on one line.

        Intended to be patched in for ``json.encoder._make_iterencode``.
        Dicts are laid out one key per line when ``_indent`` is set; lists are
        always written inline as ``[ a, b, c ]``.

        Parameters:
            markers: dict used to detect circular references, or None to
                disable the check.
            _default: callable applied to objects that are not str, None,
                bool, int, float, list, tuple or dict; its result is encoded
                in place of the object.
            _encoder: callable turning a str into its JSON string literal.
            _indent: None for no dict indentation, an int number of spaces,
                or the indent string itself.
            _floatstr: callable turning a float into its JSON text.
            _key_separator: text written between a dict key and its value.
            _item_separator: text written between items.
            _sort_keys: when true, dict items are written in sorted order.
            _skipkeys: when true, dict keys of unsupported types are skipped
                instead of raising TypeError.
            _one_shot: accepted for signature compatibility; not used here.

        Returns:
            A generator function ``_iterencode(o, _current_indent_level)``
            that yields the JSON text of ``o`` in chunks.

        Raises (from the returned generator):
            ValueError: a container or object contains itself.
            TypeError: a dict key has an unsupported type and ``_skipkeys``
                is false.
        """

        if _indent is not None and not isinstance(_indent, str):
            _indent = ' ' * _indent

        # List items are separated by the item separator followed by a single
        # space.  Only add the space when the separator does not already end
        # in whitespace (e.g. ', '), otherwise items would be split by two.
        if _item_separator[-1:].isspace():
            list_separator = _item_separator
        else:
            list_separator = _item_separator + ' '

        def _iterencode_list(lst, _current_indent_level):
            """Yield the chunks of ``lst`` written inline as ``[ a, b ]``."""
            if not lst:
                yield '[]'
                return
            if markers is not None:
                markerid = id(lst)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = lst
            # No newline after the bracket and no bump of the indent level:
            # nested dicts are indented relative to the dict holding the list.
            buf = '[ '
            first = True
            for value in lst:
                if first:
                    first = False
                else:
                    buf = list_separator
                if isinstance(value, str):
                    yield buf + _encoder(value)
                elif value is None:
                    yield buf + 'null'
                elif value is True:
                    yield buf + 'true'
                elif value is False:
                    yield buf + 'false'
                elif isinstance(value, int):
                    # Subclasses of int/float may override __repr__, but we still
                    # want to encode them as integers/floats in JSON. One example
                    # within the standard library is IntEnum.
                    yield buf + _intstr(value)
                elif isinstance(value, float):
                    # see comment above for int
                    yield buf + _floatstr(value)
                else:
                    yield buf
                    if isinstance(value, (list, tuple)):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                    yield from chunks
            # The closing bracket stays on the same line as the last item.
            yield ' ]'
            if markers is not None:
                del markers[markerid]

        def _iterencode_dict(dct, _current_indent_level):
            """Yield the chunks of ``dct``, one key per line when indenting."""
            if not dct:
                yield '{}'
                return
            if markers is not None:
                markerid = id(dct)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = dct
            yield '{'
            if _indent is not None:
                _current_indent_level += 1
                newline_indent = '\n' + _indent * _current_indent_level
                item_separator = _item_separator + newline_indent
                yield newline_indent
            else:
                newline_indent = None
                item_separator = _item_separator
            first = True
            if _sort_keys:
                items = sorted(dct.items())
            else:
                items = dct.items()
            for key, value in items:
                if isinstance(key, str):
                    pass
                # JavaScript is weakly typed for these, so it makes sense to
                # also allow them.  Many encoders seem to do something like this.
                elif isinstance(key, float):
                    # see comment for int/float in _iterencode_list
                    key = _floatstr(key)
                elif key is True:
                    key = 'true'
                elif key is False:
                    key = 'false'
                elif key is None:
                    key = 'null'
                elif isinstance(key, int):
                    # see comment for int/float in _iterencode_list
                    key = _intstr(key)
                elif _skipkeys:
                    continue
                else:
                    raise TypeError(f'keys must be str, int, float, bool or None, '
                                    f'not {key.__class__.__name__}')
                if first:
                    first = False
                else:
                    yield item_separator
                yield _encoder(key)
                yield _key_separator
                if isinstance(value, str):
                    yield _encoder(value)
                elif value is None:
                    yield 'null'
                elif value is True:
                    yield 'true'
                elif value is False:
                    yield 'false'
                elif isinstance(value, int):
                    # see comment for int/float in _iterencode_list
                    yield _intstr(value)
                elif isinstance(value, float):
                    # see comment for int/float in _iterencode_list
                    yield _floatstr(value)
                else:
                    if isinstance(value, (list, tuple)):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                    yield from chunks
            if newline_indent is not None:
                _current_indent_level -= 1
                yield '\n' + _indent * _current_indent_level
            yield '}'
            if markers is not None:
                del markers[markerid]

        def _iterencode(o, _current_indent_level):
            """Yield the chunks of any value, dispatching on its type."""
            if isinstance(o, str):
                yield _encoder(o)
            elif o is None:
                yield 'null'
            elif o is True:
                yield 'true'
            elif o is False:
                yield 'false'
            elif isinstance(o, int):
                # see comment for int/float in _iterencode_list
                yield _intstr(o)
            elif isinstance(o, float):
                # see comment for int/float in _iterencode_list
                yield _floatstr(o)
            elif isinstance(o, (list, tuple)):
                yield from _iterencode_list(o, _current_indent_level)
            elif isinstance(o, dict):
                yield from _iterencode_dict(o, _current_indent_level)
            else:
                # Unsupported type: convert it with _default and encode the
                # result, guarding against a conversion that loops on itself.
                if markers is not None:
                    markerid = id(o)
                    if markerid in markers:
                        raise ValueError("Circular reference detected")
                    markers[markerid] = o
                o = _default(o)
                yield from _iterencode(o, _current_indent_level)
                if markers is not None:
                    del markers[markerid]
        return _iterencode
    
    # Sample data: two pets, each with its own list of toys.
    dict_in = {
        "name": "John",
        "age": 30,
        "pets": [
            {"name": "Fluffy", "type": "cat", "toys": ["ball", "string", "box"]},
            {"name": "Fido", "type": "dog", "toys": ["bone", "frisbee"]},
        ],
    }

    # Swap in the custom encoder factory only for the duration of this call;
    # patch.object restores the original when the block exits.
    with patch.object(json.encoder, '_make_iterencode', make_iterencode_custom):
        print(json.dumps(dict_in, indent=4))
    

    Output:

    {
        "name": "John",
        "age": 30,
        "pets": [ {
            "name": "Fluffy",
            "type": "cat",
            "toys": [ "ball", "string", "box" ]
        }, {
            "name": "Fido",
            "type": "dog",
            "toys": [ "bone", "frisbee" ]
        } ]
    }
    

    Edit: adding white spaces before/after brackets and using mock

    Edit2: using standard library