python python-3.x string regexp-replace jsonencoder

List elements should be separated by comma and space in json.dumps

I have a JSON document that contains dictionaries, lists, integers, etc.

# Sample JSON text with nested objects and arrays; every array element sits on its own line.
json_str = '''
{
    "name": "John",
    "age": 30,
    "pets": [
        {
            "name": "Fluffy",
            "type": "cat",
            "toys": [
                "ball",
                "string",
                "box"
            ]
        },
        {
            "name": "Fido",
            "type": "dog",
            "toys": [
                "bone",
                "frisbee"
            ]
        }
    ]
}
'''

Currently, each element inside a list starts on a new line. I want each list element to start on the same line where the previous element ended. The end result should look something like this:

{
    "name": "John",
    "age": 30,
    "pets": [ {
          "name": "Fluffy",
          "type": "cat",
          "toys": [ "ball", "string", "box" ]
        }, {
           "name": "Fido",
           "type": "dog",
           "toys": [ "bone", "frisbee" ]
     } ]
}
'''

Basically, I want list elements to be separated by a comma and a single space instead of a comma and a newline. I'm using Python's json.dumps to write JSON to a file, and I want a solution in Python.

I've searched the internet and asked ChatGPT, but couldn't find anything that works. I found some suggestions, such as passing a custom encoder to json.dumps, but that doesn't work:

class ObjectEncoder(JSONEncoder):
    """JSON encoder for enum members and plain attribute-bearing objects.

    Enum members are written as their ``name``; any other object handed to
    ``default`` is written as a dict of its instance attributes, leaving out
    the attributes whose value is ``None``.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o``.

        Args:
            o: The object the base encoder could not serialize by itself.

        Returns:
            ``o.name`` for an ``Enum`` member, otherwise a new dict holding
            the entries of ``o.__dict__`` whose value is not ``None``.

        Raises:
            AttributeError: If ``o`` has no ``__dict__``.
        """
        if isinstance(o, Enum):
            return o.name

        # NOTE: JSONEncoder serializes lists itself and only calls default()
        # for types it cannot handle, so this branch is not reached through
        # json.dumps(); it cannot be used to change how lists are laid out.
        if isinstance(o, list):
            return ", ".join(json.dumps(i, indent=self.indent) for i in o)

        # Build a filtered copy instead of deleting keys from o.__dict__:
        # serializing an object must not strip attributes from it.
        return {
            key: value
            for key, value in o.__dict__.items()
            if value is not None
        }

Solution

The suggestion made in this answer works. You'll have to replace the whole json.encoder._make_iterencode function, though, even though only a few lines inside _iterencode_list actually change:

import json
from unittest.mock import patch

def make_iterencode_custom(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build an incremental JSON encoder that keeps every list on one line.

    Intended as a replacement for ``json.encoder._make_iterencode``: dicts
    keep the usual indent/newline layout, while lists and tuples are always
    written inline as ``[ item, item ]`` regardless of ``_indent``.

    Args:
        markers: Dict used to detect circular references (keyed by ``id``),
            or ``None`` to skip that check.
        _default: Callable invoked for objects of unsupported types; its
            return value is encoded in place of the object.
        _encoder: Callable turning a ``str`` into its JSON string literal.
        _indent: ``None`` for no dict indentation, an ``int`` number of
            spaces, or the indent string itself.
        _floatstr: Callable turning a ``float`` into its JSON text.
        _key_separator: Text placed between a dict key and its value.
        _item_separator: Text placed between consecutive items.
        _sort_keys: If true, dict items are emitted in sorted order.
        _skipkeys: If true, dict keys of unsupported types are skipped
            instead of raising ``TypeError``.
        _one_shot: Accepted for signature compatibility; not used here.

    Returns:
        A generator function ``_iterencode(o, _current_indent_level)`` that
        yields the JSON text of ``o`` in chunks.
    """

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    # Lists never break across lines, so items are joined by the item
    # separator followed by one blank. Only add the blank when the separator
    # does not already end in whitespace (e.g. the ', ' used when no indent is
    # given), otherwise the output would contain a double space.
    if _item_separator[-1:].isspace():
        list_separator = _item_separator
    else:
        list_separator = _item_separator + ' '

    def _iterencode_list(lst, _current_indent_level):
        """Yield the chunks of ``lst`` as a single-line ``[ a, b ]`` array."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # Unlike a dict, a list does not raise the indent level: containers
        # nested inside it are laid out at the level of the list itself.
        buf = '[ '
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = list_separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        yield ' ]'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the chunks of ``dct``, one item per line when indenting."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _iterencode_list
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _iterencode_list
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _iterencode_list
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _iterencode_list
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the chunks of any object, dispatching on its type."""
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _iterencode_list
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _iterencode_list
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            # Unsupported type: convert it through _default and encode the
            # result, tracking the object while it is being converted so a
            # self-referencing conversion is reported instead of looping.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode

# Sample document used to demonstrate the single-line list layout.
dict_in = {
    "name": "John",
    "age": 30,
    "pets": [
        {
            "name": "Fluffy",
            "type": "cat",
            "toys": [
                "ball",
                "string",
                "box"
            ]
        },
        {
            "name": "Fido",
            "type": "dog",
            "toys": [
                "bone",
                "frisbee"
            ]
        }
    ]
}

# Swap in the custom encoder factory only for the duration of the call.
# The C accelerator is disabled as well: when the encoder selects
# c_make_encoder, _make_iterencode is never consulted and the patch would be
# silently ignored. NOTE(review): recent CPython releases appear to use the C
# encoder for indented output too -- confirm against the targeted version.
# Setting it to None is harmless where the pure-Python path is already used.
with patch.object(json.encoder, '_make_iterencode', make_iterencode_custom), \
        patch.object(json.encoder, 'c_make_encoder', None):
    print(json.dumps(dict_in, indent=4))

Output:

{
    "name": "John",
    "age": 30,
    "pets": [ {
        "name": "Fluffy",
        "type": "cat",
        "toys": [ "ball", "string", "box" ]
    }, {
        "name": "Fido",
        "type": "dog",
        "toys": [ "bone", "frisbee" ]
    } ]
}

Edit: adding white spaces before/after brackets and using mock

Edit2: using standard library