I have a JSON document that contains dictionaries, lists, integers, etc.
# Sample JSON text used to illustrate the question: one object with scalar
# fields and a "pets" list whose entries each carry their own "toys" list.
json_str = '''
{
"name": "John",
"age": 30,
"pets": [
{
"name": "Fluffy",
"type": "cat",
"toys": [
"ball",
"string",
"box"
]
},
{
"name": "Fido",
"type": "dog",
"toys": [
"bone",
"frisbee"
]
}
]
}
'''
Currently, each element inside a list starts on a new line. I want each list element to start on the same line where the previous element ended. The end result should look something like this:
{
"name": "John",
"age": 30,
"pets": [ {
"name": "Fluffy",
"type": "cat",
"toys": [ "ball", "string", "box" ]
}, {
"name": "Fido",
"type": "dog",
"toys": [ "bone", "frisbee" ]
} ]
}
'''
Basically, I want list elements to be separated by a comma and a single space. I'm using Python's json.dumps to write JSON to a file, and I want a solution in Python.
I've searched the internet and asked ChatGPT but couldn't find anything that works. I found some suggestions, such as passing a custom encoder to json.dumps, but that doesn't work:
class ObjectEncoder(JSONEncoder):
    """JSON encoder that handles enum members and attribute-bearing objects.

    Enum members are written as their ``name``; any other unsupported object
    is written as a JSON object built from its ``__dict__`` with the
    ``None``-valued attributes left out.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o``.

        Args:
            o: The object the encoder could not serialize natively.

        Returns:
            ``o.name`` for an ``Enum`` member, a comma-joined string for a
            list, otherwise a new dict of the object's non-``None``
            attributes.

        Raises:
            AttributeError: If ``o`` is none of the above and has no
                ``__dict__``.
        """
        if isinstance(o, Enum):
            return o.name
        if isinstance(o, list):
            # NOTE: the encoder serializes lists itself and only falls back
            # to default() for types it does not know, so this branch runs
            # only when default() is called directly. It therefore cannot
            # change how lists are laid out in json.dumps output.
            return ", ".join(json.dumps(i, indent=self.indent) for i in o)
        # Build a filtered copy instead of deleting keys from o.__dict__:
        # serializing an object must not strip attributes from it.
        return {
            key: value
            for key, value in o.__dict__.items()
            if value is not None
        }
The suggestion made in this answer works. You'll have to patch the whole _make_iterencode function, though (we're basically editing only a few lines in _iterencode_list):
import json
from unittest.mock import patch
def make_iterencode_custom(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build an encoder generator that keeps every list on a single line.

    Intended as a replacement for ``json.encoder._make_iterencode``. Dicts
    are laid out as usual (one key per line when ``_indent`` is set); lists
    are always emitted inline as ``[ a, b, c ]`` with their elements
    separated by a comma and one space.

    Args:
        markers: Dict used to detect circular references, or ``None`` to
            disable the check.
        _default: Callable invoked for objects of unsupported types; its
            return value is encoded in place of the object.
        _encoder: Callable turning a ``str`` into its JSON representation.
        _indent: Indent width (int), indent string, or ``None``.
        _floatstr: Callable turning a ``float`` into its JSON text.
        _key_separator: Text placed between a key and its value.
        _item_separator: Text placed between consecutive items.
        _sort_keys: If true, dict items are emitted in sorted order.
        _skipkeys: If true, keys of unsupported types are skipped instead of
            raising ``TypeError``.
        _one_shot: Accepted for signature compatibility; not read here.

    Returns:
        A generator function ``_iterencode(o, _current_indent_level)`` that
        yields the JSON text of ``o`` in chunks.
    """
    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    # List elements are separated by exactly one comma and one space. When
    # no indent is requested the item separator is typically already ', ',
    # so only add the space when it is missing (avoids ',  ').
    if _item_separator.endswith(' '):
        _list_separator = _item_separator
    else:
        _list_separator = _item_separator + ' '

    def _iterencode_list(lst, _current_indent_level):
        """Yield ``lst`` inline as ``[ item, item ]`` without line breaks."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # The indent level is deliberately not increased here, so a dict
        # nested in a list is indented relative to the list's parent.
        buf = '[ '
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = _list_separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        yield ' ]'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield ``dct`` as a JSON object, one key per line when indenting."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them. Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _iterencode_list
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _iterencode_list
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _iterencode_list
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _iterencode_list
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON text of ``o``, dispatching on its type."""
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _iterencode_list
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _iterencode_list
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            # Unsupported type: convert it with _default and encode the
            # result, guarding against a conversion that loops back to o.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]

    return _iterencode
# Sample document: scalar fields plus a list of dicts that each hold a list.
dict_in = {
    "name": "John",
    "age": 30,
    "pets": [
        {
            "name": "Fluffy",
            "type": "cat",
            "toys": [
                "ball",
                "string",
                "box"
            ]
        },
        {
            "name": "Fido",
            "type": "dog",
            "toys": [
                "bone",
                "frisbee"
            ]
        }
    ]
}

# Swap in the custom encoder factory only for the duration of this call.
# c_make_encoder is set to None as well: json.dumps prefers the C accelerator
# whenever it is allowed to, and on CPython versions where the accelerator
# also handles `indent` the patched pure-Python factory would otherwise never
# be called. With it disabled, the encoder always falls back to
# _make_iterencode.
with patch.object(json.encoder, '_make_iterencode', make_iterencode_custom), \
        patch.object(json.encoder, 'c_make_encoder', None):
    print(json.dumps(dict_in, indent=4))
Output:
{
"name": "John",
"age": 30,
"pets": [ {
"name": "Fluffy",
"type": "cat",
"toys": [ "ball", "string", "box" ]
}, {
"name": "Fido",
"type": "dog",
"toys": [ "bone", "frisbee" ]
} ]
}
Edit: adding white spaces before/after brackets and using mock
Edit2: using standard library