I'm reading a YAML file, manipulating it, and dumping it again with ruamel.yaml.
I'd like the output to be as human-readable as the input was, which requires some tables to be kept in columns.
This is a short example of what I need: I'd like the output to be in columns, as in the input.
In [1]: import sys
In [2]: from ruamel.yaml import YAML
In [3]: yaml = YAML()
In [4]: tabs = """
   ...: vals:
   ...:   0: { 0: 1,  1: 2,   2: 3   }
   ...:   1: { 0: 12, 1: 2.3, 2: -1.4}
   ...: """
In [5]: yaml.dump(yaml.load(tabs), sys.stdout)
vals:
  0: {0: 1, 1: 2, 2: 3}
  1: {0: 12, 1: 2.3, 2: -1.4}
Can that be done?
Python code for reference:
import sys

from ruamel.yaml import YAML

yaml = YAML()
# The rows are nested under `vals` and padded by hand so that the entries of
# the flow-style mappings line up in columns; that padding is what the
# round-trip below fails to preserve.
tabs = """
vals:
  0: { 0: 1,  1: 2,   2: 3   }
  1: { 0: 12, 1: 2.3, 2: -1.4}
"""
yaml.dump(yaml.load(tabs), sys.stdout)
No, that won't work. Although ruamel.yaml will keep the individual flow/block style, the extra spaces with the flow-style mappings will not be preserved.
It is not impossible that this will be added to ruamel.yaml at some future date, but currently no such superfluous whitespace information is stored at all, except for empty lines between block-style elements.
BTW You would also have problems with multi-line flow-style mappings with EOL comments.
If the entries are always mappings shown on one line, and are themselves values in a mapping, you should be able to do some smart postprocessing (with the transform parameter of the .dump() method) to get the extra spaces in. The following is a first attempt at that:
import sys
import ruamel.yaml
# Sample input: three one-line flow mappings nested under `vals`, hand-aligned
# in columns, two of them carrying an end-of-line comment.
yaml_str = """\
vals:
  0 : { 0:  1, 1: 2  , 2:  3  }  # comment 1
  1 : { 0: 12, 1: 2.3, 2: -1.4}
  19: { 0: 42, 1: 3.1, 2: -9.9}  # comment 4
"""
class ReAlign:
    """Dump transform that lines up sibling one-line flow mappings in columns.

    Pass an instance as the ``transform`` argument of ``YAML().dump()``. The
    dumped text is split into runs of consecutive lines that share the same
    indentation. A run in which every line has the form ``key: {k: v, ...}``
    with the same, non-zero number of scalar entries is rewritten so that
    keys, values and end-of-line comments are aligned in columns. Every
    other run is passed through unchanged.
    """

    def __init__(self):
        self._buffer = []  # (line, leading_spaces) tuples of the current run
        self._current_indent = 0  # ruamel.yaml removes root level indent
        self._ret_val = None  # list of output lines, created by __call__

    def __call__(self, s):
        """Return the dumped text *s* with eligible runs of lines re-aligned.

        The result always ends with a single newline.
        """
        # Start from a clean state so one instance can be reused, even after
        # an earlier call was interrupted by an exception.
        self._buffer = []
        self._current_indent = 0
        self._ret_val = []
        for line in s.splitlines():
            leading_spaces = len(line) - len(line.lstrip(' '))
            if leading_spaces != self._current_indent:
                # indentation changed: the current run is complete
                self.output_buffer_aligned()
                self._current_indent = leading_spaces
            self._buffer.append((line, leading_spaces))
        self.output_buffer_aligned()  # any trailing indented lines
        return '\n'.join(self._ret_val) + '\n'

    def output_buffer_aligned(self):
        """Flush the buffered run to the output, aligned when possible."""
        if not self._buffer:
            return
        # A single line has nothing to be aligned with; for longer runs fall
        # back to the unmodified lines when the run is not eligible.
        if len(self._buffer) == 1 or not self.key_with_mapping_value():
            self._ret_val.extend(line for line, _ in self._buffer)
        self._buffer = []

    def key_with_mapping_value(self):
        """Try to emit the buffered run as column-aligned flow mappings.

        Returns True after appending the aligned lines to ``self._ret_val``
        when every buffered line qualifies; returns False, leaving
        ``self._ret_val`` untouched, otherwise.
        """
        rows = self._parse_buffer()
        if rows is None:
            return False
        self._ret_val.extend(self._render(rows))
        return True

    def _parse_buffer(self):
        """Parse each buffered line into its parts, or return None.

        Each returned row is ``(leading_spaces, key, cells, comment,
        comment_col)`` where ``cells`` holds one ``(nested_key, integer_part,
        fraction_part)`` tuple per nested entry. Explicit checks are used
        instead of ``assert`` so that validation still happens under
        ``python -O``.
        """
        # The base loader is used so that scalars come back as their source
        # text; the width computations below rely on them being str.
        yaml0 = ruamel.yaml.YAML(typ='base')
        yaml1 = ruamel.yaml.YAML()  # round-trip loader, gives access to comments
        rows = []
        map_len = None
        for line, leading_spaces in self._buffer:
            try:
                ld = yaml0.load(line)
            except Exception:  # could not load the single line as YAML
                return None
            if not isinstance(ld, dict) or len(ld) != 1:
                return None
            (k, v), = ld.items()
            # An empty nested mapping is rejected: it has no columns and
            # would otherwise be emitted without its closing brace.
            if not isinstance(k, str) or not isinstance(v, dict) or not v:
                return None
            if map_len is None:
                map_len = len(v)
            elif len(v) != map_len:
                return None
            cells = []
            for k1, v1 in v.items():
                # nested sequences/mappings cannot be put in a column
                if not isinstance(k1, str) or not isinstance(v1, str):
                    return None
                parts = v1.split('.')
                if len(parts) == 2:
                    cells.append((k1, parts[0], parts[1]))  # float
                else:
                    cells.append((k1, v1, ''))  # integer
            comment, comment_col = None, None
            try:
                ldc = yaml1.load(line)
                ct = ldc.ca.items.get(list(ldc.keys())[0])
                if ct is not None:
                    comment = ct[2].value.rstrip()  # strip newline
                    comment_col = ct[2].column
            except Exception:  # unexpected comment layout: do not align
                return None
            rows.append((leading_spaces, k, cells, comment, comment_col))
        return rows

    @staticmethod
    def _render(rows):
        """Return one aligned output line per parsed row."""
        # Keys are left aligned before the colon (right aligned looks nicer
        # but is invalid YAML).
        key_width = max(len(row[1]) for row in rows)
        out = []
        for indent, key, _cells, _comment, _comment_col in rows:
            # two spaces before first nested key
            out.append(f'{" " * indent}{key:<{key_width}s}: {{  ')

        n_cols = len(rows[0][2])
        for col in range(n_cols):
            closing = '}' if col == n_cols - 1 else ', '
            column = [row[2][col] for row in rows]
            nested_key_width = max(len(cell[0]) for cell in column)
            int_width = max(len(cell[1]) for cell in column)
            frac_width = max(len(cell[2]) for cell in column)
            for i, (nested_key, whole, frac) in enumerate(column):
                # integers get a space where floats have their decimal point
                sep = '.' if frac else ' '
                value = f'{whole:>{int_width}s}{sep}{frac:<{frac_width}}'
                out[i] += f'{nested_key:>{nested_key_width}s}: {value}{closing}'

        comment_cols = [row[4] for row in rows if row[4] is not None]
        if comment_cols:
            # Shift all comments by the same amount, so that the leftmost one
            # starts at least two columns after the longest line.
            max_line_len = max(len(text) for text in out)
            offset = max(max_line_len + 2 - min(comment_cols), 0)
            for i, row in enumerate(rows):
                if row[3] is not None:
                    out[i] = f'{out[i]:<{row[4] + offset}s}{row[3]}'
        return out
# Round-trip the sample document; the ReAlign instance is handed to dump() as
# its transform so that it post-processes the dumped text before it is written.
aligner = ReAlign()
yaml = ruamel.yaml.YAML()
yaml.preserve_quotes = True
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout, transform=aligner)
which gives:
vals:
  0 : {  0:  1 , 1: 2  , 2:  3  }  # comment 1
  1 : {  0: 12 , 1: 2.3, 2: -1.4}
  19: {  0: 42 , 1: 3.1, 2: -9.9}  # comment 4