I'm reading a YAML file, manipulating it, and dumping it again with ruamel.yaml.
I'd like the output to be as human-readable as the input was, which requires some tables to be kept in columns.
This is a short example of what I need: I'd like the output to be in columns, as in the input.
In [1]: import sys
In [2]: from ruamel.yaml import YAML
In [3]: yaml = YAML()
In [4]: tabs = """
   ...: vals:
   ...:   0: { 0: 1, 1: 2, 2: 3 }
   ...:   1: { 0: 12, 1: 2.3, 2: -1.4}
   ...: """
In [5]: yaml.dump(yaml.load(tabs), sys.stdout)
vals:
  0: {0: 1, 1: 2, 2: 3}
  1: {0: 12, 1: 2.3, 2: -1.4}
Can that be done?
Python code for reference:
import sys
from ruamel.yaml import YAML
# Loader/dumper instance used for both the load and the dump below.
yaml = YAML()

# Sample document: a mapping `vals` whose values are one-line flow-style
# mappings. The closing triple quote and the `vals:` line were missing, which
# left the string literal unterminated.
tabs = """
vals:
  0: { 0: 1, 1: 2, 2: 3 }
  1: { 0: 12, 1: 2.3, 2: -1.4}
"""

# Load the sample and write it straight back to stdout.
yaml.dump(yaml.load(tabs), sys.stdout)
No, that won't work. Although ruamel.yaml will keep the individual flow/block style, the extra spaces within the flow-style mappings will not be preserved.
It is not impossible that this will be added to ruamel.yaml at some future date, but currently no such superfluous whitespace information is stored at all, except for empty lines between block-style entries.
By the way, you would also have problems with multi-line flow-style mappings that have end-of-line comments.
If the entries are always mappings shown on one line, and are themselves values in a mapping, you should be able to do some smart postprocessing (with the `transform` parameter of the `.dump()` method) to get the extra spaces in. The following is a first attempt at that:
import sys
import ruamel.yaml
# Sample input: three sibling keys, each holding a one-line flow-style mapping,
# two of them followed by an end-of-line comment. The closing triple quote was
# missing, which left the literal unterminated.
yaml_str = """\
0 : { 0: 1, 1: 2 , 2: 3 } # comment 1
1 : { 0: 12, 1: 2.3, 2: -1.4}
19: { 0: 42, 1: 3.1, 2: -9.9} # comment 4
"""
class ReAlign:
    """Re-align runs of sibling one-line flow mappings into columns.

    An instance is a callable taking the complete dumped YAML text and
    returning the text to emit (it is passed as ``transform=`` to ``dump()``
    by the calling script). Consecutive lines with the same number of leading
    spaces are collected; when every line of such a run is
    ``key: {k: v, ...}`` with the same number of nested entries, the run is
    rewritten so that keys, nested keys, values (aligned on the decimal point)
    and end-of-line comments line up. Any other run is passed through
    unchanged.

    NOTE(review): nested values are taken from the ``typ='base'`` loader as
    plain strings and written back unquoted, so runs whose nested values need
    quoting are presumably not reproduced faithfully - confirm before relying
    on this for such data.
    """

    def __init__(self):
        self._buffer = []  # (line, leading_spaces) tuples of the current run
        self._current_indent = 0  # ruamel.yaml removes root level indent
        self._ret_val = None  # output lines of the call in progress

    def __call__(self, s):
        """Return ``s`` with every alignable run of lines rewritten in columns.

        The result always ends in a single newline, also for empty input.
        """
        # Start from a clean state so one instance can be used for several dumps.
        self._ret_val = []
        self._buffer = []
        self._current_indent = 0
        for line in s.splitlines():
            leading_spaces = len(line) - len(line.lstrip(' '))
            if leading_spaces != self._current_indent:
                # Indentation changed: the previous run is complete.
                self.output_buffer_aligned()
                self._current_indent = leading_spaces
            self._buffer.append((line, leading_spaces))
        self.output_buffer_aligned()  # any trailing indented lines
        return '\n'.join(self._ret_val) + '\n'

    def output_buffer_aligned(self):
        """Flush the buffered run: aligned when it qualifies, verbatim otherwise."""
        if not self._buffer:
            return
        # A single line has nothing to align against, so it is copied as is.
        if len(self._buffer) == 1 or not self.key_with_mapping_value():
            self._ret_val.extend(line for line, _ in self._buffer)
        # Always clear the run, otherwise its lines would be emitted again
        # with the next flush.
        self._buffer = []

    def key_with_mapping_value(self):
        """Try to emit the buffered run as aligned ``key: { ... }`` lines.

        Every buffered line has to load, on its own, as a mapping with exactly
        one key whose value is a non-empty mapping of string scalars, and all
        lines need the same number of nested entries. If so, the aligned lines
        are appended to ``self._ret_val`` and True is returned. Otherwise
        ``self._ret_val`` is left untouched and False is returned.
        """
        yaml0 = ruamel.yaml.YAML(typ='base')  # used for the structure/values of a line
        yaml1 = ruamel.yaml.YAML()  # used only to find the end-of-line comment
        map_len = None
        data = []  # (key, [(nested_key, int_part, frac_part), ...], comment, comment_col)
        for line, _ in self._buffer:
            try:
                ld = yaml0.load(line)
            except Exception:  # could not load the single line as YAML
                return False
            # Explicit checks instead of asserts: asserts vanish under `python -O`.
            if not isinstance(ld, dict) or len(ld) != 1:
                return False
            (k, v), = ld.items()
            if not isinstance(k, str) or not isinstance(v, dict):
                return False
            if not v:
                # An empty `{}` has no last column that would close the brace.
                return False
            if map_len is None:
                map_len = len(v)
            elif map_len != len(v):
                return False
            vt = []
            for k1, v1 in v.items():
                if not isinstance(k1, str) or not isinstance(v1, str):
                    return False
                v1split = v1.split('.')
                if len(v1split) == 2:
                    vt.append((k1, v1split[0], v1split[1]))  # float
                else:
                    vt.append((k1, v1, ''))  # integer
            comment, comment_col = None, None
            try:
                ldc = yaml1.load(line)
                if (ct := ldc.ca.items.get(list(ldc.keys())[0])) is not None:
                    comment = ct[2].value.rstrip()  # strip newline
                    comment_col = ct[2].column
            except Exception:
                # No usable comment information for this line: give up on the run.
                return False
            data.append((k, vt, comment, comment_col))

        # Build the new lines separately, so nothing is emitted on a failure above.
        aligned = [' ' * leading_spaces for _, leading_spaces in self._buffer]
        max_key_len = max(len(d[0]) for d in data)
        # output the keys left aligned before the colon (right aligned looks
        # nicer but is invalid YAML)
        for index, d in enumerate(data):
            aligned[index] += f'{d[0]:<{max_key_len}s}: {{ '  # two spaces before first nested key
        for col in range(map_len):
            closer = '}' if col == map_len - 1 else ', '
            cells = [d[1][col] for d in data]
            max_key_len = max(len(c[0]) for c in cells)
            max_val_len0 = max(len(c[1]) for c in cells)
            max_val_len1 = max(len(c[2]) for c in cells)
            for index, c in enumerate(cells):
                # A value without fraction gets a space where the '.' would be.
                v = f'{c[1]:>{max_val_len0}s}{"." if c[2] else " "}{c[2]:<{max_val_len1}}'
                aligned[index] += f'{c[0]:>{max_key_len}s}: {v}{closer}'

        comment_cols = [d[3] for d in data if d[3] is not None]
        if comment_cols:
            # Shift all comments by the same amount, so that the leftmost one
            # starts two columns after the longest aligned line.
            max_line_len = max(len(x) for x in aligned)
            offset = max(max_line_len + 2 - min(comment_cols), 0)
            for index, d in enumerate(data):
                if d[2] is not None:
                    aligned[index] = f'{aligned[index]:<{d[3] + offset}s}{d[2]}'
        self._ret_val.extend(aligned)
        return True
# Load `yaml_str`, then dump it to stdout with a fresh ReAlign instance passed
# as `transform`, so the dumped text goes through ReAlign before being written.
yaml = ruamel.yaml.YAML()
yaml.preserve_quotes = True  # has to be set before the load below
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout, transform=ReAlign())
which gives:
0 : { 0:  1 , 1: 2  , 2:  3  }  # comment 1
1 : { 0: 12 , 1: 2.3, 2: -1.4}
19: { 0: 42 , 1: 3.1, 2: -9.9}      # comment 4