Can columns be kept in ruamel.yaml?

I'm reading a YAML file, manipulating it and dumping it again with ruamel.yaml. I'd like the output to be as human-readable as the input was. That requires some tables to be kept aligned in columns.

This is a short example of what I need. I'd like the output to be in columns as in the input.

In [1]: import sys

In [2]: from ruamel.yaml import YAML

In [3]: yaml = YAML()

In [4]: tabs = """
   ...: vals:
   ...:   0: {  0:  1, 1:   2,  2:  3  }
   ...:   1: {  0: 12, 1: 2.3,  2: -1.4}
   ...: """

In [5]: yaml.dump(yaml.load(tabs), sys.stdout)
vals:
  0: {0: 1, 1: 2, 2: 3}
  1: {0: 12, 1: 2.3, 2: -1.4}

Can that be done?

Python code for reference:

import sys
from ruamel.yaml import YAML

# YAML instance with default settings, used for both loading and dumping.
yaml = YAML()

# Sample document: the flow-style mappings under `vals` are hand-padded with
# extra spaces so that their entries line up in columns.
tabs = """
vals:
  0: {  0:  1, 1:   2,  2:  3  }
  1: {  0: 12, 1: 2.3,  2: -1.4}
"""

# Load the document and dump it straight back to stdout to see whether the
# column padding survives.
yaml.dump(yaml.load(tabs), sys.stdout)

Solution

No, that won't work. Although ruamel.yaml will keep the individual flow/block style, the extra spaces with the flow-style mappings will not be preserved.

It is not impossible that this will be added at some future date to ruamel.yaml, but currently no such superfluous whitespace information is stored at all except for empty lines between block style.

BTW You would also have problems with multi-line flow-style mappings with EOL comments.

If the entries are always mappings shown on one line and are themselves values in a mapping, you should be able to do some smart postprocessing (with the transform parameter of the .dump() method) to get the extra spaces in. The following is a first attempt at that:

import sys
import ruamel.yaml

# Sample input: three sibling keys under `vals`, each with a one-line
# flow-style mapping as its value; the first and last lines also carry an
# end-of-line comment.
yaml_str = """\
vals:
  0 : {  0:  1, 1: 2  ,  2:  3  }  # comment 1
  1 : {  0: 12, 1: 2.3,  2: -1.4}
  19: {  0: 42, 1: 3.1,  2: -9.9}  # comment 4
"""

class ReAlign:
    """Callable that re-aligns one-line flow mappings in dumped YAML text.

    An instance is meant to be passed as the ``transform`` argument of
    ``YAML.dump()``: it receives the dumped document as one string and
    returns the string that should be written instead.

    Consecutive lines with the same indentation are collected into a group.
    If every line of a group is ``key: {k: v, ...}`` with the same number of
    entries, the group is rewritten so that keys, values and end-of-line
    comments line up in columns.  Any other group is passed through
    unchanged.
    """

    def __init__(self):
        self._buffer = []  # (line, leading_spaces) tuples of the current group
        self._current_indent = 0  # ruamel.yaml removes root level indent
        self._ret_val = None  # output lines of the call in progress

    def __call__(self, s):
        """Return ``s`` with alignable groups rewritten; always ends in a newline."""
        self._ret_val = []
        # Start clean so a group left behind by a call that raised cannot leak
        # into this document.
        self._buffer = []
        self._current_indent = 0
        for line in s.splitlines():
            leading_spaces = len(line) - len(line.lstrip(' '))
            if leading_spaces != self._current_indent:
                # Indentation changed: the previous group is complete.
                self.output_buffer_aligned()
                self._current_indent = leading_spaces
            self._buffer.append((line, leading_spaces))
        self.output_buffer_aligned()  # any trailing indented lines
        return '\n'.join(self._ret_val) + '\n'

    def output_buffer_aligned(self):
        """Flush the buffered group to the output, aligned when possible.

        A single line never needs aligning; a longer group is emitted verbatim
        when ``key_with_mapping_value`` declines to handle it.
        """
        if not self._buffer:
            return
        if len(self._buffer) == 1 or not self.key_with_mapping_value():
            self._ret_val.extend(line for line, _ in self._buffer)
        self._buffer = []

    def key_with_mapping_value(self):
        """Try to emit the buffered group as column-aligned flow mappings.

        Returns True, after appending the aligned lines to ``self._ret_val``,
        when every buffered line parses on its own as a single-key mapping
        whose value is a non-empty mapping of scalars and all lines have the
        same number of entries.  Returns False without touching
        ``self._ret_val`` otherwise.

        NOTE(review): the aligned text is rebuilt from the values loaded with
        the 'base' loader, not from the source text, so quoting of scalars in
        the original line is presumably not carried over -- confirm before
        relying on this for quoted strings.
        """
        yaml0 = ruamel.yaml.YAML(typ='base')  # plain dicts, every scalar a str
        yaml1 = ruamel.yaml.YAML()  # used only to read the attached comment
        map_len = None
        data = []  # per line: (key, [(nested key, whole, fraction)], comment, column)
        for line, _ in self._buffer:
            try:
                ld = yaml0.load(line)
            except Exception:  # could not load the single line as YAML
                return False
            # Explicit checks rather than `assert`, which is stripped under -O.
            if not isinstance(ld, dict) or len(ld) != 1:
                return False
            k, v = next(iter(ld.items()))
            # An empty mapping has no last column, so no closing brace would
            # ever be written; leave such groups untouched.
            if not isinstance(k, str) or not isinstance(v, dict) or not v:
                return False
            if map_len is None:
                map_len = len(v)
            elif map_len != len(v):
                return False
            vt = []
            for k1, v1 in v.items():
                # Nested collections cannot be laid out as a single cell.
                if not isinstance(k1, str) or not isinstance(v1, str):
                    return False
                v1split = v1.split('.')
                if len(v1split) == 2:
                    vt.append((k1, v1split[0], v1split[1]))  # float
                else:
                    vt.append((k1, v1, ''))  # integer (or anything else)
            comment, comment_col = None, None
            try:
                ldc = yaml1.load(line)
                if (ct := ldc.ca.items.get(next(iter(ldc)))) is not None:
                    comment = ct[2].value.rstrip()  # strip newline
                    comment_col = ct[2].column
            except Exception:  # comment not where expected: leave the group as is
                return False
            data.append((k, vt, comment, comment_col))

        # Build the aligned lines locally so a failure cannot leave partial
        # output behind in self._ret_val.
        max_key_len = max(len(d[0]) for d in data)
        # Keys are left aligned before the colon (right aligned looks nicer
        # but is invalid YAML); two spaces before the first nested key.
        lines = [
            ' ' * leading_spaces + d[0].ljust(max_key_len) + ': {  '
            for (_, leading_spaces), d in zip(self._buffer, data)
        ]
        for col in range(map_len):
            cells = [d[1][col] for d in data]
            key_width = max(len(cell[0]) for cell in cells)
            whole_width = max(len(cell[1]) for cell in cells)
            frac_width = max(len(cell[2]) for cell in cells)
            closer = '}' if col == map_len - 1 else ', '
            for index, (nested_key, whole, frac) in enumerate(cells):
                # Align on the decimal point; values without a fraction get a
                # space where the point would be.
                value = whole.rjust(whole_width) + ('.' if frac else ' ') + frac.ljust(frac_width)
                lines[index] += f'{nested_key.rjust(key_width)}: {value}{closer}'

        comment_cols = [d[3] for d in data if d[3] is not None]
        if comment_cols:
            # Shift all comments by the same amount so the leftmost one ends
            # up at least two columns past the longest aligned line.
            max_line_len = max(len(x) for x in lines)
            offset = max(max_line_len + 2 - min(comment_cols), 0)
            for index, d in enumerate(data):
                if d[2] is not None:
                    lines[index] = lines[index].ljust(d[3] + offset) + d[2]

        self._ret_val.extend(lines)
        return True


    
# Load the sample document and dump it to stdout, passing a ReAlign instance
# as `transform` so the dumped text is post-processed before being written.
yaml = ruamel.yaml.YAML()
yaml.preserve_quotes = True
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout, transform=ReAlign())

which gives:

vals:
  0 : {  0:  1 , 1: 2  , 2:  3  }  # comment 1
  1 : {  0: 12 , 1: 2.3, 2: -1.4}
  19: {  0: 42 , 1: 3.1, 2: -9.9}  # comment 4