python ruamel.yaml

can columns be kept in ruamel.yaml?


I'm reading a YAML file, manipulating it and dumping it again with ruamel.yaml. I'd like the output to stay as human-readable as the original, which requires some tables to be kept aligned in columns.

This is a short example of what I need. I'd like the output to be in columns as in the input.

In [1]: import sys

In [2]: from ruamel.yaml import YAML

In [3]: yaml = YAML()

In [4]: tabs = """
   ...: vals:
   ...:   0: {  0:  1, 1:   2,  2:  3  }
   ...:   1: {  0: 12, 1: 2.3,  2: -1.4}
   ...: """

In [5]: yaml.dump(yaml.load(tabs), sys.stdout)
vals:
  0: {0: 1, 1: 2, 2: 3}
  1: {0: 12, 1: 2.3, 2: -1.4}

Can that be done?


Python code for reference:

import sys
from ruamel.yaml import YAML

# Default YAML() instance, used here for both loading and dumping.
yaml = YAML()

# Input with hand-aligned flow mappings whose column layout should survive the round trip.
tabs = """
vals:
  0: {  0:  1, 1:   2,  2:  3  }
  1: {  0: 12, 1: 2.3,  2: -1.4}
"""

# Load and immediately dump again; the extra alignment spaces are not present in the output.
yaml.dump(yaml.load(tabs), sys.stdout)

Solution

  • No, that won't work. Although ruamel.yaml will keep the individual flow/block style, the extra spaces with the flow-style mappings will not be preserved.

    It is not impossible that this will be added at some future date to ruamel.yaml, but currently no such superfluous whitespace information is stored at all except for empty lines between block style.

    BTW You would also have problems with multi-line flow-style mappings with EOL comments.

    If the entries are always mappings shown on one line and are themselves values in a mapping, you should be able to do some smart postprocessing (with the transform parameter of the .dump() method) to get the extra spaces in. The following is a first attempt at that:

    import sys
    import ruamel.yaml
    
    # Sample input: one-line flow mappings laid out in columns, two of them with
    # end-of-line comments.
    yaml_str = """\
    vals:
      0 : {  0:  1, 1: 2  ,  2:  3  }  # comment 1
      1 : {  0: 12, 1: 2.3,  2: -1.4}
      19: {  0: 42, 1: 3.1,  2: -9.9}  # comment 4
    """
    
    class ReAlign:
        """Re-align runs of one-line flow mappings in dumped YAML text into columns.

        An instance is meant to be passed as the ``transform`` argument of
        ``YAML.dump()``: it is called with the dumped text and returns the text
        to emit. Consecutive lines that share the same indentation are buffered
        as a group; if every line of a group is a ``key: {k: v, ...}`` entry
        with the same number of nested items, the group is rewritten so that
        keys, values (aligned on the decimal point) and end-of-line comments
        line up. Any other group is passed through unchanged.
        """

        def __init__(self):
            self._buffer = []  # (line, leading_spaces) tuples sharing one indent level
            self._current_indent = 0  # ruamel.yaml removes root level indent
            self._ret_val = None  # output lines collected during a __call__

        def __call__(self, s):
            """Return ``s`` with alignable groups of lines re-aligned.

            The result always ends with exactly one trailing newline appended
            after the joined lines.
            """
            # start from a clean state so a previous call that raised cannot
            # leak buffered lines into this output
            self._buffer = []
            self._current_indent = 0
            self._ret_val = []
            for line in s.splitlines():
                leading_spaces = len(line) - len(line.lstrip(' '))
                if self._current_indent != leading_spaces:
                    # indent changed: the buffered group is complete
                    self.output_buffer_aligned()
                    self._current_indent = leading_spaces
                self._buffer.append((line, leading_spaces))
            self.output_buffer_aligned()  # any trailing indented lines
            return '\n'.join(self._ret_val) + '\n'

        def output_buffer_aligned(self):
            """Flush the buffered group to ``self._ret_val``, aligned if possible."""
            if not self._buffer:
                return
            # a single line has nothing to be aligned with, so only groups of
            # two or more lines are offered to the aligner
            if len(self._buffer) == 1 or not self.key_with_mapping_value():
                self._ret_val.extend(line for line, _ in self._buffer)
            self._buffer = []

        def key_with_mapping_value(self):
            """Try to emit the buffered group as column-aligned flow mappings.

            Every buffered line must load, on its own, as a mapping with exactly
            one string key whose value is a non-empty mapping of string keys to
            scalar values, and all lines must have the same number of nested
            items. If so, the aligned lines are appended to ``self._ret_val``
            and True is returned. Otherwise ``self._ret_val`` is left untouched
            and False is returned.

            NOTE(review): values are taken from the ``typ='base'`` loader as
            plain strings, so quoting of nested scalars is presumably not
            reproduced in the aligned output -- verify before using this on
            data with quoted values.
            """
            yaml0 = ruamel.yaml.YAML(typ='base')
            yaml1 = ruamel.yaml.YAML()
            map_len = None
            data = []  # (key, cells, comment, comment_col) per buffered line
            for line, _ in self._buffer:
                try:
                    ld = yaml0.load(line)
                except Exception:  # could not load the single line as YAML
                    return False
                # explicit checks instead of assert: asserts are stripped under -O
                if not isinstance(ld, dict) or len(ld) != 1:
                    return False
                (k, v), = ld.items()
                # an empty nested mapping would be emitted without its closing brace
                if not isinstance(k, str) or not isinstance(v, dict) or not v:
                    return False
                vt = []  # (nested key, integer part, fractional part, separator)
                for k1, v1 in v.items():
                    # nested sequences/mappings cannot be laid out as a single cell
                    if not isinstance(k1, str) or not isinstance(v1, str):
                        return False
                    v1split = v1.split('.')
                    if len(v1split) == 2:
                        # keep the dot even when nothing follows it (e.g. "1.")
                        vt.append((k1, v1split[0], v1split[1], '.'))  # float
                    else:
                        vt.append((k1, v1, '', ' '))  # integer
                if map_len is None:
                    map_len = len(v)
                elif map_len != len(v):
                    return False
                comment, comment_col = None, None
                try:
                    # second load with the default loader to get at the comment tokens
                    ldc = yaml1.load(line)
                    if (ct := ldc.ca.items.get(list(ldc.keys())[0])) is not None:
                        comment = ct[2].value.rstrip()  # strip newline
                        comment_col = ct[2].column
                except Exception:  # unexpected comment layout: leave the group as is
                    return False
                data.append((k, vt, comment, comment_col))

            # build the aligned lines locally so nothing is emitted on failure
            rows = [' ' * leading_spaces for _, leading_spaces in self._buffer]
            max_key_len = max(len(d[0]) for d in data)
            # output the keys left aligned before the colon (right aligned looks
            # nicer but is invalid YAML)
            for index, d in enumerate(data):
                rows[index] += f'{d[0]:<{max_key_len}s}: {{  '  # two spaces before first nested key
            for col in range(map_len):
                closer = '}' if col == map_len - 1 else ', '
                cells = [d[1][col] for d in data]
                max_key_len = max(len(c[0]) for c in cells)
                max_val_len0 = max(len(c[1]) for c in cells)
                max_val_len1 = max(len(c[2]) for c in cells)
                for index, c in enumerate(cells):
                    v = f'{c[1]:>{max_val_len0}s}{c[3]}{c[2]:<{max_val_len1}}'
                    rows[index] += f'{c[0]:>{max_key_len}s}: {v}{closer}'

            comment_cols = [d[3] for d in data if d[3] is not None]
            if comment_cols:
                # shift all comments right by the same amount so that the
                # leftmost one starts at least two columns after the longest line
                max_line_len = max(len(x) for x in rows)
                offset = max(max_line_len + 2 - min(comment_cols), 0)
                for index, d in enumerate(data):
                    if d[2] is not None:
                        rows[index] = f'{rows[index]:<{d[3] + offset}s}{d[2]}'
            self._ret_val.extend(rows)
            return True
    
    
        
    yaml = ruamel.yaml.YAML()
    yaml.preserve_quotes = True
    data = yaml.load(yaml_str)
    # the ReAlign instance is handed to dump() as its transform; it is called
    # with the dumped text and returns the re-aligned text
    yaml.dump(data, sys.stdout, transform=ReAlign())
    

    which gives:

    vals:
      0 : {  0:  1 , 1: 2  , 2:  3  }  # comment 1
      1 : {  0: 12 , 1: 2.3, 2: -1.4}
      19: {  0: 42 , 1: 3.1, 2: -9.9}  # comment 4