python, yaml, ruamel.yaml

How to flatten a mapping constructed from a tagged scalar using ruamel.yaml


My aim is to create a YAML loader that can construct mappings from tagged scalars. Here is a stripped-down version of the loader which constructs an object containing names from a scalar tagged !fullname.

import ruamel.yaml

class MyLoader(ruamel.yaml.YAML):
    """YAML loader that turns ``!fullname`` scalars into name mappings."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``ruamel.yaml.YAML`` and register the tag."""
        super().__init__(*args, **kwargs)
        register = self.constructor.add_constructor
        register("!fullname", self._fullname_constructor)

    @staticmethod
    def _fullname_constructor(constructor, node):
        """Split a whitespace-separated full name into its parts.

        The first word becomes ``first_name``, the last word ``last_name``
        and every word in between is collected, in order, in ``middle_names``.
        Unpacking fails with ``ValueError`` when the scalar has fewer than
        two words.
        """
        first, *middle, last = constructor.construct_scalar(node).split()
        return dict(first_name=first, middle_names=middle, last_name=last)


myyaml = MyLoader()

The loader can successfully substitute objects for tagged scalars i.e.

>>> myyaml.load("""
    - !fullname Albus Percival Wulfric Brian Dumbledore
    - !fullname Severus Snape""")
[
  {'first_name': 'Albus', 'middle_names': ['Percival', 'Wulfric', 'Brian'], 'last_name': 'Dumbledore'},
  {'first_name': 'Severus', 'middle_names': [], 'last_name': 'Snape'}
]

However, the construction fails when I try to merge the constructed mapping into an enclosing object

>>> myyaml.load("""
    id: 0
    <<: !fullname Albus Percival Wulfric Brian Dumbledore""")
ruamel.yaml.constructor.ConstructorError: while constructing a mapping (...)
expected a mapping or list of mappings for merging, but found scalar

My understanding is that the type of the node is still a ScalarNode, so the constructor is unable to process it even though it ultimately resolves to a mapping. How can I modify my code so that !fullname {scalar} can be merged into the enclosing mapping?


Solution

  • The definition of the merge key language-independent type for YAML states:

    The “<<” merge key is used to indicate that all the keys of one or more specified maps should be inserted in to the current map. If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the current mapping, unless the key already exists in it. If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier in the sequence override keys specified in later mapping nodes.

    You have a scalar node, not a mapping node or a sequence of mapping nodes. This is independent of the (Python) type that gets constructed from the scalar node.

    If you want to adapt the parser to accept your non-YAML input, you need to adapt the flattening routine that handles the merges. Among other things, this expects a ruamel.yaml.comments.CommentedMap and not a simple dict:

    # coding: utf-8
    import sys
    from pathlib import Path
    import ruamel.yaml
    
    import ruamel.yaml
    
    class MyConstructor(ruamel.yaml.constructor.RoundTripConstructor):
        """Round-trip constructor whose merge handling also accepts scalars.

        As the value of a ``<<`` merge key this class accepts a mapping node,
        a sequence of mapping nodes, and additionally a scalar node whose
        constructed object is a ``dict`` (for example one produced by a
        custom tag constructor).
        """

        def flatten_mapping(self, node):
            """Remove the ``<<`` entries from *node* and collect what they merge.

            Every key/value pair of ``node.value`` whose key is tagged as a
            merge key is deleted from ``node.value``; the object(s) constructed
            from its value are collected instead.  Keys tagged
            ``tag:yaml.org,2002:value`` are retagged as plain strings.

            Args:
                node: the mapping node being constructed; ``node.value`` is
                    modified in place.

            Returns:
                A list of ``(index, mapping)`` tuples, where ``index`` is the
                position in ``node.value`` from which the merge key was
                removed and ``mapping`` is the constructed object to merge.

            Raises:
                ruamel.yaml.constructor.DuplicateKeyError: a second ``<<`` key
                    is found and ``allow_duplicate_keys`` is neither truthy
                    nor ``None``.
                ruamel.yaml.constructor.ConstructorError: the merge value is
                    not a mapping, a sequence of mappings, or a scalar that
                    constructs to a ``dict``.
            """
            # Local import so this block does not rely on the file's
            # top-level import list.
            import warnings

            # These names live in ruamel.yaml.constructor and are not imported
            # at file level; bind them explicitly so the error paths raise the
            # intended exception rather than a NameError.
            ConstructorError = ruamel.yaml.constructor.ConstructorError
            DuplicateKeyError = ruamel.yaml.constructor.DuplicateKeyError
            DuplicateKeyFutureWarning = ruamel.yaml.constructor.DuplicateKeyFutureWarning

            def constructed(value_node):
                # Reuse an already constructed object (e.g. an aliased node)
                # instead of constructing it a second time.
                if value_node in self.constructed_objects:
                    value = self.constructed_objects[value_node]
                else:
                    value = self.construct_object(value_node, deep=True)
                return value

            merge_map_list = []  # (index, constructed mapping) tuples
            index = 0
            while index < len(node.value):
                key_node, value_node = node.value[index]
                if key_node.tag == 'tag:yaml.org,2002:merge':
                    if merge_map_list:  # double << key
                        if self.allow_duplicate_keys:
                            del node.value[index]
                            # NOTE(review): after the deletion `index` already
                            # refers to the following pair, so this increment
                            # skips inspecting that pair - confirm intended.
                            index += 1
                            continue
                        args = [
                            'while constructing a mapping',
                            node.start_mark,
                            f'found duplicate key "{key_node.value}"',
                            key_node.start_mark,
                            """
                            To suppress this check see:
                               http://yaml.readthedocs.io/en/latest/api.html#duplicate-keys
                            """,
                            """\
                            Duplicate keys will become an error in future releases, and are errors
                            by default when using the new API.
                            """,
                        ]
                        if self.allow_duplicate_keys is None:
                            warnings.warn(DuplicateKeyFutureWarning(*args), stacklevel=1)
                        else:
                            raise DuplicateKeyError(*args)
                    # No index increment after the deletion: the next pair has
                    # moved into position `index`.
                    del node.value[index]
                    cval = constructed(value_node)
                    if isinstance(value_node, ruamel.yaml.nodes.MappingNode):
                        merge_map_list.append((index, cval))
                    elif isinstance(value_node, ruamel.yaml.nodes.SequenceNode):
                        for subnode in value_node.value:
                            if not isinstance(subnode, ruamel.yaml.nodes.MappingNode):
                                raise ConstructorError(
                                    'while constructing a mapping',
                                    node.start_mark,
                                    f'expected a mapping for merging, but found {subnode.id!s}',
                                    subnode.start_mark,
                                )
                            merge_map_list.append((index, constructed(subnode)))
                    elif isinstance(value_node, ruamel.yaml.nodes.ScalarNode) and isinstance(cval, dict):
                        # Extension over plain YAML merging: a scalar is
                        # accepted when the object constructed from it is a
                        # dict.
                        merge_map_list.append((index, cval))
                    else:
                        raise ConstructorError(
                            'while constructing a mapping',
                            node.start_mark,
                            'expected a mapping or list of mappings for merging, '
                            f'but found {value_node.id!s}',
                            value_node.start_mark,
                        )
                elif key_node.tag == 'tag:yaml.org,2002:value':
                    # A '=' value key is treated as an ordinary string key.
                    key_node.tag = 'tag:yaml.org,2002:str'
                    index += 1
                else:
                    index += 1
            return merge_map_list
    
    
    class MyLoader(ruamel.yaml.YAML):
        """YAML loader that builds mergeable mappings from ``!fullname`` scalars."""

        def __init__(self, *args, **kwargs):
            """Forward all arguments to ``ruamel.yaml.YAML`` and wire up the tag."""
            super().__init__(*args, **kwargs)
            # Select the custom constructor class before self.constructor is
            # used to register the tag handler.
            self.Constructor = MyConstructor
            register = self.constructor.add_constructor
            register("!fullname", self._fullname_constructor)

        @staticmethod
        def _fullname_constructor(constructor, node):
            """Split a whitespace-separated full name into a ``CommentedMap``.

            The first word becomes ``first_name``, the last word ``last_name``
            and every word in between is collected, in order, in
            ``middle_names``.
            """
            first, *middle, last = constructor.construct_scalar(node).split()
            name_parts = {
                "first_name": first,
                "middle_names": middle,
                "last_name": last,
            }
            return ruamel.yaml.comments.CommentedMap(name_parts)
    
    
    
    # Demo: merge a !fullname scalar into a mapping, then show the loaded
    # data followed by the YAML dumped from it.
    document = """\
    id: 0
    <<: !fullname Albus Percival Wulfric Brian Dumbledore
    """
    myyaml = MyLoader()
    data = myyaml.load(document)

    # `data` must keep this name: the f-string below prints it as a label.
    print(f'{data=}', end='\n\n')
    myyaml.dump(data, sys.stdout)
    

    which gives:

    data={'id': 0, 'first_name': 'Albus', 'middle_names': ['Percival', 'Wulfric', 'Brian'], 'last_name': 'Dumbledore'}
    
    id: 0
    <<:
      first_name: Albus
      middle_names:
      - Percival
      - Wulfric
      - Brian
      last_name: Dumbledore
    

    As you can see, the merge key is preserved when dumping, but the original tagged scalar (!fullname ...) is not reconstructed; the merged value is dumped as a regular mapping instead.