My aim is to create a YAML loader that can construct mappings from tagged scalars.
Here is a stripped-down version of the loader, which constructs an object containing names from a scalar tagged !fullname:
import ruamel.yaml
class MyLoader(ruamel.yaml.YAML):
    """YAML instance that registers a constructor for the ``!fullname`` tag."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``ruamel.yaml.YAML`` and register ``!fullname``."""
        super().__init__(*args, **kwargs)
        tag_handler = self._fullname_constructor
        self.constructor.add_constructor("!fullname", tag_handler)

    @staticmethod
    def _fullname_constructor(constructor, node):
        """Split the scalar held by *node* into first, middle and last names.

        Returns a plain ``dict`` with the keys ``first_name``,
        ``middle_names`` (a list, possibly empty) and ``last_name``.
        """
        name_parts = constructor.construct_scalar(node).split()
        # Star-unpacking requires at least two whitespace-separated words;
        # fewer raise ValueError.
        first, *middle, last = name_parts
        return dict(first_name=first, middle_names=middle, last_name=last)
# Single loader instance used by the examples that follow.
myyaml = MyLoader()
The loader can successfully substitute objects for tagged scalars i.e.
>>> myyaml.load("""
- !fullname Albus Percival Wulfric Brian Dumbledore
- !fullname Severus Snape""")
[
{'first_name': 'Albus', 'middle_names': ['Percival', 'Wulfric', 'Brian'], 'last_name': 'Dumbledore'},
{'first_name': 'Severus', 'middle_names': [], 'last_name': 'Snape'}
]
However, the construction fails when I try to merge the constructed mapping into an enclosing object
>>> myyaml.load("""
id: 0
<<: !fullname Albus Percival Wulfric Brian Dumbledore""")
ruamel.yaml.constructor.ConstructorError: while constructing a mapping (...)
expected a mapping or list of mappings for merging, but found scalar
My understanding is that the type of the node is still a ScalarNode, so the constructor is unable to process it even though it ultimately resolves to a mapping.
How can I modify my code so that !fullname {scalar} can be merged into the object?
The Merge Key Language-Independent Type for YAML definition states:
The “<<” merge key is used to indicate that all the keys of one or more specified maps should be inserted in to the current map. If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the current mapping, unless the key already exists in it. If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier in the sequence override keys specified in later mapping nodes.
You have a scalar node, not a mapping node or a sequence of mapping nodes. This is independent of the (Python) type that gets constructed from the scalar node.
If you want to adapt the parser to accept your non-YAML, you need to adapt the flattening routine that handles the merges. Among other things, this expects a ruamel.yaml.comments.CommentedMap and not a simple dict:
# coding: utf-8
import sys
from pathlib import Path
import ruamel.yaml
import ruamel.yaml
class MyConstructor(ruamel.yaml.constructor.RoundTripConstructor):
    """Round-trip constructor whose ``<<`` merge key also accepts a scalar
    node, provided that scalar constructs to a ``dict`` (or subclass)."""

    def flatten_mapping(self, node):
        """Collect the values attached to ``<<`` merge keys in *node*.

        Every merge entry is removed from ``node.value`` in place. Keys
        tagged ``tag:yaml.org,2002:value`` are retagged as plain strings.

        Returns:
            A list of ``(index, constructed_value)`` tuples, one per merged
            mapping, where ``index`` is the position the merge entry had in
            ``node.value`` when it was removed.

        Raises:
            ruamel.yaml.constructor.DuplicateKeyError: on a second ``<<``
                key when ``self.allow_duplicate_keys`` is neither true
                nor ``None``.
            ruamel.yaml.constructor.ConstructorError: when the merge value
                is not a mapping, a sequence of mappings, or a scalar that
                constructs to a ``dict``.
        """
        # Local import: only needed on the duplicate-merge-key warning path,
        # and the surrounding file does not import it at module level.
        import warnings

        def constructed(value_node):
            # Reuse an already constructed object for this node when there
            # is one; otherwise construct it (deeply) now.
            if value_node in self.constructed_objects:
                value = self.constructed_objects[value_node]
            else:
                value = self.construct_object(value_node, deep=True)
            return value

        merge_map_list = []
        index = 0
        while index < len(node.value):
            key_node, value_node = node.value[index]
            if key_node.tag == 'tag:yaml.org,2002:merge':
                if merge_map_list:  # double << key
                    if self.allow_duplicate_keys:
                        # NOTE(review): after the deletion the following
                        # entry shifts into `index`, so this increment skips
                        # it -- confirm that is intended.
                        del node.value[index]
                        index += 1
                        continue
                    args = [
                        'while constructing a mapping',
                        node.start_mark,
                        f'found duplicate key "{key_node.value}"',
                        key_node.start_mark,
                        (
                            '\n'
                            'To suppress this check see:\n'
                            'http://yaml.readthedocs.io/en/latest/api.html#duplicate-keys\n'
                        ),
                        (
                            'Duplicate keys will become an error in future releases, and are errors\n'
                            'by default when using the new API.\n'
                        ),
                    ]
                    if self.allow_duplicate_keys is None:
                        warnings.warn(
                            ruamel.yaml.constructor.DuplicateKeyFutureWarning(*args),
                            stacklevel=1,
                        )
                    else:
                        raise ruamel.yaml.constructor.DuplicateKeyError(*args)
                del node.value[index]
                # Construct the merge value up front: the scalar branch below
                # has to inspect the Python type of the constructed object.
                cval = constructed(value_node)
                if isinstance(value_node, ruamel.yaml.nodes.MappingNode):
                    merge_map_list.append((index, cval))
                elif isinstance(value_node, ruamel.yaml.nodes.SequenceNode):
                    for subnode in value_node.value:
                        if not isinstance(subnode, ruamel.yaml.nodes.MappingNode):
                            raise ruamel.yaml.constructor.ConstructorError(
                                'while constructing a mapping',
                                node.start_mark,
                                f'expected a mapping for merging, but found {subnode.id!s}',
                                subnode.start_mark,
                            )
                        merge_map_list.append((index, constructed(subnode)))
                elif isinstance(value_node, ruamel.yaml.nodes.ScalarNode) and isinstance(cval, dict):
                    # Extension: a (tagged) scalar whose constructor returned
                    # a dict is merged as if it were a mapping node.
                    merge_map_list.append((index, cval))
                else:
                    raise ruamel.yaml.constructor.ConstructorError(
                        'while constructing a mapping',
                        node.start_mark,
                        'expected a mapping or list of mappings for merging, '
                        f'but found {value_node.id!s}',
                        value_node.start_mark,
                    )
            elif key_node.tag == 'tag:yaml.org,2002:value':
                key_node.tag = 'tag:yaml.org,2002:str'
                index += 1
            else:
                index += 1
        return merge_map_list
class MyLoader(ruamel.yaml.YAML):
    """YAML instance that uses ``MyConstructor`` and handles ``!fullname``."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``ruamel.yaml.YAML``, then install the
        custom constructor class and register the ``!fullname`` tag."""
        super().__init__(*args, **kwargs)
        # Must be assigned before `self.constructor` is first accessed below.
        self.Constructor = MyConstructor
        tag_handler = self._fullname_constructor
        self.constructor.add_constructor("!fullname", tag_handler)

    @staticmethod
    def _fullname_constructor(constructor, node):
        """Split the scalar held by *node* into first, middle and last names.

        Returns a ``CommentedMap`` with the keys ``first_name``,
        ``middle_names`` (a list, possibly empty) and ``last_name``.
        """
        name_parts = constructor.construct_scalar(node).split()
        # Star-unpacking requires at least two whitespace-separated words;
        # fewer raise ValueError.
        first, *middle, last = name_parts
        names = {
            "first_name": first,
            "middle_names": middle,
            "last_name": last,
        }
        return ruamel.yaml.comments.CommentedMap(names)
# Build the loader and parse a mapping whose merge key holds a !fullname scalar.
myyaml = MyLoader()
data = myyaml.load("""\
id: 0
<<: !fullname Albus Percival Wulfric Brian Dumbledore
""")
# Show the loaded mapping, followed by a blank separator line.
print(f'{data=}')
print()
# Dump the loaded data back to stdout as YAML.
myyaml.dump(data, sys.stdout)
which gives:
data={'id': 0, 'first_name': 'Albus', 'middle_names': ['Percival', 'Wulfric', 'Brian'], 'last_name': 'Dumbledore'}
id: 0
<<:
first_name: Albus
middle_names:
- Percival
- Wulfric
- Brian
last_name: Dumbledore
As you can see the merge is preserved when dumping, but the scalar is not reconstructed.