python json pyyaml ruamel.yaml

How to use ruamel.yaml to output line-delimited/formatted json block?


I'm currently using this code block with pyyaml, but I want to get the same thing done with ruamel.yaml

import yaml
import json
# Deployment parameters: serialized to JSON below and also used to build the
# ConfigMap's metadata.
envList = {
    "env": "development",
    "region": "us",
    "tag": "latest",
    "kubesvcname": "service",
    "containername": "testcontainer",
    "namespace": "kubens",
    "buildid": "1000",
}

# Multi-line, 4-space indented JSON text that is embedded in the ConfigMap.
jsonFile = json.dumps(envList, indent=4)

# ConfigMap document as a plain dict; the JSON text is stored as a single string
# value under data["appsettings.json"].
configMapHeader = {
    "apiVersion": "v1",
    "kind": "ConfigMap",
    "metadata": {
        "name": f'{envList["kubesvcname"]}_{envList["containername"]}-{envList["tag"]}-{envList["buildid"]}',
        "namespace": f'{envList["namespace"]}',
    },
    "data": {"appsettings.json": jsonFile},
}

def str_presenter(dumper, data):
    """Represent a ``str`` for YAML output, using literal block style for multi-line text.

    A string that ``splitlines()`` breaks into more than one line is emitted
    with style ``'|'``; any other string is emitted with the dumper's default
    scalar style.

    Ref: https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data

    Args:
        dumper: object providing ``represent_scalar(tag, value, style=...)``.
        data: the string being represented.

    Returns:
        Whatever ``dumper.represent_scalar`` returns for the string.
    """
    yaml_str_tag = 'tag:yaml.org,2002:str'
    line_count = len(data.splitlines())
    if line_count <= 1:
        # Empty or single-line text (a lone trailing newline still counts as one line).
        return dumper.represent_scalar(yaml_str_tag, data)
    return dumper.represent_scalar(yaml_str_tag, data, style='|')


# Register str_presenter on SafeRepresenter so that str values dumped through
# yaml.safe_dump are passed to it.
yaml.representer.SafeRepresenter.add_representer(str, str_presenter)
# sort_keys=False keeps the dict's insertion order in the output. line_break is
# left at its default: it takes a line-terminator string ('\n', '\r' or '\r\n'),
# not a boolean.
print(yaml.safe_dump(configMapHeader, sort_keys=False))

The output of this looks something like this:

apiVersion: v1
kind: ConfigMap
metadata:
  name: service_testcontainer-latest-1000
  namespace: kubens
data:
  appsettings.json: |-
    {
        "env": "development",
        "region": "us",
        "tag": "latest",
        "kubesvcname": "service",
        "containername": "testcontainer",
        "namespace": "kubens",
        "buildid": "1000"
    }

When I try to do something similar with ruamel.yaml, the JSON string is written to the output file as a double-quoted scalar with escaped \n line breaks instead of as a literal block.

This is the code:

def initYamlParser():
    """Create and configure the ruamel.yaml ``YAML`` object used for dumping.

    Returns:
        A ``ruamel.yaml.YAML`` instance with ``preserve_quotes`` and
        ``explicit_start`` both set to ``True``.
    """
    # Imported inside the function, so nothing is added to the module namespace.
    from ruamel.yaml import YAML

    parser = YAML()
    parser.preserve_quotes = True
    parser.explicit_start = True
    return parser

import sys
import json
# Configured ruamel.yaml YAML instance used for the dump at the end.
yaml = initYamlParser()

# Deployment parameters: serialized to JSON below and also used to build the
# ConfigMap's metadata.
envList = {
    "env": "development",
    "region": "us",
    "tag": "latest",
    "kubesvcname": "service",
    "containerver": "latest",
    "containername": "testcontainer",
    "namespace": "kubens",
    "buildid": "1000",
}

# Multi-line, 4-space indented JSON text that is embedded in the ConfigMap.
jsonFile = json.dumps(envList, indent=4)

# ConfigMap document as a plain dict; the JSON text is a plain str value here.
configMapHeader = {
    "apiVersion": "v1",
    "kind": "ConfigMap",
    "metadata": {
        "name": f'{envList["kubesvcname"]}_{envList["containername"]}-{envList["containerver"]}-{envList["buildid"]}',
        "namespace": f'{envList["namespace"]}',
    },
    "data": {"appsettings.json": jsonFile},
}

print(configMapHeader)

# Write the document to file.yaml.
with open("file.yaml", 'w+') as file:
    yaml.dump(configMapHeader, file)

This is the output:

apiVersion: v1
kind: ConfigMap
metadata:
  name: service_testcontainer-latest-1000
  namespace: kubens
data:
  appsettings.json: "{\n    \"env\": \"development\",\n    \"region\": \"us\",\n \
    \   \"tag\": \"latest\",\n    \"kubesvcname\": \"service\",\n    \"containerver\"\
    : \"latest\",\n    \"containername\": \"testcontainer\",\n    \"namespace\": \"\
    kubens\",\n    \"buildid\": \"1000\"\n}"

Ruamel.yaml doesn't support str_presenter, so I'm hoping for either a ruamel.yaml-compatible version of that function or a way to get the same presentation natively in ruamel.yaml.


Solution

  • First of all, you should not normally append to a file when dumping a YAML document. Since ruamel.yaml writes UTF-8 to files, open them in "wb" mode.

    You set yaml.preserve_quotes = True, but that only works when you round-trip (load YAML, then dump it).

    Then, in ruamel.yaml you can register a different representer function as well: just attach it via the representer attribute (which is an instance of the class named by the Representer attribute, which defaults to RoundTripRepresenter). The only difference is that the function is handed a representer as its first argument:

    import sys
    import json
    import ruamel.yaml
       
    # YAML instance used for dumping; explicit_start makes the output begin
    # with the "---" document start marker.
    yaml = ruamel.yaml.YAML()
    yaml.explicit_start = True
    
    def str_presenter(cls, data):
        """Represent a ``str``, using literal block style ('|') for multi-line text.

        Args:
            cls: the representer the function is handed as its first argument;
                it must provide ``represent_scalar(tag, value, style=...)``.
            data: the string being represented.

        Returns:
            Whatever ``cls.represent_scalar`` returns for the string.
        """
        str_tag = 'tag:yaml.org,2002:str'
        if len(data.splitlines()) <= 1:
            # Empty or single-line text keeps the representer's default style.
            return cls.represent_scalar(str_tag, data)
        return cls.represent_scalar(str_tag, data, style='|')
    
    # Register str_presenter through the YAML object's representer so that str
    # values are passed to it when dumping.
    yaml.representer.add_representer(str, str_presenter)
    
    # Deployment parameters: serialized to JSON below and also used to build
    # the ConfigMap's metadata.
    envList = {
        "env": "development",
        "region": "us",
        "tag": "latest",
        "kubesvcname": "service",
        "containerver": "latest",
        "containername": "testcontainer",
        "namespace": "kubens",
        "buildid": "1000",
    }
    # Multi-line, 4-space indented JSON text that is embedded in the ConfigMap.
    jsonFile = json.dumps(envList, indent=4)
    # ConfigMap document as a plain dict; the JSON text is a plain str value.
    configMapHeader = {
        "apiVersion": "v1",
        "kind": "ConfigMap",
        "metadata": {
            "name": f'{envList["kubesvcname"]}_{envList["containername"]}-{envList["containerver"]}-{envList["buildid"]}',
            "namespace": f'{envList["namespace"]}',
        },
        "data": {"appsettings.json": jsonFile},
    }
     
    
    # Write the ConfigMap document as YAML to standard output.
    yaml.dump(configMapHeader, sys.stdout)
    

    which gives:

    ---
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: service_testcontainer-latest-1000
      namespace: kubens
    data:
      appsettings.json: |-
        {
            "env": "development",
            "region": "us",
            "tag": "latest",
            "kubesvcname": "service",
            "containerver": "latest",
            "containername": "testcontainer",
            "namespace": "kubens",
            "buildid": "1000"
        }
    

    If you load your expected output and then dump it, you can see that ruamel.yaml preserves the literal block style scalar. By inspecting the value for the key appsettings.json, you'll see that it is an instance of the class LiteralScalarString. That knowledge can be used to make only that one scalar be dumped this way (instead of all scalars that have embedded newlines):

    import sys
    import json
    import ruamel.yaml
       
    # YAML instance used for dumping; explicit_start makes the output begin
    # with the "---" document start marker.
    yaml = ruamel.yaml.YAML()
    yaml.explicit_start = True
    # Deployment parameters: serialized to JSON below and also used to build
    # the ConfigMap's metadata.
    envList = {
        "env": "development",
        "region": "us",
        "tag": "latest",
        "kubesvcname": "service",
        "containerver": "latest",
        "containername": "testcontainer",
        "namespace": "kubens",
        "buildid": "1000",
    }
    # Multi-line, 4-space indented JSON text that is embedded in the ConfigMap.
    jsonFile = json.dumps(envList, indent=4)
    configMapHeader = {
        "apiVersion": "v1",
        "kind": "ConfigMap",
        "metadata": {
            "name": f'{envList["kubesvcname"]}_{envList["containername"]}-{envList["containerver"]}-{envList["buildid"]}',
            "namespace": f'{envList["namespace"]}',
        },
        "data": {
            # Only this value is wrapped in LiteralScalarString, so it alone is
            # dumped as a literal block scalar.
            "appsettings.json": ruamel.yaml.scalarstring.LiteralScalarString(jsonFile),
        },
    }
    # Write the ConfigMap document as YAML to standard output.
    yaml.dump(configMapHeader, sys.stdout)
    

    which gives:

    ---
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: service_testcontainer-latest-1000
      namespace: kubens
    data:
      appsettings.json: |-
        {
            "env": "development",
            "region": "us",
            "tag": "latest",
            "kubesvcname": "service",
            "containerver": "latest",
            "containername": "testcontainer",
            "namespace": "kubens",
            "buildid": "1000"
        }