google-cloud-stackdriver, fluent-bit, google-anthos

Fluent Bit does not parse jsonPayload properly in an Anthos cluster


Default fluentbit config for Anthos clusters on AWS

    [FILTER]
        Name                kubernetes
        Match               k8s_container.*
        Kube_URL            https://kubernetes.default.svc.cluster.local:443
        Kube_Tag_Prefix     k8s_container.
        Regex_Parser        k8s-container-custom-tag
        Annotations         Off
    [FILTER]
        Name    nest
        Match   *
        Operation nest
        Wildcard gke.googleapis.com*
        Nest_under logging.googleapis.com/labels
  fluent-bit.conf: |
    [SERVICE]
        Flush         1
        Log_Level     info
        Daemon        off
        Parsers_File  parsers.conf
        HTTP_Server   On
        HTTP_Listen   127.0.0.1
        HTTP_Port     29020
        storage.path               /var/log/fluent-bit-buffers/
        storage.sync               normal
        storage.checksum           off
        storage.backlog.mem_limit  10M
    @INCLUDE input-containers.conf
    @INCLUDE input-network-policy.conf
    @INCLUDE input-systemd.conf
    @INCLUDE filter-kubernetes.conf
    @INCLUDE output-stackdriver.conf
  input-containers.conf: |
    [INPUT]
        Name               tail
        Tag_Regex          var.log.containers.(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$
        Tag                k8s_container.<namespace_name>.<pod_name>.<container_name>
        Path               /var/log/containers/*_kube-system_*.log
        Parser             cri
        DB                 /var/log/fluent-bit-k8s-container-kube-system.db
        Buffer_Chunk_Size  512KB
        Buffer_Max_Size    5M
        Rotate_Wait        30
        Mem_Buf_Limit      30MB
        Skip_Long_Lines    On
        Refresh_Interval   60
        storage.type       filesystem
        Read_from_Head     True
    [INPUT]
        Name               tail
        Tag_Regex          var.log.containers.(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$
        Tag                k8s_container.<namespace_name>.<pod_name>.<container_name>
        Path               /var/log/containers/*_gke-connect_*.log
        Parser             cri
        DB                 /var/log/fluent-bit-k8s-container-gke-connect.db
        Buffer_Chunk_Size  512KB
        Buffer_Max_Size    2M
        Rotate_Wait        30
        Mem_Buf_Limit      30MB
        Skip_Long_Lines    On
        Refresh_Interval   60
        storage.type       filesystem
        Read_from_Head     True
    [INPUT]
        Name               tail
        Tag_Regex          var.log.containers.(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$
        Tag                k8s_container.<namespace_name>.<pod_name>.<container_name>
        Path               /var/log/containers/*_gke-system_*.log
        Parser             cri
        DB                 /var/log/fluent-bit-k8s-container-gke-system.db
        Buffer_Chunk_Size  512KB
        Buffer_Max_Size    2M
        Rotate_Wait        30
        Mem_Buf_Limit      30MB
        Skip_Long_Lines    On
        Refresh_Interval   60
        storage.type       filesystem
        Read_from_Head     True

    [INPUT]
        Name               tail
        Tag                k8s_application.<namespace_name>.<pod_name>.<container_name>
        Tag_Regex          var.log.containers.(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$
        Path               /var/log/containers/*.log
        Exclude_Path       *_kube-system_*.log,*_gke-connect_*.log,*_gke-system_*.log
        DB                 /var/log/fluent-bit-k8s-container-application.db
        Buffer_Max_Size    2MB
        Mem_Buf_Limit      30MB
        Skip_Long_Lines    On
        Rotate_Wait        30
        Refresh_Interval   5
        storage.type       filesystem
        Read_from_Head     True

    [FILTER]
        Name    record_modifier
        Match   *.kube-system.*
        Record  gke.googleapis.com/log_type system
    [FILTER]
        Name    record_modifier
        Match   *.gke-connect.*
        Record  gke.googleapis.com/log_type system
    [FILTER]
        Name    record_modifier
        Match   *.gke-system.*
        Record  gke.googleapis.com/log_type system
    [FILTER]
        Name          parser
        Match_Regex   .*(ebs-plugin|efs-plugin|node-driver-registrar|coredns-autoscaler|konnectivity-*|metrics-server).*
        Parser        glog
        Key_name      message
        Reserve_Data  true
    [FILTER]
        Name          parser
        Match         *cilium*
        Parser        logfmt
        Key_name      message
        Reserve_Data  true
    [FILTER]
        Name          modify
        Match         *cilium*
        Rename        level severity
    [FILTER]
        Name          parser
        Match         *fluentbit-gke*
        Parser        fluentbit
        Key_name      message
        Reserve_Data  true
    [FILTER]
        Name          parser
        Match         *node-cache*
        Parser        node-cache
        Key_name      message
        Reserve_Data  true
    [FILTER]
        Name          parser
        Match         *gke-metrics-agent
        Parser        gke-metrics-agent
        Key_name      message
        Reserve_Data  true
    [FILTER]
        Name          modify
        Match         *gke-metrics-agent
        Rename        msg message
        Rename        level severity

    [FILTER]
        Name          modify
        Match         k8s_application*
        Hard_rename   log message
    
    [FILTER]
        Name          modify
        Match         k8s_application*
        Hard_rename   poruka message
    
    [FILTER]
        Name         parser
        Match        k8s_application*
        Key_Name     message
        Reserve_Data True
        Parser       cri
        Parser       appglog
        Parser       json
    [FILTER]
        Name         modify
        Match        k8s_application*
        Copy         level severity
    [FILTER]
        Name         modify
        Match        k8s_application*
        Condition    Key_value_equals stream stdout
        Add          severity I
    [FILTER]
        Name         modify
        Match        k8s_application*
        Condition    Key_value_equals stream stderr
        Add          severity E
  input-network-policy.conf: |
    [INPUT]
        Name             tail
        Parser           network-log
        Alias            policy-action
        Tag              policy-action
        Path             /var/log/network/policy_action.log
        DB               /var/log/fluent-bit-k8s-node-journald-policy_action.db
        Skip_Long_Lines  On
        Refresh_Interval 60
    [FILTER]
        Name                modify
        Match               policy-action
        Add                 logging.googleapis.com/local_resource_id k8s_node.${NODE_NAME}
        Add                 gke.googleapis.com/log_type system
  input-systemd.conf: |
    [INPUT]
        Name            systemd
        Tag             containerd
        Path            /var/log/journal
        DB              /var/log/fluent-bit-k8s-node-journald-containerd.db
        Systemd_Filter  _SYSTEMD_UNIT=containerd.service
        storage.type    filesystem
    [INPUT]
        Name            systemd
        Tag             kubelet
        Path            /var/log/journal
        DB              /var/log/fluent-bit-k8s-node-journald-kubelet.db
        Systemd_Filter  _SYSTEMD_UNIT=kubelet.service
        storage.type    filesystem
    [INPUT]
        Name            systemd
        Tag             node-agent
        Path            /var/log/journal
        DB              /var/log/fluent-bit-k8s-node-journald-node_agent.db
        Systemd_Filter  _SYSTEMD_UNIT=cloud-final.service
        storage.type    filesystem
    [FILTER]
        Name         modify
        Match_regex  .*(containerd|kubelet|node-agent)$
        Rename       MESSAGE message
    [FILTER]
        Name          parser
        Match         containerd
        Parser        containerd
        Key_name      message
        Reserve_Data  true
    [FILTER]
        Name          parser
        Match         kubelet
        Parser        glog
        Key_name      message
        Reserve_Data  true
    [FILTER]
        Name                modify
        Match_Regex         ^(containerd|kubelet|node-agent)$
        Add                 logging.googleapis.com/local_resource_id k8s_node.${NODE_NAME}
        Add                 gke.googleapis.com/log_type system
  output-stackdriver.conf: |
    [OUTPUT]
        Name                        stackdriver
        Match                       k8s_container.*
        Resource                    k8s_container
        k8s_cluster_name            aws-cluster1
        k8s_cluster_location        europe-west1
        custom_k8s_regex            ^(?<namespace_name>[^_.]+)\.(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)\.(?<container_name>[^.]+)$
        storage.total_limit_size    1G
        Severity_key                severity
    [OUTPUT]
        Name                        stackdriver
        Match_Regex                 ^(containerd|kubelet|node-agent|policy-action)$
        Resource                    k8s_node
        k8s_cluster_name            aws-cluster1
        k8s_cluster_location        europe-west1
        custom_k8s_regex            ^(?<node_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)$
        storage.total_limit_size    1G
        Severity_key                severity
    [OUTPUT]
        Name                        stackdriver
        Match                       k8s_application.*
        Resource                    k8s_container
        k8s_cluster_name            aws-cluster1
        k8s_cluster_location        europe-west1
        custom_k8s_regex            ^(?<namespace_name>[^_.]+)\.(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)\.(?<container_name>[^.]+)$
        tag_prefix                  k8s_application
        storage.total_limit_size    1G
        Severity_key                severity
  parsers.conf: |
    [PARSER]
        Name    k8s-container-custom-tag
        Format  regex
        Regex   ^(?<namespace_name>[^_.]+)\.(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)\.(?<container_name>[^.]+)$
    [PARSER]
        Name        cri
        Format      regex
        Regex       ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>[^ ]*) (?<message>.*)$
        Time_Key    time
        Time_Format %Y-%m-%dT%H:%M:%S.%L%z
    [PARSER]
        Name        network-log
        Format      json
        Time_Key    timestamp
        Time_Format %Y-%m-%dT%H:%M:%S.%L%z
    [PARSER]
        Name        gke-metrics-agent
        Format      json
        Time_Key    ts
        Time_Format %s.%L
    [PARSER]
        Name        containerd
        Format      regex
        Regex       ^(time="(?<time>.+)" )?level=(?<severity>\w+) (?<message>.+)$
    [PARSER]
        Name        glog
        Format      regex
        Regex       ^((?<severity>\w)\d{4} [^\s]*\s+\d+\s+(?<source_file>[^ \]]+)\:(?<source_line>\d+)\]\s)?"?(?<message>.*)"?$
    [PARSER]
        Name        logfmt
        Format      logfmt
    [PARSER]
        Name        fluentbit
        Format      regex
        Regex       ^\[[\s\/:0-9]+\] \[\s*(?<severity>\w+)] (?<message>.*)$
    [PARSER]
        Name        node-cache
        Format      regex
        Regex       ^([\s:\/0-9]+)?\[\s*(?<severity>\w+)] (?<message>.*)$
    [PARSER]
        Name        json
        Format      json
    [PARSER]
        Name        appglog
        Format      regex
        Regex       ^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source_file>[^ \]]+)\:(?<source_line>\d+)\]\s(?<message>.*)$
        Time_Key    time
        Time_Format %m%d %H:%M:%S.%L%z

I use the following command to test GCP Logging on both GKE and Anthos:

kubectl run test-logging-app --quiet --restart=Never --image=alpine -- sh -c 'sleep 3; echo "{\"Test\": \"Hello from Kubernetes!\", \"level\": \"warn\"}"'
  1. Logs Explorer when running container in GKE
jsonPayload: {
  Test: "Hello from Kubernetes!"
  level: "warn"
}
  2. Logs Explorer when running container in AWS Anthos
jsonPayload: {
  logtag: "F"
  message: "{"Test": "Hello from Kubernetes!", "level": "warn"}"
}

The logtag field is extracted correctly by the cri parser, but for some reason the appglog and json parsers do not seem to be applied at all.


Solution

  • After many tweaks of the configuration, it turned out that we had to split the parser [FILTER] for k8s_application* into two filters, changing:

        [FILTER]
            Name         parser
            Match        k8s_application*
            Key_Name     message
            Reserve_Data True
            Parser       cri
            Parser       appglog
            Parser       json
    

    to

        [FILTER]
            Name         parser
            Match        k8s_application*
            Key_Name     message
            Reserve_Data True
            Parser       cri
        [FILTER]
            Name         parser
            Match        k8s_application*
            Key_Name     message
            Reserve_Data True
            Parser       appglog
            Parser       json
    

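    That way the appglog and json parsers started to work. The reason is that when a single parser [FILTER] lists several Parser entries, they are tried in order and processing stops at the first one that matches. Because cri always matched the raw container log line, appglog and json were never applied. With a separate second filter, they run on the message field that cri has already extracted.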