Tags: go, docker-compose, open-telemetry, jaeger

Set up Jaeger in docker-compose - not with the all-in-one image


I want a solution in a docker-compose file that runs the individual Jaeger components rather than the all-in-one image. I started from two existing docker-compose files and combined them.

I came up with the following, but the services cannot communicate with the Jaeger collector; exporting traces fails with:

traces export: context deadline exceeded: rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing: dial tcp 172.18.0.10:4317: connect: connection refused"

version: '3'
services:
  # Application service A: built from service-a/Dockerfile (build context is the
  # project root) and published on host port 8081.
  service-a:
    build:
      context: .
      dockerfile: service-a/Dockerfile
    networks:
      - service-jaeger
    ports:
      - "8081:8081"
    environment:
      # Presumably the downstream address service-a calls — confirm in the service code
      - OUTBOUND_HOST_PORT=service-b:8082
      # OTLP exporter target: port 4317 on the jaeger-collector service, without TLS
      - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger-collector:4317
      - OTEL_EXPORTER_OTLP_INSECURE=true

  # Application service B: built from service-b/Dockerfile. No host port is
  # published, so it is reachable only from containers on the service-jaeger network.
  service-b:
    build:
      context: .
      dockerfile: service-b/Dockerfile
    networks:
      - service-jaeger
    environment:
      # Same OTLP exporter target as service-a: jaeger-collector:4317, without TLS
      - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger-collector:4317
      - OTEL_EXPORTER_OTLP_INSECURE=true

  # Using ElasticSearch as a storage for traces and logs
  # (jaeger-ingester writes to it and jaeger-query reads from it via http://elasticsearch:9200)
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3
    networks:
      - service-jaeger
    environment:
      # Fixed 2 GB JVM heap (min and max)
      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
      # Memory locking; paired with the memlock ulimit below
      - "bootstrap.memory_lock=true"
      # Single-node cluster with security features switched off
      - "discovery.type=single-node"
      - "xpack.security.enabled=false"
      - "xpack.security.enrollment.enabled=false"
    ports:
      # Published on the host loopback interface only
      - "127.0.0.1:9200:9200"
    restart: on-failure
    volumes:
      # Named volume (declared under top-level `volumes:`) holding the index data
      - esdata:/usr/share/elasticsearch/data
    ulimits:
      # -1 removes the locked-memory limit for the container
      memlock:
        soft: -1
        hard: -1
      # Open-file limit raised to 65536
      nofile:
        soft: 65536
        hard: 65536

  # Using Apache Kafka as a temporary storage and stream processing system (span post processing)
  # (jaeger-collector produces to it, jaeger-ingester consumes from it)
  kafka:
    image: confluentinc/cp-kafka:5.2.1
    networks:
      - service-jaeger
    ports:
      - "9092:9092"
    environment:
      KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
      # The broker advertises itself as kafka:9092, i.e. under its service name
      # on the service-jaeger network
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka:9092"
      # Single broker, so the offsets topic uses replication factor 1
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_BROKER_ID: 1
    # NOTE(review): no depends_on for zookeeper is declared here
    restart: on-failure

  # Using Apache Zookeeper to coordinate Apache services
  # (the kafka service connects to it at zookeeper:2181)
  zookeeper:
    image: confluentinc/cp-zookeeper:5.2.1
    networks:
      - service-jaeger
    ports:
      - "2181:2181"
    environment:
      ZOOKEEPER_CLIENT_PORT: "2181"

  # Using Jaeger Collector to receive spans from Jaeger Agents and send them to Kafka
  # NOTE: per this image's startup log, 1.11.0 serves TChannel on 14267, gRPC on
  # 14250, HTTP on 14268 and the health check on 14269. Nothing listens on 4317,
  # which is the port service-a and service-b export to.
  jaeger-collector:
    image: jaegertracing/jaeger-collector:1.11.0
    hostname: jaeger-collector
    ports:
      - "14269:14269"
      - "14268:14268"
      - "14267:14267"
      - "9411:9411"
    networks:
      - service-jaeger
    restart: on-failure
    environment:
      LOG_LEVEL: "info"
      # Spans are written to Kafka (topic jaeger-spans according to the startup log)
      SPAN_STORAGE_TYPE: "kafka"
      # Reported as deprecated by the collector in favour of kafka.producer.brokers
      KAFKA_BROKERS: "kafka:9092"
      METRICS_BACKEND: "prometheus"
    # NOTE(review): the storage backend is kafka, yet only elasticsearch is listed here
    depends_on:
      - elasticsearch

  # Using Jaeger Ingester to receive spans from Kafka and send them to ElasticSearch
  jaeger-ingester:
    image: jaegertracing/jaeger-ingester:1.11.0
    networks:
      - service-jaeger
    ports:
      - "14270:14270"
      - "14271:14271"
    restart: on-failure
    environment:
      LOG_LEVEL: "info"
      INGESTER_PARALLELISM: "1"
      # NOTE(review): "0ms" presumably disables the deadlock detector — confirm in the Jaeger docs
      INGESTER_DEADLOCKINTERVAL: "0ms"
      # Destination storage: Elasticsearch at http://elasticsearch:9200
      SPAN_STORAGE_TYPE: "elasticsearch"
      ES_SERVER_URLS: "http://elasticsearch:9200"
      # Source of spans: the Kafka broker
      KAFKA_BROKERS: "kafka:9092"
      METRICS_BACKEND: "prometheus"
    # NOTE(review): also writes to elasticsearch, which is not listed here
    depends_on:
      - kafka

  # Using Jaeger Agent to receive spans from clients locally and send to remote Jaeger Collector
  jaeger-agent:
    image: jaegertracing/jaeger-agent:1.11.0
    hostname: jaeger-agent
    # Forwards to the collector's TChannel port (14267 per the collector startup log)
    command: ["--collector.host-port=jaeger-collector:14267"]
    ports:
      # Three UDP ports plus one TCP port published to the host
      - "5775:5775/udp"
      - "6831:6831/udp"
      - "6832:6832/udp"
      - "5778:5778"
    networks:
      - service-jaeger
    restart: on-failure
    environment:
      # NOTE(review): the agent only forwards to the collector; confirm whether
      # SPAN_STORAGE_TYPE has any effect on this component
      SPAN_STORAGE_TYPE: "elasticsearch"
      METRICS_BACKEND: "prometheus"
    depends_on:
      - jaeger-collector

  #
  # USER INTERFACES
  #

  # Using Jaeger Query to work with traces
  # (reads from Elasticsearch; published on host ports 16686 and 16687)
  jaeger-query:
    image: jaegertracing/jaeger-query:1.11.0
    environment:
      # The storage type is also passed on the command line below
      SPAN_STORAGE_TYPE: "elasticsearch"
      no_proxy: "localhost"
      METRICS_BACKEND: "prometheus"
    ports:
      - "16686:16686"
      - "16687:16687"
    networks:
      - service-jaeger
    restart: on-failure
    # Points the query service at Elasticsearch, enables debug logging and loads
    # the UI configuration from the file mounted under `volumes:`
    command: [
      "--es.server-urls=http://elasticsearch:9200",
      "--span-storage.type=elasticsearch",
      "--log-level=debug",
      "--query.ui-config=/usr/share/jaeger-query/jaeger-query-config.json"
    ]
    volumes:
      # UI config from the project directory, mounted read-only
      - ./jaeger-query-config.json:/usr/share/jaeger-query/jaeger-query-config.json:ro
    # NOTE(review): reads from elasticsearch, but only jaeger-agent is listed here
    depends_on:
      - jaeger-agent

# Named volume mounted by the elasticsearch service at /usr/share/elasticsearch/data
volumes:
  esdata:

# Single bridge network that every service in this file joins
networks:
  service-jaeger:
    driver: bridge

Solution

  • You need to use a newer jaeger-collector image. The container logs for jaeger-collector:1.11.0 show that the collector starts a gRPC server listening on port 14250, not on port 4317. If OTEL_EXPORTER_OTLP_ENDPOINT is changed to http://jaeger-collector:14250, a different error occurs instead: "traces export: rpc error: code = Unimplemented desc = unknown service opentelemetry.proto.collector.trace.v1.TraceService". In other words, the gRPC server in this version does not implement the OTLP trace service at all.

    jaeger-collector:1.11.0 logs:

    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715817606.124311,"caller":"healthcheck/handler.go:99","msg":"Health Check server started","http-port":14269,"status":"unavailable"}
    jaeger-go-example-jaeger-collector-1  | WARNING: found deprecated option kafka.brokers, please use kafka.producer.brokers instead
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715817606.1347585,"caller":"kafka/factory.go:62","msg":"Kafka factory","producer builder":{"Brokers":["kafka:9092"]},"topic":"jaeger-spans"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715817606.1638942,"caller":"static/strategy_store.go:79","msg":"No sampling strategies provided, using defaults"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715817606.170055,"caller":"collector/main.go:140","msg":"Starting jaeger-collector TChannel server","port":14267}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715817606.1706026,"caller":"grpcserver/grpc_server.go:64","msg":"Starting jaeger-collector gRPC server","grpc-port":"14250"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715817606.1713939,"caller":"collector/main.go:154","msg":"Registering metrics handler with HTTP server","route":"/metrics"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715817606.1718876,"caller":"collector/main.go:163","msg":"Starting jaeger-collector HTTP server","http-port":14268}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715817606.1722622,"caller":"healthcheck/handler.go:133","msg":"Health Check state change","status":"ready"}
    

    Once the collector is upgraded to the latest version, 1.57, this error goes away, because the newer version starts an otlpreceiver gRPC server listening on port 4317 (and an HTTP one on port 4318). In the kafka container logs, I could also see messages being published to the jaeger-spans topic.

    jaeger-collector:1.57 logs:

    jaeger-go-example-jaeger-collector-1  | 2024/05/16 00:41:06 maxprocs: Leaving GOMAXPROCS=8: CPU quota undefined
    jaeger-go-example-jaeger-collector-1  | 2024/05/16 00:41:06 application version: git-commit=55e991a29725468164b11be5fc4e260dc09598d6, git-version=v1.57.0, build-date=2024-05-01T23:19:12Z
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820066.9970202,"caller":"flags/service.go:110","msg":"Mounting metrics handler on admin server","route":"/metrics"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820066.9974976,"caller":"flags/service.go:116","msg":"Mounting expvar handler on admin server","route":"/debug/vars"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820066.9984624,"caller":"flags/admin.go:130","msg":"Mounting health check on admin server","route":"/"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820066.9987607,"caller":"flags/admin.go:144","msg":"Starting admin HTTP server","http-addr":":14269"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.0008337,"caller":"flags/admin.go:122","msg":"Admin server started","http.host-port":"[::]:14269","health-status":"unavailable"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.0110576,"caller":"kafka/factory.go:77","msg":"Kafka factory","producer builder":{"Brokers":["kafka:9092"],"RequiredAcks":1,"Compression":"none","CompressionLevel":0,"ProtocolVersion":"","BatchLinger":0,"BatchSize":0,"BatchMinMessages":0,"BatchMaxMessages":0,"MaxMessageBytes":1000000,"Authentication":"none","Kerberos":{"ServiceName":"kafka","Realm":"","UseKeyTab":false,"Username":"","ConfigPath":"/etc/krb5.conf","KeyTabPath":"/etc/security/kafka.keytab","DisablePAFXFast":false},"TLS":{"Enabled":false,"CAPath":"","CertPath":"","KeyPath":"","ServerName":"","ClientCAPath":"","CipherSuites":null,"MinVersion":"","MaxVersion":"","SkipHostVerify":false,"ReloadInterval":0},"PlainText":{"Username":"","Mechanism":"PLAIN"}},"topic":"jaeger-spans"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.0482032,"caller":"static/strategy_store.go:68","msg":"No sampling strategies source provided, using defaults"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.085572,"caller":"grpc@v1.63.2/server.go:675","msg":"[core][Server #1]Server created","system":"grpc","grpc_log":true}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.0860224,"caller":"server/grpc.go:104","msg":"Starting jaeger-collector gRPC server","grpc.host-port":"[::]:14250"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.0860848,"caller":"server/http.go:56","msg":"Starting jaeger-collector HTTP server","http host-port":":14268"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.0868862,"caller":"grpc@v1.63.2/server.go:871","msg":"[core][Server #1 ListenSocket #2]ListenSocket created","system":"grpc","grpc_log":true}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.0912595,"caller":"app/collector.go:146","msg":"Not listening for Zipkin HTTP traffic, port not configured"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.091624,"caller":"handler/otlp_receiver.go:77","msg":"OTLP receiver status change","status":"StatusStarting"}
    jaeger-go-example-jaeger-collector-1  | {"level":"warn","ts":1715820067.091747,"caller":"internal@v0.98.0/warning.go:42","msg":"Using the 0.0.0.0 address exposes this server to every network interface, which may facilitate Denial of Service attacks. Enable the feature gate to change the default and remove this warning.","documentation":"https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/security-best-practices.md#safeguards-against-denial-of-service-attacks","feature gate ID":"component.UseLocalHostAsDefaultHost"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.09237,"caller":"grpc@v1.63.2/server.go:675","msg":"[core][Server #3]Server created","system":"grpc","grpc_log":true}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.0924554,"caller":"otlpreceiver@v0.98.0/otlp.go:102","msg":"Starting GRPC server","endpoint":"0.0.0.0:4317"}
    jaeger-go-example-jaeger-collector-1  | {"level":"warn","ts":1715820067.1029644,"caller":"internal@v0.98.0/warning.go:42","msg":"Using the 0.0.0.0 address exposes this server to every network interface, which may facilitate Denial of Service attacks. Enable the feature gate to change the default and remove this warning.","documentation":"https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/security-best-practices.md#safeguards-against-denial-of-service-attacks","feature gate ID":"component.UseLocalHostAsDefaultHost"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.1030934,"caller":"otlpreceiver@v0.98.0/otlp.go:152","msg":"Starting HTTP server","endpoint":"0.0.0.0:4318"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.1046698,"caller":"healthcheck/handler.go:129","msg":"Health Check state change","status":"ready"}
    jaeger-go-example-jaeger-collector-1  | {"level":"info","ts":1715820067.1038973,"caller":"grpc@v1.63.2/server.go:871","msg":"[core][Server #3 ListenSocket #4]ListenSocket created","system":"grpc","grpc_log":true}
    

    Minimal docker-compose.yml:

    services:
      # ZooKeeper: coordination service the Kafka broker connects to at zookeeper:2181
      zookeeper:
        image: confluentinc/cp-zookeeper:5.2.1
        environment:
          - ZOOKEEPER_CLIENT_PORT=2181
        ports:
          - "2181:2181"
        
    
      # Using Apache Kafka as a temporary storage and stream processing system (span post processing)
      kafka:
        image: confluentinc/cp-kafka:5.2.1
        ports:
          - "9092:9092"
          - "9093:9093"
        environment:
          KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
          # Two listeners: INTERNAL (9092) is advertised as kafka:9092 for other
          # containers, EXTERNAL (9093) as localhost:9093 for clients on the host.
          KAFKA_LISTENERS: "INTERNAL://:9092,EXTERNAL://:9093"
          KAFKA_ADVERTISED_LISTENERS: "INTERNAL://kafka:9092,EXTERNAL://localhost:9093"
          KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT"
          KAFKA_INTER_BROKER_LISTENER_NAME: "INTERNAL"
          # Quoted so the values are passed as strings, like the other entries
          KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
          KAFKA_BROKER_ID: "1"
        depends_on:
          - zookeeper
        healthcheck:
          # Healthy once the INTERNAL listener accepts TCP connections. A health
          # probe should exit 0 (healthy) or 1 (unhealthy); "exit -1" is not a
          # valid exit status in POSIX sh.
          test: "nc -z localhost 9092 || exit 1"
          start_period: 15s
          interval: 5s
          timeout: 10s
          retries: 10
    
      # Jaeger Collector: accepts spans (including OTLP) and writes them to the
      # jaeger-spans topic in Kafka. Version 1.57 starts OTLP receivers on 4317
      # (gRPC) and 4318 (HTTP), as its startup log shows.
      jaeger-collector:
        image: jaegertracing/jaeger-collector:1.57
        hostname: jaeger-collector
        ports:
          # Only ports the 1.57 collector actually listens on are published.
          # 14267 (TChannel) and 9411 (Zipkin, "port not configured" in the
          # startup log) have no listener behind them in this setup.
          - "4317:4317"    # OTLP gRPC receiver
          - "4318:4318"    # OTLP HTTP receiver
          - "14268:14268"  # jaeger-collector HTTP server
          - "14269:14269"  # admin server: health check and /metrics
        environment:
          LOG_LEVEL: "info"
          SPAN_STORAGE_TYPE: "kafka"
          # Replaces the deprecated kafka.brokers option
          KAFKA_PRODUCER_BROKERS: "kafka:9092"
          METRICS_BACKEND: "prometheus"
        depends_on:
          # Start only after the Kafka healthcheck passes, so the producer can connect
          kafka:
            condition: service_healthy
    
      # Application service B: exports traces over OTLP/gRPC to the collector
      service-b:
        build:
          context: .
          dockerfile: service-b/Dockerfile
        depends_on:
          - jaeger-collector
        ports:
          - "8082:8082"
        environment:
          # Plain-text (no TLS) OTLP endpoint on the collector's port 4317
          OTEL_EXPORTER_OTLP_ENDPOINT: "http://jaeger-collector:4317"
          OTEL_EXPORTER_OTLP_INSECURE: "true"