I want a solution in the form of a single docker-compose file. I combined two existing docker-compose files (one for my services, one for the Jaeger stack) and came up with the following.
However, my services fail to communicate with the Jaeger collector; the trace exporter reports:
traces export: context deadline exceeded: rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing: dial tcp 172.18.0.10:4317: connect: connection refused"
version: '3'

services:
  # --- Application services -------------------------------------------------
  # Both services export OTLP traces to jaeger-collector on port 4317.

  # service-a is published on host port 8081; OUTBOUND_HOST_PORT points it at
  # service-b.
  service-a:
    build:
      context: .
      dockerfile: service-a/Dockerfile
    networks:
      - service-jaeger
    ports:
      - '8081:8081'
    environment:
      - OUTBOUND_HOST_PORT=service-b:8082
      - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger-collector:4317
      - OTEL_EXPORTER_OTLP_INSECURE=true

  # service-b is only reachable from inside the compose network (no ports
  # published).
  service-b:
    build:
      context: .
      dockerfile: service-b/Dockerfile
    networks:
      - service-jaeger
    environment:
      - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger-collector:4317
      - OTEL_EXPORTER_OTLP_INSECURE=true

  # --- Storage and transport ------------------------------------------------

  # Elasticsearch: long-term storage for traces (single node, security off).
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3
    networks:
      - service-jaeger
    environment:
      - 'ES_JAVA_OPTS=-Xms2g -Xmx2g'
      - 'bootstrap.memory_lock=true'
      - 'discovery.type=single-node'
      - 'xpack.security.enabled=false'
      - 'xpack.security.enrollment.enabled=false'
    ports:
      # Bound to loopback only, so the node is not exposed beyond this host.
      - '127.0.0.1:9200:9200'
    restart: on-failure
    volumes:
      - esdata:/usr/share/elasticsearch/data
    ulimits:
      # Unlimited memlock goes together with bootstrap.memory_lock=true above.
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536

  # Kafka: intermediate buffer between the collector and the ingester.
  kafka:
    image: confluentinc/cp-kafka:5.2.1
    networks:
      - service-jaeger
    ports:
      - '9092:9092'
    environment:
      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
      KAFKA_ADVERTISED_LISTENERS: 'PLAINTEXT://kafka:9092'
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_BROKER_ID: 1
    restart: on-failure

  # ZooKeeper: coordination service that the Kafka broker connects to.
  zookeeper:
    image: confluentinc/cp-zookeeper:5.2.1
    networks:
      - service-jaeger
    ports:
      - '2181:2181'
    environment:
      ZOOKEEPER_CLIENT_PORT: '2181'

  # --- Jaeger pipeline: agent -> collector -> Kafka -> ingester -> ES --------

  # Collector: receives spans and writes them to Kafka.
  # NOTE: the logs of this image version show its gRPC server on 14250, not on
  # the 4317 port the application services are configured to use.
  jaeger-collector:
    image: jaegertracing/jaeger-collector:1.11.0
    hostname: jaeger-collector
    ports:
      - '14269:14269'
      - '14268:14268'
      - '14267:14267'
      - '9411:9411'
    networks:
      - service-jaeger
    restart: on-failure
    environment:
      LOG_LEVEL: 'info'
      SPAN_STORAGE_TYPE: 'kafka'
      KAFKA_BROKERS: 'kafka:9092'
      METRICS_BACKEND: 'prometheus'
    depends_on:
      - elasticsearch

  # Ingester: consumes spans from Kafka and stores them in Elasticsearch.
  jaeger-ingester:
    image: jaegertracing/jaeger-ingester:1.11.0
    networks:
      - service-jaeger
    ports:
      - '14270:14270'
      - '14271:14271'
    restart: on-failure
    environment:
      LOG_LEVEL: 'info'
      INGESTER_PARALLELISM: '1'
      INGESTER_DEADLOCKINTERVAL: '0ms'
      SPAN_STORAGE_TYPE: 'elasticsearch'
      ES_SERVER_URLS: 'http://elasticsearch:9200'
      KAFKA_BROKERS: 'kafka:9092'
      METRICS_BACKEND: 'prometheus'
    depends_on:
      - kafka

  # Agent: accepts spans from local clients and forwards them to the collector
  # at jaeger-collector:14267.
  jaeger-agent:
    image: jaegertracing/jaeger-agent:1.11.0
    hostname: jaeger-agent
    command:
      - '--collector.host-port=jaeger-collector:14267'
    ports:
      - '5775:5775/udp'
      - '6831:6831/udp'
      - '6832:6832/udp'
      - '5778:5778'
    networks:
      - service-jaeger
    restart: on-failure
    environment:
      SPAN_STORAGE_TYPE: 'elasticsearch'
      METRICS_BACKEND: 'prometheus'
    depends_on:
      - jaeger-collector

  # --- User interfaces ------------------------------------------------------

  # Query service / UI for browsing traces stored in Elasticsearch.
  jaeger-query:
    image: jaegertracing/jaeger-query:1.11.0
    environment:
      SPAN_STORAGE_TYPE: 'elasticsearch'
      no_proxy: 'localhost'
      METRICS_BACKEND: 'prometheus'
    ports:
      - '16686:16686'
      - '16687:16687'
    networks:
      - service-jaeger
    restart: on-failure
    command:
      - '--es.server-urls=http://elasticsearch:9200'
      - '--span-storage.type=elasticsearch'
      - '--log-level=debug'
      - '--query.ui-config=/usr/share/jaeger-query/jaeger-query-config.json'
    volumes:
      # UI configuration is mounted read-only from the project directory.
      - ./jaeger-query-config.json:/usr/share/jaeger-query/jaeger-query-config.json:ro
    depends_on:
      - jaeger-agent

# Named volume that holds the Elasticsearch data directory.
volumes:
  esdata:

# Single bridge network shared by every service above.
networks:
  service-jaeger:
    driver: bridge
You need to use a newer `jaeger-collector` image. The container logs for `jaeger-collector:1.11.0` show that the collector starts a gRPC server listening on port `14250`, not on port `4317`, which is why the connection to `4317` is refused.
If `OTEL_EXPORTER_OTLP_ENDPOINT` is changed to `http://jaeger-collector:14250`, a different error occurs, because that gRPC server does not implement the OTLP trace service:
`traces export: rpc error: code = Unimplemented desc = unknown service opentelemetry.proto.collector.trace.v1.TraceService`
jaeger-collector:1.11.0 logs:
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715817606.124311,"caller":"healthcheck/handler.go:99","msg":"Health Check server started","http-port":14269,"status":"unavailable"}
jaeger-go-example-jaeger-collector-1 | WARNING: found deprecated option kafka.brokers, please use kafka.producer.brokers instead
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715817606.1347585,"caller":"kafka/factory.go:62","msg":"Kafka factory","producer builder":{"Brokers":["kafka:9092"]},"topic":"jaeger-spans"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715817606.1638942,"caller":"static/strategy_store.go:79","msg":"No sampling strategies provided, using defaults"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715817606.170055,"caller":"collector/main.go:140","msg":"Starting jaeger-collector TChannel server","port":14267}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715817606.1706026,"caller":"grpcserver/grpc_server.go:64","msg":"Starting jaeger-collector gRPC server","grpc-port":"14250"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715817606.1713939,"caller":"collector/main.go:154","msg":"Registering metrics handler with HTTP server","route":"/metrics"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715817606.1718876,"caller":"collector/main.go:163","msg":"Starting jaeger-collector HTTP server","http-port":14268}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715817606.1722622,"caller":"healthcheck/handler.go:133","msg":"Health Check state change","status":"ready"}
Once the collector is upgraded to the latest version, `1.57`, this error goes away, because the newer version creates an `otlpreceiver` gRPC server listening on port `4317`. From the kafka container logs, I could also see messages being published to the `jaeger-spans` topic.
jaeger-collector:1.57 logs:
jaeger-go-example-jaeger-collector-1 | 2024/05/16 00:41:06 maxprocs: Leaving GOMAXPROCS=8: CPU quota undefined
jaeger-go-example-jaeger-collector-1 | 2024/05/16 00:41:06 application version: git-commit=55e991a29725468164b11be5fc4e260dc09598d6, git-version=v1.57.0, build-date=2024-05-01T23:19:12Z
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820066.9970202,"caller":"flags/service.go:110","msg":"Mounting metrics handler on admin server","route":"/metrics"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820066.9974976,"caller":"flags/service.go:116","msg":"Mounting expvar handler on admin server","route":"/debug/vars"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820066.9984624,"caller":"flags/admin.go:130","msg":"Mounting health check on admin server","route":"/"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820066.9987607,"caller":"flags/admin.go:144","msg":"Starting admin HTTP server","http-addr":":14269"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.0008337,"caller":"flags/admin.go:122","msg":"Admin server started","http.host-port":"[::]:14269","health-status":"unavailable"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.0110576,"caller":"kafka/factory.go:77","msg":"Kafka factory","producer builder":{"Brokers":["kafka:9092"],"RequiredAcks":1,"Compression":"none","CompressionLevel":0,"ProtocolVersion":"","BatchLinger":0,"BatchSize":0,"BatchMinMessages":0,"BatchMaxMessages":0,"MaxMessageBytes":1000000,"Authentication":"none","Kerberos":{"ServiceName":"kafka","Realm":"","UseKeyTab":false,"Username":"","ConfigPath":"/etc/krb5.conf","KeyTabPath":"/etc/security/kafka.keytab","DisablePAFXFast":false},"TLS":{"Enabled":false,"CAPath":"","CertPath":"","KeyPath":"","ServerName":"","ClientCAPath":"","CipherSuites":null,"MinVersion":"","MaxVersion":"","SkipHostVerify":false,"ReloadInterval":0},"PlainText":{"Username":"","Mechanism":"PLAIN"}},"topic":"jaeger-spans"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.0482032,"caller":"static/strategy_store.go:68","msg":"No sampling strategies source provided, using defaults"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.085572,"caller":"grpc@v1.63.2/server.go:675","msg":"[core][Server #1]Server created","system":"grpc","grpc_log":true}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.0860224,"caller":"server/grpc.go:104","msg":"Starting jaeger-collector gRPC server","grpc.host-port":"[::]:14250"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.0860848,"caller":"server/http.go:56","msg":"Starting jaeger-collector HTTP server","http host-port":":14268"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.0868862,"caller":"grpc@v1.63.2/server.go:871","msg":"[core][Server #1 ListenSocket #2]ListenSocket created","system":"grpc","grpc_log":true}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.0912595,"caller":"app/collector.go:146","msg":"Not listening for Zipkin HTTP traffic, port not configured"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.091624,"caller":"handler/otlp_receiver.go:77","msg":"OTLP receiver status change","status":"StatusStarting"}
jaeger-go-example-jaeger-collector-1 | {"level":"warn","ts":1715820067.091747,"caller":"internal@v0.98.0/warning.go:42","msg":"Using the 0.0.0.0 address exposes this server to every network interface, which may facilitate Denial of Service attacks. Enable the feature gate to change the default and remove this warning.","documentation":"https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/security-best-practices.md#safeguards-against-denial-of-service-attacks","feature gate ID":"component.UseLocalHostAsDefaultHost"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.09237,"caller":"grpc@v1.63.2/server.go:675","msg":"[core][Server #3]Server created","system":"grpc","grpc_log":true}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.0924554,"caller":"otlpreceiver@v0.98.0/otlp.go:102","msg":"Starting GRPC server","endpoint":"0.0.0.0:4317"}
jaeger-go-example-jaeger-collector-1 | {"level":"warn","ts":1715820067.1029644,"caller":"internal@v0.98.0/warning.go:42","msg":"Using the 0.0.0.0 address exposes this server to every network interface, which may facilitate Denial of Service attacks. Enable the feature gate to change the default and remove this warning.","documentation":"https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/security-best-practices.md#safeguards-against-denial-of-service-attacks","feature gate ID":"component.UseLocalHostAsDefaultHost"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.1030934,"caller":"otlpreceiver@v0.98.0/otlp.go:152","msg":"Starting HTTP server","endpoint":"0.0.0.0:4318"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.1046698,"caller":"healthcheck/handler.go:129","msg":"Health Check state change","status":"ready"}
jaeger-go-example-jaeger-collector-1 | {"level":"info","ts":1715820067.1038973,"caller":"grpc@v1.63.2/server.go:871","msg":"[core][Server #3 ListenSocket #4]ListenSocket created","system":"grpc","grpc_log":true}
Minimal docker-compose.yml:
services:
  # ZooKeeper: coordination service that the Kafka broker connects to.
  zookeeper:
    image: confluentinc/cp-zookeeper:5.2.1
    ports:
      - "2181:2181"
    environment:
      ZOOKEEPER_CLIENT_PORT: "2181"

  # Kafka: buffer the collector publishes spans to (topic "jaeger-spans").
  # Two listeners are defined: INTERNAL (kafka:9092) for containers on the
  # compose network and EXTERNAL (localhost:9093) for clients on the host.
  kafka:
    image: confluentinc/cp-kafka:5.2.1
    ports:
      - "9092:9092"
      - "9093:9093"
    environment:
      KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
      KAFKA_LISTENERS: "INTERNAL://:9092,EXTERNAL://:9093"
      KAFKA_ADVERTISED_LISTENERS: "INTERNAL://kafka:9092,EXTERNAL://localhost:9093"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT"
      KAFKA_INTER_BROKER_LISTENER_NAME: "INTERNAL"
      # Environment values are quoted so they are always passed as strings.
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
      KAFKA_BROKER_ID: "1"
    depends_on:
      - zookeeper
    healthcheck:
      # Healthy once the internal listener accepts TCP connections.
      # Exit status 1 is the documented "unhealthy" code; "-1" is not a valid
      # exit status for a healthcheck command.
      test: ["CMD-SHELL", "nc -z localhost 9092 || exit 1"]
      start_period: 15s
      interval: 5s
      timeout: 10s
      retries: 10

  # Collector 1.57: serves OTLP gRPC on 4317 (and OTLP HTTP on 4318) and writes
  # spans to Kafka. Started only after the Kafka healthcheck passes.
  jaeger-collector:
    image: jaegertracing/jaeger-collector:1.57
    hostname: jaeger-collector
    ports:
      # Admin server (health check, /metrics) and Jaeger HTTP endpoint.
      # 14267 and 9411 are no longer published: the 1.57 logs show no TChannel
      # server and "Not listening for Zipkin HTTP traffic, port not configured".
      # 4317 needs no publishing here; service-b reaches it over the compose
      # network by service name.
      - "14269:14269"
      - "14268:14268"
    environment:
      LOG_LEVEL: "info"
      SPAN_STORAGE_TYPE: "kafka"
      # Replaces the deprecated kafka.brokers option (KAFKA_BROKERS).
      KAFKA_PRODUCER_BROKERS: "kafka:9092"
      METRICS_BACKEND: "prometheus"
    depends_on:
      kafka:
        condition: service_healthy

  # Application service exporting OTLP traces to the collector.
  service-b:
    build:
      context: .
      dockerfile: service-b/Dockerfile
    environment:
      - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger-collector:4317
      - OTEL_EXPORTER_OTLP_INSECURE=true
    ports:
      - "8082:8082"
    depends_on:
      - jaeger-collector