I am working on a project with Airbyte. Deploying it with docker compose is pretty easy if you follow the documentation: I just had to clone the repository and run docker compose up.
When I clone the repository a lot of files are downloaded. When I run docker compose up the images are downloaded too.
I did a test with a new directory containing just the docker-compose.yaml, .env and flags.yml files, required to start the containers. Everything worked fine!
What are the implications of running the containers with just these 3 files?
Thanks in advance!
I checked the number of connectors and functionality, both cases worked perfectly.
I was able to get it set up by initially following the instructions located here: https://docs.airbyte.com/deploying-airbyte/local-deployment/
After running the ./run-ab-platform.sh command, it generates a docker-compose.yaml, a flags.yml and an .env file, which I copied over to a location with no internet access along with the images.
Here are the files it produced with version 0.50.4:
.env file:
# This file only contains Docker relevant variables.
#
# Variables with defaults have been omitted to avoid duplication of defaults.
# The only exception to the non-default rule are env vars related to scaling.
#
# See https://github.com/airbytehq/airbyte/blob/master/airbyte-config/config-models/src/main/java/io/airbyte/config/Configs.java
# for the latest environment variables.
#
# # Contributors - please organise this env file according to the above linked file.
### SHARED ###
# Airbyte release; the compose file uses it as the tag of every airbyte/* image.
VERSION=0.50.5
# When using the airbyte-db via default docker image
CONFIG_ROOT=/data
# Names given to the named volumes `data` and `db` declared in the compose file.
DATA_DOCKER_MOUNT=airbyte_data
DB_DOCKER_MOUNT=airbyte_db
# Workspace storage for running jobs (logs, etc)
WORKSPACE_ROOT=/tmp/workspace
WORKSPACE_DOCKER_MOUNT=airbyte_workspace
# Local mount to access local files from filesystem
# todo (cgardens) - when we mount raw directories instead of named volumes, *_DOCKER_MOUNT must
# be the same as *_ROOT.
# Issue: https://github.com/airbytehq/airbyte/issues/578
LOCAL_ROOT=/tmp/airbyte_local
LOCAL_DOCKER_MOUNT=/tmp/airbyte_local
# todo (cgardens) - hack to handle behavior change in docker compose. *_PARENT directories MUST
# already exist on the host filesystem and MUST be parents of *_ROOT.
# Issue: https://github.com/airbytehq/airbyte/issues/577
HACK_LOCAL_ROOT_PARENT=/tmp
# Proxy Configuration
# Set to empty values, e.g. "" to disable basic auth
# Change these credentials before exposing the proxy beyond a local machine.
BASIC_AUTH_USERNAME=airbyte
BASIC_AUTH_PASSWORD=password
BASIC_AUTH_PROXY_TIMEOUT=900
### DATABASE ###
# Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db
DATABASE_USER=docker
DATABASE_PASSWORD=docker
# DATABASE_HOST and DATABASE_PORT are also handed to airbyte-temporal
# (as POSTGRES_SEEDS and DB_PORT) by the compose file.
DATABASE_HOST=db
DATABASE_PORT=5432
DATABASE_DB=airbyte
# translate manually DATABASE_URL=jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB} (do not include the username or password here)
# Keep this in sync with the three values above whenever any of them changes.
DATABASE_URL=jdbc:postgresql://db:5432/airbyte
JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.40.26.001
# Airbyte Internal Config Database, defaults to Job Database if empty. Explicitly left empty to mute docker compose warnings.
CONFIG_DATABASE_USER=
CONFIG_DATABASE_PASSWORD=
CONFIG_DATABASE_URL=
CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.40.23.002
### AIRBYTE SERVICES ###
# host:port pairs; the host parts match service names in the compose file,
# so they resolve on the compose network.
TEMPORAL_HOST=airbyte-temporal:7233
INTERNAL_API_HOST=airbyte-server:8001
CONNECTOR_BUILDER_API_HOST=airbyte-connector-builder-server:80
# Port 8000 is the one airbyte-proxy publishes on the host.
WEBAPP_URL=http://localhost:8000/
# Although not present as an env var, required for webapp configuration.
CONNECTOR_BUILDER_API_URL=/connector-builder-api
### JOBS ###
# Relevant to scaling.
SYNC_JOB_MAX_ATTEMPTS=3
SYNC_JOB_MAX_TIMEOUT_DAYS=3
SYNC_JOB_INIT_RETRY_TIMEOUT_MINUTES=5
# The resource settings below are intentionally empty; the compose file passes
# them through to the containers as empty strings.
# NOTE(review): presumably empty means "use the application default" — confirm.
JOB_MAIN_CONTAINER_CPU_REQUEST=
JOB_MAIN_CONTAINER_CPU_LIMIT=
JOB_MAIN_CONTAINER_MEMORY_REQUEST=
JOB_MAIN_CONTAINER_MEMORY_LIMIT=
NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_LIMIT=
NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_REQUEST=
NORMALIZATION_JOB_MAIN_CONTAINER_CPU_LIMIT=
NORMALIZATION_JOB_MAIN_CONTAINER_CPU_REQUEST=
### LOGGING/MONITORING/TRACKING ###
# NOTE(review): `segment` presumably sends usage telemetry to an external
# service — confirm whether this is wanted on a host with no internet access.
TRACKING_STRATEGY=segment
JOB_ERROR_REPORTING_STRATEGY=logging
# Although not present as an env var, expected by Log4J configuration.
LOG_LEVEL=INFO
### APPLICATIONS ###
# Worker #
# Passed to the worker (and, in the compose file, also to the server) as MICRONAUT_ENVIRONMENTS.
WORKERS_MICRONAUT_ENVIRONMENTS=control-plane
# Cron #
# Passed to airbyte-cron as MICRONAUT_ENVIRONMENTS.
CRON_MICRONAUT_ENVIRONMENTS=control-plane
# Relevant to scaling.
MAX_SYNC_WORKERS=5
MAX_SPEC_WORKERS=5
MAX_CHECK_WORKERS=5
MAX_DISCOVER_WORKERS=5
MAX_NOTIFY_WORKERS=5
SHOULD_RUN_NOTIFY_WORKFLOWS=true
# Temporal Activity configuration
# Left empty here; the compose file forwards them to the worker as empty strings.
ACTIVITY_MAX_ATTEMPT=
ACTIVITY_INITIAL_DELAY_BETWEEN_ATTEMPTS_SECONDS=
ACTIVITY_MAX_DELAY_BETWEEN_ATTEMPTS_SECONDS=
WORKFLOW_FAILURE_RESTART_DELAY_SECONDS=
### FEATURE FLAGS ###
AUTO_DISABLE_FAILING_CONNECTIONS=false
# NOTE(review): `config` presumably selects the file-based flags (flags.yml,
# mounted through the compose `configs` entry) — confirm.
FEATURE_FLAG_CLIENT=config
### MONITORING FLAGS ###
# Accepted values are datadog and otel (open telemetry)
# Empty: no metric client is selected.
METRIC_CLIENT=
# Useful only when metric client is set to be otel. Must start with http:// or https://.
OTEL_COLLECTOR_ENDPOINT="http://host.docker.internal:4317"
USE_STREAM_CAPABLE_STATE=true
AUTO_DETECT_SCHEMA=true
# To test local catalog changes, set the below variable to the path of your local catalog.
# https://docs.airbyte.com/connector-development/testing-connectors/testing-a-local-catalog-in-development
LOCAL_CONNECTOR_CATALOG_PATH=
docker-compose.yaml file:
version: "3.8"
# https://github.com/compose-spec/compose-spec/blob/master/spec.md#using-extensions-as-fragments
# Shared logging fragment; services reference it with `logging: *default-logging`.
# Caps each container's json-file log at 5 files of 100 MB.
x-logging: &default-logging
  options:
    max-size: "100m"
    max-file: "5"
  driver: json-file
# Service definitions for the Airbyte platform.
# Every ${VAR} is interpolated by Compose from the environment / .env file.
# Variables that are referenced without a `:-` default and are not defined
# there (e.g. DEPLOYMENT_MODE) resolve to an empty string with a warning.
services:
  # hook in case we need to add init behavior
  # every root service (no depends_on) should depend on init
  init:
    image: airbyte/init:${VERSION}
    logging: *default-logging
    container_name: init
    # Runs the mount-directory script against the bind-mounted parent directory.
    command: /bin/sh -c "./scripts/create_mount_directories.sh /local_parent ${HACK_LOCAL_ROOT_PARENT} ${LOCAL_ROOT}"
    environment:
      - LOCAL_ROOT=${LOCAL_ROOT}
      - HACK_LOCAL_ROOT_PARENT=${HACK_LOCAL_ROOT_PARENT}
    volumes:
      - ${HACK_LOCAL_ROOT_PARENT}:/local_parent

  # Must exit successfully before worker, server, webapp, cron and the
  # connector builder start (see their depends_on conditions).
  bootloader:
    image: airbyte/bootloader:${VERSION}
    logging: *default-logging
    container_name: airbyte-bootloader
    environment:
      - AIRBYTE_VERSION=${VERSION}
      - DATABASE_PASSWORD=${DATABASE_PASSWORD}
      - DATABASE_URL=${DATABASE_URL}
      - DATABASE_USER=${DATABASE_USER}
      - LOG_LEVEL=${LOG_LEVEL}
      - LOCAL_CONNECTOR_CATALOG_PATH=${LOCAL_CONNECTOR_CATALOG_PATH}
    networks:
      - airbyte_internal
    depends_on:
      init:
        condition: service_completed_successfully

  # Database container; its data directory is kept in the named volume `db`.
  db:
    image: airbyte/db:${VERSION}
    logging: *default-logging
    container_name: airbyte-db
    restart: unless-stopped
    environment:
      - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-}
      - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-}
      - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-}
      - DATABASE_PASSWORD=${DATABASE_PASSWORD}
      - DATABASE_URL=${DATABASE_URL}
      - DATABASE_USER=${DATABASE_USER}
      - POSTGRES_PASSWORD=${DATABASE_PASSWORD}
      - POSTGRES_USER=${DATABASE_USER}
    volumes:
      - db:/var/lib/postgresql/data
    networks:
      - airbyte_internal

  worker:
    image: airbyte/worker:${VERSION}
    logging: *default-logging
    container_name: airbyte-worker
    restart: unless-stopped
    environment:
      - AIRBYTE_VERSION=${VERSION}
      - AUTO_DISABLE_FAILING_CONNECTIONS=${AUTO_DISABLE_FAILING_CONNECTIONS}
      - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-}
      - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-}
      - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-}
      - CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-}
      - CONFIG_ROOT=${CONFIG_ROOT}
      - DATABASE_PASSWORD=${DATABASE_PASSWORD}
      - DATABASE_URL=${DATABASE_URL}
      - DATABASE_USER=${DATABASE_USER}
      - DEPLOYMENT_MODE=${DEPLOYMENT_MODE}
      - FEATURE_FLAG_CLIENT=${FEATURE_FLAG_CLIENT}
      - LAUNCHDARKLY_KEY=${LAUNCHDARKLY_KEY}
      - INTERNAL_API_HOST=${INTERNAL_API_HOST}
      - JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-}
      - JOB_MAIN_CONTAINER_CPU_LIMIT=${JOB_MAIN_CONTAINER_CPU_LIMIT}
      - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST}
      - JOB_MAIN_CONTAINER_MEMORY_LIMIT=${JOB_MAIN_CONTAINER_MEMORY_LIMIT}
      - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST}
      - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT}
      - LOCAL_ROOT=${LOCAL_ROOT}
      - LOG_LEVEL=${LOG_LEVEL}
      - LOG_CONNECTOR_MESSAGES=${LOG_CONNECTOR_MESSAGES}
      - MAX_CHECK_WORKERS=${MAX_CHECK_WORKERS}
      - MAX_DISCOVER_WORKERS=${MAX_DISCOVER_WORKERS}
      - MAX_SPEC_WORKERS=${MAX_SPEC_WORKERS}
      - MAX_SYNC_WORKERS=${MAX_SYNC_WORKERS}
      - MAX_NOTIFY_WORKERS=${MAX_NOTIFY_WORKERS}
      - SHOULD_RUN_NOTIFY_WORKFLOWS=${SHOULD_RUN_NOTIFY_WORKFLOWS}
      - NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_LIMIT=${NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_LIMIT}
      - NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_REQUEST=${NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_REQUEST}
      - NORMALIZATION_JOB_MAIN_CONTAINER_CPU_LIMIT=${NORMALIZATION_JOB_MAIN_CONTAINER_CPU_LIMIT}
      - NORMALIZATION_JOB_MAIN_CONTAINER_CPU_REQUEST=${NORMALIZATION_JOB_MAIN_CONTAINER_CPU_REQUEST}
      - SECRET_PERSISTENCE=${SECRET_PERSISTENCE}
      - SYNC_JOB_MAX_ATTEMPTS=${SYNC_JOB_MAX_ATTEMPTS}
      - SYNC_JOB_MAX_TIMEOUT_DAYS=${SYNC_JOB_MAX_TIMEOUT_DAYS}
      - SYNC_JOB_INIT_RETRY_TIMEOUT_MINUTES=${SYNC_JOB_INIT_RETRY_TIMEOUT_MINUTES}
      - TEMPORAL_HOST=${TEMPORAL_HOST}
      - TRACKING_STRATEGY=${TRACKING_STRATEGY}
      - WEBAPP_URL=${WEBAPP_URL}
      - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT}
      - WORKSPACE_ROOT=${WORKSPACE_ROOT}
      - METRIC_CLIENT=${METRIC_CLIENT}
      - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT}
      - JOB_ERROR_REPORTING_STRATEGY=${JOB_ERROR_REPORTING_STRATEGY}
      - JOB_ERROR_REPORTING_SENTRY_DSN=${JOB_ERROR_REPORTING_SENTRY_DSN}
      - ACTIVITY_MAX_ATTEMPT=${ACTIVITY_MAX_ATTEMPT}
      - ACTIVITY_INITIAL_DELAY_BETWEEN_ATTEMPTS_SECONDS=${ACTIVITY_INITIAL_DELAY_BETWEEN_ATTEMPTS_SECONDS}
      - ACTIVITY_MAX_DELAY_BETWEEN_ATTEMPTS_SECONDS=${ACTIVITY_MAX_DELAY_BETWEEN_ATTEMPTS_SECONDS}
      - WORKFLOW_FAILURE_RESTART_DELAY_SECONDS=${WORKFLOW_FAILURE_RESTART_DELAY_SECONDS}
      - AUTO_DETECT_SCHEMA=${AUTO_DETECT_SCHEMA}
      - USE_STREAM_CAPABLE_STATE=${USE_STREAM_CAPABLE_STATE}
      - MICRONAUT_ENVIRONMENTS=${WORKERS_MICRONAUT_ENVIRONMENTS}
      - APPLY_FIELD_SELECTION=${APPLY_FIELD_SELECTION}
      - FIELD_SELECTION_WORKSPACES=${FIELD_SELECTION_WORKSPACES}
    configs:
      - flags
    volumes:
      # Host Docker socket is exposed to the worker.
      # NOTE(review): presumably so it can launch job containers — confirm.
      - /var/run/docker.sock:/var/run/docker.sock
      - workspace:${WORKSPACE_ROOT}
      - ${LOCAL_ROOT}:${LOCAL_ROOT}
    ports:
      # Container port only; Compose assigns the host port.
      - "9000"
    networks:
      - airbyte_internal
    depends_on:
      bootloader:
        condition: service_completed_successfully

  server:
    image: airbyte/server:${VERSION}
    logging: *default-logging
    container_name: airbyte-server
    restart: unless-stopped
    environment:
      - AIRBYTE_ROLE=${AIRBYTE_ROLE:-}
      - AIRBYTE_VERSION=${VERSION}
      - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-}
      - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-}
      - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-}
      - CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-}
      - CONFIG_ROOT=${CONFIG_ROOT}
      - DATABASE_PASSWORD=${DATABASE_PASSWORD}
      - DATABASE_URL=${DATABASE_URL}
      - DATABASE_USER=${DATABASE_USER}
      - FEATURE_FLAG_CLIENT=${FEATURE_FLAG_CLIENT}
      - LAUNCHDARKLY_KEY=${LAUNCHDARKLY_KEY}
      - JOB_MAIN_CONTAINER_CPU_LIMIT=${JOB_MAIN_CONTAINER_CPU_LIMIT}
      - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST}
      - JOB_MAIN_CONTAINER_MEMORY_LIMIT=${JOB_MAIN_CONTAINER_MEMORY_LIMIT}
      - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST}
      - JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-}
      - LOG_LEVEL=${LOG_LEVEL}
      - NEW_SCHEDULER=${NEW_SCHEDULER}
      - SECRET_PERSISTENCE=${SECRET_PERSISTENCE}
      - TEMPORAL_HOST=${TEMPORAL_HOST}
      - TRACKING_STRATEGY=${TRACKING_STRATEGY}
      - JOB_ERROR_REPORTING_STRATEGY=${JOB_ERROR_REPORTING_STRATEGY}
      - JOB_ERROR_REPORTING_SENTRY_DSN=${JOB_ERROR_REPORTING_SENTRY_DSN}
      - WEBAPP_URL=${WEBAPP_URL}
      - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT}
      - WORKSPACE_ROOT=${WORKSPACE_ROOT}
      - GITHUB_STORE_BRANCH=${GITHUB_STORE_BRANCH}
      - MICRONAUT_ENVIRONMENTS=${WORKERS_MICRONAUT_ENVIRONMENTS}
      - AUTO_DETECT_SCHEMA=${AUTO_DETECT_SCHEMA}
      # NOTE(review): hard-coded here, while the worker reads
      # ${MAX_NOTIFY_WORKERS} — confirm this is intentional.
      - MAX_NOTIFY_WORKERS=5
      - SHOULD_RUN_NOTIFY_WORKFLOWS=${SHOULD_RUN_NOTIFY_WORKFLOWS}
    ports:
      - "8001"
    configs:
      - flags
    volumes:
      - workspace:${WORKSPACE_ROOT}
      - data:${CONFIG_ROOT}
      - ${LOCAL_ROOT}:${LOCAL_ROOT}
    networks:
      - airbyte_internal
    depends_on:
      bootloader:
        condition: service_completed_successfully

  webapp:
    image: airbyte/webapp:${VERSION}
    logging: *default-logging
    container_name: airbyte-webapp
    restart: unless-stopped
    ports:
      - "80"
    environment:
      - INTERNAL_API_HOST=${INTERNAL_API_HOST}
      - CONNECTOR_BUILDER_API_HOST=${CONNECTOR_BUILDER_API_HOST}
      - TRACKING_STRATEGY=${TRACKING_STRATEGY}
    networks:
      - airbyte_internal
    depends_on:
      bootloader:
        condition: service_completed_successfully

  airbyte-temporal:
    image: airbyte/temporal:${VERSION}
    logging: *default-logging
    container_name: airbyte-temporal
    restart: unless-stopped
    environment:
      - DB=postgresql
      - DB_PORT=${DATABASE_PORT}
      - DYNAMIC_CONFIG_FILE_PATH=config/dynamicconfig/development.yaml
      - LOG_LEVEL=${LOG_LEVEL}
      - POSTGRES_PWD=${DATABASE_PASSWORD}
      - POSTGRES_SEEDS=${DATABASE_HOST}
      - POSTGRES_USER=${DATABASE_USER}
    volumes:
      # Relative bind mount: resolved against the directory holding this file.
      # NOTE(review): when ./temporal/dynamicconfig is missing on the host,
      # Docker creates it as an empty directory, which hides whatever the image
      # ships at that path — confirm Temporal tolerates a missing
      # development.yaml if only the three generated files are copied.
      - ./temporal/dynamicconfig:/etc/temporal/config/dynamicconfig
    networks:
      - airbyte_internal

  airbyte-cron:
    image: airbyte/cron:${VERSION}
    logging: *default-logging
    container_name: airbyte-cron
    restart: unless-stopped
    environment:
      - AIRBYTE_VERSION=${VERSION}
      - CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION}
      - DATABASE_PASSWORD=${DATABASE_PASSWORD}
      - DATABASE_URL=${DATABASE_URL}
      - DATABASE_USER=${DATABASE_USER}
      - DEPLOYMENT_MODE=${DEPLOYMENT_MODE}
      - LOG_LEVEL=${LOG_LEVEL}
      - REMOTE_CONNECTOR_CATALOG_URL=${REMOTE_CONNECTOR_CATALOG_URL}
      - TEMPORAL_HISTORY_RETENTION_IN_DAYS=${TEMPORAL_HISTORY_RETENTION_IN_DAYS}
      - UPDATE_DEFINITIONS_CRON_ENABLED=${UPDATE_DEFINITIONS_CRON_ENABLED}
      - WORKSPACE_ROOT=${WORKSPACE_ROOT}
      - MICRONAUT_ENVIRONMENTS=${CRON_MICRONAUT_ENVIRONMENTS}
    configs:
      - flags
    volumes:
      - workspace:${WORKSPACE_ROOT}
    networks:
      - airbyte_internal
    depends_on:
      bootloader:
        condition: service_completed_successfully

  airbyte-connector-builder-server:
    image: airbyte/connector-atelier-server:${VERSION}
    logging: *default-logging
    container_name: airbyte-connector-builder-server
    restart: unless-stopped
    ports:
      # Quoted like every other port entry in this file.
      - "80"
    environment:
      - AIRBYTE_VERSION=${VERSION}
      - CDK_VERSION=${CDK_VERSION}
      - DEPLOYMENT_MODE=${DEPLOYMENT_MODE}
      - PYTHON_VERSION=${PYTHON_VERSION}
    networks:
      - airbyte_internal
    depends_on:
      bootloader:
        condition: service_completed_successfully

  # The only service that publishes fixed host ports and the only one attached
  # to airbyte_public.
  # NOTE(review): unlike the other services it has no
  # `logging: *default-logging`, so the log-size cap does not apply — confirm.
  airbyte-proxy:
    image: airbyte/proxy:${VERSION}
    container_name: airbyte-proxy
    restart: unless-stopped
    ports:
      - "8000:8000"
      - "8001:8001"
      - "8003:8003"
    environment:
      - BASIC_AUTH_USERNAME=${BASIC_AUTH_USERNAME}
      - BASIC_AUTH_PASSWORD=${BASIC_AUTH_PASSWORD}
      - BASIC_AUTH_PROXY_TIMEOUT=${BASIC_AUTH_PROXY_TIMEOUT}
    networks:
      - airbyte_internal
      - airbyte_public
    depends_on:
      - webapp
      - server
# Named volumes; the actual volume names come from the *_DOCKER_MOUNT variables.
volumes:
  workspace:
    name: ${WORKSPACE_DOCKER_MOUNT}
  # the data volume is only needed for backward compatibility; when users upgrade
  # from an old Airbyte version that relies on file-based configs, the server needs
  # to read this volume to copy their configs to the database
  data:
    name: ${DATA_DOCKER_MOUNT}
  db:
    name: ${DB_DOCKER_MOUNT}
# `flags` is read from flags.yml next to this compose file and is attached to
# the worker, server and cron services through their `configs` entries.
configs:
  flags:
    file: ./flags.yml
# Two networks with default settings; only airbyte-proxy joins airbyte_public.
networks:
  airbyte_public:
  airbyte_internal:
flags.yml
# Feature-flag definitions: a list of entries, each with a flag `name` and the
# value to `serve` for it. Value types are kept exactly as provided
# (booleans, the quoted string "10800", the integer 24).
flags:
  - name: performance.backgroundJsonSchemaValidation
    serve: false
  - name: heartbeat.failSync
    serve: true
  - name: platform.commitStatsAsap
    serve: true
  - name: connectors.versionOverridesEnabled
    serve: true
  - name: billing.newTrialPolicy
    serve: false
  - name: heartbeat-max-seconds-between-messages
    serve: "10800"
  - name: use-new-notification-workflow
    serve: false
  - name: validation.removeValidationLimit
    serve: false
  - name: autopropagation.enabled
    serve: true
  - name: connection.columnSelection
    serve: true
  - name: refreshSchema.period.hours
    serve: 24
  - name: concurrent.source.stream.read
    serve: false
  - name: platform.add-scheduling-jitter
    serve: false