docker-compose airbyte

Deploying Airbyte with just docker-compose.yaml and variables


I am working on a project with Airbyte. Deploying it with Docker Compose is pretty easy if you follow the documentation: I just had to clone the repository and run docker compose up.

When I clone the repository a lot of files are downloaded. When I run docker compose up the images are downloaded too.

I did a test with a new directory containing just the docker-compose.yaml, .env and flags.yml files, required to start the containers. Everything worked fine!

What are the implications of running the containers with just these 3 files?

Thanks in advance!

I checked the number of connectors and functionality, both cases worked perfectly.


Solution

  • I was able to get it set up by initially following the instructions located here: https://docs.airbyte.com/deploying-airbyte/local-deployment/

    Running the ./run-ab-platform.sh command generates a docker-compose.yaml, a flags.yml and a .env file, which I copied over to a location with no internet access along with the images.

    Here are the files it produced with version 0.50.4

    .env file

    # This file only contains Docker relevant variables.
    #
    # Variables with defaults have been omitted to avoid duplication of defaults.
    # The only exception to the non-default rule are env vars related to scaling.
    #
    # See https://github.com/airbytehq/airbyte/blob/master/airbyte-config/config-models/src/main/java/io/airbyte/config/Configs.java
    # for the latest environment variables.
    #
    # Contributors - please organise this env file according to the above linked file.
    
    
    ### SHARED ###
    # Image tag for every airbyte/* image in docker-compose.yaml; also passed to
    # several services as AIRBYTE_VERSION.
    VERSION=0.50.5
    
    # When using the airbyte-db via default docker image
    # CONFIG_ROOT is where the `data` volume is mounted inside the server container.
    CONFIG_ROOT=/data
    # Names given to the `data` and `db` named volumes in docker-compose.yaml.
    DATA_DOCKER_MOUNT=airbyte_data
    DB_DOCKER_MOUNT=airbyte_db
    
    # Workspace storage for running jobs (logs, etc)
    # WORKSPACE_ROOT is the in-container mount point of the `workspace` volume
    # (worker, server, cron); WORKSPACE_DOCKER_MOUNT is that volume's name.
    WORKSPACE_ROOT=/tmp/workspace
    WORKSPACE_DOCKER_MOUNT=airbyte_workspace
    
    # Local mount to access local files from filesystem
    # todo (cgardens) - when we mount raw directories instead of named volumes, *_DOCKER_MOUNT must
    # be the same as *_ROOT.
    # Issue: https://github.com/airbytehq/airbyte/issues/578
    # LOCAL_ROOT is bind-mounted from the host to the same path in the worker and
    # server containers.
    LOCAL_ROOT=/tmp/airbyte_local
    LOCAL_DOCKER_MOUNT=/tmp/airbyte_local
    # todo (cgardens) - hack to handle behavior change in docker compose. *_PARENT directories MUST
    # already exist on the host filesystem and MUST be parents of *_ROOT.
    # Issue: https://github.com/airbytehq/airbyte/issues/577
    # Bind-mounted into the init container at /local_parent.
    HACK_LOCAL_ROOT_PARENT=/tmp
    
    # Proxy Configuration
    # Set to empty values, e.g. "" to disable basic auth
    # These three values are passed to the airbyte-proxy service.
    # NOTE(review): the username/password below are the shipped defaults; change them
    # before exposing the proxy ports beyond localhost.
    BASIC_AUTH_USERNAME=airbyte
    BASIC_AUTH_PASSWORD=password
    BASIC_AUTH_PROXY_TIMEOUT=900
    
    ### DATABASE ###
    # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db
    # DATABASE_USER / DATABASE_PASSWORD are also handed to the db service as
    # POSTGRES_USER / POSTGRES_PASSWORD and to airbyte-temporal as
    # POSTGRES_USER / POSTGRES_PWD.
    DATABASE_USER=docker
    DATABASE_PASSWORD=docker
    # `db` is the name of the database service in docker-compose.yaml;
    # airbyte-temporal receives it as POSTGRES_SEEDS and the port as DB_PORT.
    DATABASE_HOST=db
    DATABASE_PORT=5432
    DATABASE_DB=airbyte
    # translate manually DATABASE_URL=jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB} (do not include the username or password here)
    # Keep this in sync by hand with DATABASE_HOST, DATABASE_PORT and DATABASE_DB above.
    DATABASE_URL=jdbc:postgresql://db:5432/airbyte
    JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.40.26.001
    
    # Airbyte Internal Config Database, defaults to Job Database if empty. Explicitly left empty to mute docker compose warnings.
    CONFIG_DATABASE_USER=
    CONFIG_DATABASE_PASSWORD=
    CONFIG_DATABASE_URL=
    CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.40.23.002
    
    ### AIRBYTE SERVICES ###
    # Host names below match service/container names declared in docker-compose.yaml
    # (airbyte-temporal, airbyte-server, airbyte-connector-builder-server).
    TEMPORAL_HOST=airbyte-temporal:7233
    INTERNAL_API_HOST=airbyte-server:8001
    CONNECTOR_BUILDER_API_HOST=airbyte-connector-builder-server:80
    # Port 8000 is the port airbyte-proxy publishes on the host.
    WEBAPP_URL=http://localhost:8000/
    # Although not present as an env var, required for webapp configuration.
    # NOTE(review): not referenced by the docker-compose.yaml shown alongside this file.
    CONNECTOR_BUILDER_API_URL=/connector-builder-api
    
    ### JOBS ###
    # Relevant to scaling.
    # The SYNC_JOB_* values are passed to the worker service.
    SYNC_JOB_MAX_ATTEMPTS=3
    SYNC_JOB_MAX_TIMEOUT_DAYS=3
    SYNC_JOB_INIT_RETRY_TIMEOUT_MINUTES=5
    # Passed to both worker and server. Left empty here, so the containers receive
    # empty strings for them.
    JOB_MAIN_CONTAINER_CPU_REQUEST=
    JOB_MAIN_CONTAINER_CPU_LIMIT=
    JOB_MAIN_CONTAINER_MEMORY_REQUEST=
    JOB_MAIN_CONTAINER_MEMORY_LIMIT=
    
    # Passed to the worker only; likewise left empty.
    NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_LIMIT=
    NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_REQUEST=
    NORMALIZATION_JOB_MAIN_CONTAINER_CPU_LIMIT=
    NORMALIZATION_JOB_MAIN_CONTAINER_CPU_REQUEST=
    
    ### LOGGING/MONITORING/TRACKING ###
    # TRACKING_STRATEGY is passed to worker, server and webapp;
    # JOB_ERROR_REPORTING_STRATEGY to worker and server.
    TRACKING_STRATEGY=segment
    JOB_ERROR_REPORTING_STRATEGY=logging
    # Although not present as an env var, expected by Log4J configuration.
    # Passed to bootloader, worker, server, airbyte-temporal and airbyte-cron.
    LOG_LEVEL=INFO
    
    
    ### APPLICATIONS ###
    # Worker #
    # Exposed to the worker and server containers as MICRONAUT_ENVIRONMENTS.
    WORKERS_MICRONAUT_ENVIRONMENTS=control-plane
    # Cron #
    # Exposed to the airbyte-cron container as MICRONAUT_ENVIRONMENTS.
    CRON_MICRONAUT_ENVIRONMENTS=control-plane
    # Relevant to scaling.
    # The MAX_*_WORKERS values are passed to the worker service.
    MAX_SYNC_WORKERS=5
    MAX_SPEC_WORKERS=5
    MAX_CHECK_WORKERS=5
    MAX_DISCOVER_WORKERS=5
    MAX_NOTIFY_WORKERS=5
    SHOULD_RUN_NOTIFY_WORKFLOWS=true
    # Temporal Activity configuration
    # Passed to the worker; left empty here, so the worker receives empty strings.
    ACTIVITY_MAX_ATTEMPT=
    ACTIVITY_INITIAL_DELAY_BETWEEN_ATTEMPTS_SECONDS=
    ACTIVITY_MAX_DELAY_BETWEEN_ATTEMPTS_SECONDS=
    WORKFLOW_FAILURE_RESTART_DELAY_SECONDS=
    
    ### FEATURE FLAGS ###
    # Passed to the worker.
    AUTO_DISABLE_FAILING_CONNECTIONS=false
    
    # Passed to worker and server, which also get ./flags.yml attached as the
    # `flags` config.
    # NOTE(review): presumably `config` selects that file as the flag source - confirm.
    FEATURE_FLAG_CLIENT=config
    
    ### MONITORING FLAGS ###
    # Accepted values are datadog and otel (open telemetry)
    METRIC_CLIENT=
    # Useful only when metric client is set to be otel. Must start with http:// or https://.
    OTEL_COLLECTOR_ENDPOINT="http://host.docker.internal:4317"
    
    # USE_STREAM_CAPABLE_STATE goes to the worker; AUTO_DETECT_SCHEMA to worker and server.
    USE_STREAM_CAPABLE_STATE=true
    AUTO_DETECT_SCHEMA=true
    
    # To test local catalog changes, set the below variable to the path of your local catalog.
    # https://docs.airbyte.com/connector-development/testing-connectors/testing-a-local-catalog-in-development
    # Passed to the bootloader service.
    LOCAL_CONNECTOR_CATALOG_PATH=
    
    

    docker-compose.yaml file

    version: "3.8"
    # Logging settings declared once as an extension field and reused by services
    # through the *default-logging alias. See
    # https://github.com/compose-spec/compose-spec/blob/master/spec.md#using-extensions-as-fragments
    x-logging: &default-logging
      driver: json-file
      options:
        max-file: "5"
        max-size: "100m"
    services:
      # hook in case we need to add init behavior
      # every root service (no depends_on) should depend on init
      #
      # One-shot container: runs create_mount_directories.sh with the in-container
      # mount point, the host parent directory and LOCAL_ROOT as arguments.
      # bootloader waits for it to complete successfully.
      init:
        image: airbyte/init:${VERSION}
        logging: *default-logging
        container_name: init
        command: /bin/sh -c "./scripts/create_mount_directories.sh /local_parent ${HACK_LOCAL_ROOT_PARENT} ${LOCAL_ROOT}"
        environment:
          - LOCAL_ROOT=${LOCAL_ROOT}
          - HACK_LOCAL_ROOT_PARENT=${HACK_LOCAL_ROOT_PARENT}
        volumes:
          # Host directory bind-mounted so the script can work on it from inside
          # the container.
          - ${HACK_LOCAL_ROOT_PARENT}:/local_parent
      # Bootloader: started only after init has completed successfully. worker,
      # server, webapp, airbyte-cron and airbyte-connector-builder-server in turn
      # wait for this container to complete successfully.
      bootloader:
        image: airbyte/bootloader:${VERSION}
        logging: *default-logging
        container_name: airbyte-bootloader
        environment:
          - AIRBYTE_VERSION=${VERSION}
          - DATABASE_PASSWORD=${DATABASE_PASSWORD}
          - DATABASE_URL=${DATABASE_URL}
          - DATABASE_USER=${DATABASE_USER}
          - LOG_LEVEL=${LOG_LEVEL}
          - LOCAL_CONNECTOR_CATALOG_PATH=${LOCAL_CONNECTOR_CATALOG_PATH}
        networks:
          - airbyte_internal
        depends_on:
          init:
            condition: service_completed_successfully
      # Database container. Its data directory lives on the `db` named volume, so
      # it survives container re-creation.
      # NOTE(review): this service declares no depends_on, although the comment above
      # `init` says every root service should depend on init - confirm intent.
      db:
        image: airbyte/db:${VERSION}
        logging: *default-logging
        container_name: airbyte-db
        restart: unless-stopped
        environment:
          # `:-` falls back to an empty string when the variable is unset.
          - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-}
          - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-}
          - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-}
          - DATABASE_PASSWORD=${DATABASE_PASSWORD}
          - DATABASE_URL=${DATABASE_URL}
          - DATABASE_USER=${DATABASE_USER}
          # The same credentials are supplied under the POSTGRES_* names.
          - POSTGRES_PASSWORD=${DATABASE_PASSWORD}
          - POSTGRES_USER=${DATABASE_USER}
        volumes:
          - db:/var/lib/postgresql/data
        networks:
          - airbyte_internal
      # Worker service. Starts once the bootloader has completed successfully.
      # Variables that the accompanying .env file does not define use `${VAR:-}`
      # so they resolve to an empty string without Compose printing a
      # "variable is not set" warning; a value exported in the shell or added to
      # .env is still honoured.
      worker:
        image: airbyte/worker:${VERSION}
        logging: *default-logging
        container_name: airbyte-worker
        restart: unless-stopped
        environment:
          - AIRBYTE_VERSION=${VERSION}
          - AUTO_DISABLE_FAILING_CONNECTIONS=${AUTO_DISABLE_FAILING_CONNECTIONS}
          - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-}
          - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-}
          - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-}
          - CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-}
          - CONFIG_ROOT=${CONFIG_ROOT}
          - DATABASE_PASSWORD=${DATABASE_PASSWORD}
          - DATABASE_URL=${DATABASE_URL}
          - DATABASE_USER=${DATABASE_USER}
          - DEPLOYMENT_MODE=${DEPLOYMENT_MODE:-}
          - FEATURE_FLAG_CLIENT=${FEATURE_FLAG_CLIENT}
          - LAUNCHDARKLY_KEY=${LAUNCHDARKLY_KEY:-}
          - INTERNAL_API_HOST=${INTERNAL_API_HOST}
          - JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-}
          - JOB_MAIN_CONTAINER_CPU_LIMIT=${JOB_MAIN_CONTAINER_CPU_LIMIT}
          - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST}
          - JOB_MAIN_CONTAINER_MEMORY_LIMIT=${JOB_MAIN_CONTAINER_MEMORY_LIMIT}
          - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST}
          - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT}
          - LOCAL_ROOT=${LOCAL_ROOT}
          - LOG_LEVEL=${LOG_LEVEL}
          - LOG_CONNECTOR_MESSAGES=${LOG_CONNECTOR_MESSAGES:-}
          - MAX_CHECK_WORKERS=${MAX_CHECK_WORKERS}
          - MAX_DISCOVER_WORKERS=${MAX_DISCOVER_WORKERS}
          - MAX_SPEC_WORKERS=${MAX_SPEC_WORKERS}
          - MAX_SYNC_WORKERS=${MAX_SYNC_WORKERS}
          - MAX_NOTIFY_WORKERS=${MAX_NOTIFY_WORKERS}
          - SHOULD_RUN_NOTIFY_WORKFLOWS=${SHOULD_RUN_NOTIFY_WORKFLOWS}
          - NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_LIMIT=${NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_LIMIT}
          - NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_REQUEST=${NORMALIZATION_JOB_MAIN_CONTAINER_MEMORY_REQUEST}
          - NORMALIZATION_JOB_MAIN_CONTAINER_CPU_LIMIT=${NORMALIZATION_JOB_MAIN_CONTAINER_CPU_LIMIT}
          - NORMALIZATION_JOB_MAIN_CONTAINER_CPU_REQUEST=${NORMALIZATION_JOB_MAIN_CONTAINER_CPU_REQUEST}
          - SECRET_PERSISTENCE=${SECRET_PERSISTENCE:-}
          - SYNC_JOB_MAX_ATTEMPTS=${SYNC_JOB_MAX_ATTEMPTS}
          - SYNC_JOB_MAX_TIMEOUT_DAYS=${SYNC_JOB_MAX_TIMEOUT_DAYS}
          - SYNC_JOB_INIT_RETRY_TIMEOUT_MINUTES=${SYNC_JOB_INIT_RETRY_TIMEOUT_MINUTES}
          - TEMPORAL_HOST=${TEMPORAL_HOST}
          - TRACKING_STRATEGY=${TRACKING_STRATEGY}
          - WEBAPP_URL=${WEBAPP_URL}
          - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT}
          - WORKSPACE_ROOT=${WORKSPACE_ROOT}
          - METRIC_CLIENT=${METRIC_CLIENT}
          - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT}
          - JOB_ERROR_REPORTING_STRATEGY=${JOB_ERROR_REPORTING_STRATEGY}
          - JOB_ERROR_REPORTING_SENTRY_DSN=${JOB_ERROR_REPORTING_SENTRY_DSN:-}
          - ACTIVITY_MAX_ATTEMPT=${ACTIVITY_MAX_ATTEMPT}
          - ACTIVITY_INITIAL_DELAY_BETWEEN_ATTEMPTS_SECONDS=${ACTIVITY_INITIAL_DELAY_BETWEEN_ATTEMPTS_SECONDS}
          - ACTIVITY_MAX_DELAY_BETWEEN_ATTEMPTS_SECONDS=${ACTIVITY_MAX_DELAY_BETWEEN_ATTEMPTS_SECONDS}
          - WORKFLOW_FAILURE_RESTART_DELAY_SECONDS=${WORKFLOW_FAILURE_RESTART_DELAY_SECONDS}
          - AUTO_DETECT_SCHEMA=${AUTO_DETECT_SCHEMA}
          - USE_STREAM_CAPABLE_STATE=${USE_STREAM_CAPABLE_STATE}
          - MICRONAUT_ENVIRONMENTS=${WORKERS_MICRONAUT_ENVIRONMENTS}
          - APPLY_FIELD_SELECTION=${APPLY_FIELD_SELECTION:-}
          - FIELD_SELECTION_WORKSPACES=${FIELD_SELECTION_WORKSPACES:-}
        configs:
          # ./flags.yml, declared under the top-level `configs` key.
          - flags
        volumes:
          # The host's Docker socket is exposed inside the container.
          - /var/run/docker.sock:/var/run/docker.sock
          - workspace:${WORKSPACE_ROOT}
          # Host LOCAL_ROOT bind-mounted at the identical path in the container.
          - ${LOCAL_ROOT}:${LOCAL_ROOT}
        ports:
          # Container port only; Compose assigns the host port.
          - "9000"
        networks:
          - airbyte_internal
        depends_on:
          bootloader:
            condition: service_completed_successfully
      # API server. Starts once the bootloader has completed successfully.
      # Variables that the accompanying .env file does not define use `${VAR:-}`
      # so they resolve to an empty string without Compose printing a
      # "variable is not set" warning.
      server:
        image: airbyte/server:${VERSION}
        logging: *default-logging
        container_name: airbyte-server
        restart: unless-stopped
        environment:
          - AIRBYTE_ROLE=${AIRBYTE_ROLE:-}
          - AIRBYTE_VERSION=${VERSION}
          - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-}
          - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-}
          - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-}
          - CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-}
          - CONFIG_ROOT=${CONFIG_ROOT}
          - DATABASE_PASSWORD=${DATABASE_PASSWORD}
          - DATABASE_URL=${DATABASE_URL}
          - DATABASE_USER=${DATABASE_USER}
          - FEATURE_FLAG_CLIENT=${FEATURE_FLAG_CLIENT}
          - LAUNCHDARKLY_KEY=${LAUNCHDARKLY_KEY:-}
          - JOB_MAIN_CONTAINER_CPU_LIMIT=${JOB_MAIN_CONTAINER_CPU_LIMIT}
          - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST}
          - JOB_MAIN_CONTAINER_MEMORY_LIMIT=${JOB_MAIN_CONTAINER_MEMORY_LIMIT}
          - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST}
          - JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-}
          - LOG_LEVEL=${LOG_LEVEL}
          - NEW_SCHEDULER=${NEW_SCHEDULER:-}
          - SECRET_PERSISTENCE=${SECRET_PERSISTENCE:-}
          - TEMPORAL_HOST=${TEMPORAL_HOST}
          - TRACKING_STRATEGY=${TRACKING_STRATEGY}
          - JOB_ERROR_REPORTING_STRATEGY=${JOB_ERROR_REPORTING_STRATEGY}
          - JOB_ERROR_REPORTING_SENTRY_DSN=${JOB_ERROR_REPORTING_SENTRY_DSN:-}
          - WEBAPP_URL=${WEBAPP_URL}
          - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT:-}
          - WORKSPACE_ROOT=${WORKSPACE_ROOT}
          - GITHUB_STORE_BRANCH=${GITHUB_STORE_BRANCH:-}
          # The server is given the worker's Micronaut environment value.
          - MICRONAUT_ENVIRONMENTS=${WORKERS_MICRONAUT_ENVIRONMENTS}
          - AUTO_DETECT_SCHEMA=${AUTO_DETECT_SCHEMA}
          # Follows MAX_NOTIFY_WORKERS from the environment/.env, like the worker
          # service does; falls back to the previously hard-coded 5 when the
          # variable is unset or empty.
          - MAX_NOTIFY_WORKERS=${MAX_NOTIFY_WORKERS:-5}
          - SHOULD_RUN_NOTIFY_WORKFLOWS=${SHOULD_RUN_NOTIFY_WORKFLOWS}
        ports:
          # Container port only; Compose assigns the host port.
          - "8001"
        configs:
          # ./flags.yml, declared under the top-level `configs` key.
          - flags
        volumes:
          - workspace:${WORKSPACE_ROOT}
          - data:${CONFIG_ROOT}
          # Host LOCAL_ROOT bind-mounted at the identical path in the container.
          - ${LOCAL_ROOT}:${LOCAL_ROOT}
        networks:
          - airbyte_internal
        depends_on:
          bootloader:
            condition: service_completed_successfully
      # Web UI. Attached only to the internal network; airbyte-proxy depends on it
      # and is the service that publishes fixed host ports.
      webapp:
        image: airbyte/webapp:${VERSION}
        logging: *default-logging
        container_name: airbyte-webapp
        restart: unless-stopped
        ports:
          # Container port only; Compose assigns the host port.
          - "80"
        environment:
          - INTERNAL_API_HOST=${INTERNAL_API_HOST}
          - CONNECTOR_BUILDER_API_HOST=${CONNECTOR_BUILDER_API_HOST}
          - TRACKING_STRATEGY=${TRACKING_STRATEGY}
        networks:
          - airbyte_internal
        depends_on:
          bootloader:
            condition: service_completed_successfully
      # Temporal service, pointed at the same Postgres host and credentials as
      # the rest of the stack (DATABASE_* values from .env).
      airbyte-temporal:
        image: airbyte/temporal:${VERSION}
        logging: *default-logging
        container_name: airbyte-temporal
        restart: unless-stopped
        environment:
          - DB=postgresql
          - DB_PORT=${DATABASE_PORT}
          - DYNAMIC_CONFIG_FILE_PATH=config/dynamicconfig/development.yaml
          - LOG_LEVEL=${LOG_LEVEL}
          - POSTGRES_PWD=${DATABASE_PASSWORD}
          - POSTGRES_SEEDS=${DATABASE_HOST}
          - POSTGRES_USER=${DATABASE_USER}
        volumes:
          # Relative bind mount: resolved against the directory holding this
          # compose file.
          # NOTE(review): when only docker-compose.yaml, .env and flags.yml are
          # copied, ./temporal/dynamicconfig is not among them, so this mount would
          # presumably be an empty directory and development.yaml (see
          # DYNAMIC_CONFIG_FILE_PATH) would not come from the host - verify that
          # Temporal still starts as intended in that setup.
          - ./temporal/dynamicconfig:/etc/temporal/config/dynamicconfig
        networks:
          - airbyte_internal
      # Cron service. Starts once the bootloader has completed successfully.
      # Variables that the accompanying .env file does not define use `${VAR:-}`
      # so they resolve to an empty string without Compose printing a
      # "variable is not set" warning.
      airbyte-cron:
        image: airbyte/cron:${VERSION}
        logging: *default-logging
        container_name: airbyte-cron
        restart: unless-stopped
        environment:
          - AIRBYTE_VERSION=${VERSION}
          - CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION}
          - DATABASE_PASSWORD=${DATABASE_PASSWORD}
          - DATABASE_URL=${DATABASE_URL}
          - DATABASE_USER=${DATABASE_USER}
          - DEPLOYMENT_MODE=${DEPLOYMENT_MODE:-}
          - LOG_LEVEL=${LOG_LEVEL}
          - REMOTE_CONNECTOR_CATALOG_URL=${REMOTE_CONNECTOR_CATALOG_URL:-}
          - TEMPORAL_HISTORY_RETENTION_IN_DAYS=${TEMPORAL_HISTORY_RETENTION_IN_DAYS:-}
          - UPDATE_DEFINITIONS_CRON_ENABLED=${UPDATE_DEFINITIONS_CRON_ENABLED:-}
          - WORKSPACE_ROOT=${WORKSPACE_ROOT}
          - MICRONAUT_ENVIRONMENTS=${CRON_MICRONAUT_ENVIRONMENTS}
        configs:
          # ./flags.yml, declared under the top-level `configs` key.
          - flags
        volumes:
          - workspace:${WORKSPACE_ROOT}
        networks:
          - airbyte_internal
        depends_on:
          bootloader:
            condition: service_completed_successfully
      # Connector builder backend. Starts once the bootloader has completed
      # successfully. CONNECTOR_BUILDER_API_HOST in .env points at this service
      # name on port 80.
      airbyte-connector-builder-server:
        image: airbyte/connector-atelier-server:${VERSION}
        logging: *default-logging
        container_name: airbyte-connector-builder-server
        restart: unless-stopped
        ports:
          # Quoted like every other port entry in this file, so the value is
          # always read as a string. Container port only; Compose assigns the
          # host port.
          - "80"
        environment:
          - AIRBYTE_VERSION=${VERSION}
          # Not defined in the accompanying .env; `:-` yields an empty string
          # without a "variable is not set" warning.
          - CDK_VERSION=${CDK_VERSION:-}
          - DEPLOYMENT_MODE=${DEPLOYMENT_MODE:-}
          - PYTHON_VERSION=${PYTHON_VERSION:-}
        networks:
          - airbyte_internal
        depends_on:
          bootloader:
            condition: service_completed_successfully
      # Proxy in front of the stack: the only service that publishes fixed host
      # ports and the only one attached to airbyte_public as well as
      # airbyte_internal. Receives the BASIC_AUTH_* settings from .env.
      # NOTE(review): unlike every other service here, this one does not set
      # `logging: *default-logging` - confirm whether that is intentional.
      airbyte-proxy:
        image: airbyte/proxy:${VERSION}
        container_name: airbyte-proxy
        restart: unless-stopped
        ports:
          # 8000 is the port used by WEBAPP_URL in .env.
          - "8000:8000"
          - "8001:8001"
          - "8003:8003"
        environment:
          - BASIC_AUTH_USERNAME=${BASIC_AUTH_USERNAME}
          - BASIC_AUTH_PASSWORD=${BASIC_AUTH_PASSWORD}
          - BASIC_AUTH_PROXY_TIMEOUT=${BASIC_AUTH_PROXY_TIMEOUT}
        networks:
          - airbyte_internal
          - airbyte_public
        depends_on:
          # Short form: start order only, with no completion/health condition.
          - webapp
          - server
    # Named volumes; the explicit names come from .env.
    volumes:
      # Mounted at WORKSPACE_ROOT in worker, server and airbyte-cron.
      workspace:
        name: ${WORKSPACE_DOCKER_MOUNT}
      # the data volume is only needed for backward compatibility; when users upgrade
      # from an old Airbyte version that relies on file-based configs, the server needs
      # to read this volume to copy their configs to the database
      data:
        name: ${DATA_DOCKER_MOUNT}
      # Mounted at /var/lib/postgresql/data in the db service.
      db:
        name: ${DB_DOCKER_MOUNT}
    configs:
      # Attached to worker, server and airbyte-cron. The path is relative to
      # this compose file, so flags.yml must sit next to it.
      flags:
        file: ./flags.yml
    # Both networks use default settings. Only airbyte-proxy joins airbyte_public.
    networks:
      airbyte_public:
      airbyte_internal:
    

    flags.yml

    # Feature-flag list: each entry pairs a flag `name` with the value to `serve`.
    # docker-compose.yaml attaches this file to worker, server and airbyte-cron as
    # the `flags` config.
    # NOTE(review): `serve` values mix booleans, a quoted string ("10800") and a bare
    # integer (24); presumably the consumer expects exactly these types - confirm
    # before normalising them.
    flags:
      - name: performance.backgroundJsonSchemaValidation
        serve: false
      - name: heartbeat.failSync
        serve: true
      - name: platform.commitStatsAsap
        serve: true
      - name: connectors.versionOverridesEnabled
        serve: true
      - name: billing.newTrialPolicy
        serve: false
      - name: heartbeat-max-seconds-between-messages
        serve: "10800"
      - name: use-new-notification-workflow
        serve: false
      - name: validation.removeValidationLimit
        serve: false
      - name: autopropagation.enabled
        serve: true
      - name: connection.columnSelection
        serve: true
      - name: refreshSchema.period.hours
        serve: 24
      - name: concurrent.source.stream.read
        serve: false
      - name: platform.add-scheduling-jitter
        serve: false