~n0mn0m/airflow-docker

fe8dd675f7be2d8dddd2ac43ca18efc591faee16 — Alex Hagerman 1 year, 5 months ago 40cfa94 + 47d6e96
Merge pull request #1 from AlexHagerman/rabbitmq-sql-server

Database and Task Queue change
6 files changed, 101 insertions(+), 114 deletions(-)

D .circleci/config.yml
M .gitignore
M Dockerfile
M docker-compose-CeleryExecutor.yml
M docker-compose-LocalExecutor.yml
M script/entrypoint.sh
D .circleci/config.yml => .circleci/config.yml +0 -30
@@ 1,30 0,0 @@
# CircleCI configuration (config schema version 2).
version: 2

jobs:
  # Builds the image from the repository root and tags it puckel/docker-airflow.
  build:
    docker:
      - image: docker:18.06.1-ce-git
    working_directory: ~/CircleCI/docker-airflow
    steps:
      - checkout
      # Request a remote Docker engine with layer caching enabled.
      - setup_remote_docker:
          docker_layer_caching: true
      - run: |
          docker build -t puckel/docker-airflow .

  # Runs the image's `version` command and checks the output mentions 1.10.1.
  # NOTE(review): this job has no checkout/build step and requests its own
  # remote Docker engine; whether it sees the image produced by `build` (rather
  # than pulling puckel/docker-airflow from a registry) is not shown here — confirm.
  test:
    docker:
      - image: docker:18.06.1-ce-git
    steps:
      - setup_remote_docker
      - run: |
          docker run puckel/docker-airflow version |grep '1.10.1'

workflows:
  version: 2
  # Single workflow: `test` only starts after `build` has succeeded.
  build_and_test:
    jobs:
      - build
      - test:
          requires:
            - build

M .gitignore => .gitignore +3 -0
@@ 24,3 24,6 @@ sftp-config.json

# Python
__pycache__

# custom
.env
\ No newline at end of file

M Dockerfile => Dockerfile +37 -7
@@ 5,7 5,7 @@
# SOURCE: https://github.com/puckel/docker-airflow

FROM python:3.6-slim
LABEL maintainer="Puckel_"
LABEL maintainer="Alex Hagerman"

# Never prompts the user for choices on installation/configuration of packages
ENV DEBIAN_FRONTEND noninteractive


@@ 15,7 15,7 @@ ENV TERM linux
ARG AIRFLOW_VERSION=1.10.1
ARG AIRFLOW_HOME=/usr/local/airflow
ARG AIRFLOW_DEPS=""
ARG PYTHON_DEPS=""
ARG PYTHON_DEPS="pyodbc"
ENV AIRFLOW_GPL_UNIDECODE yes

# Define en_US.


@@ 25,6 25,9 @@ ENV LC_ALL en_US.UTF-8
ENV LC_CTYPE en_US.UTF-8
ENV LC_MESSAGES en_US.UTF-8

# MS SQL EULA
ENV ACCEPT_EULA=Y

RUN set -ex \
    && buildDeps=' \
        freetds-dev \


@@ 47,19 50,40 @@ RUN set -ex \
        rsync \
        netcat \
        locales \
        gnupg2 \
        apt-transport-https \
    && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \
    && locale-gen \
    && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \
    && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \
    && pip install -U pip setuptools wheel \
    && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow

# Undocumented mssql dependency
# NOTE(review): the heading above presumably refers to the libssl1.0.0 package
# installed from the jessie repositories by the RUN steps that follow — confirm.
# This step itself only appends the mssql-tools bin directory to PATH in
# ~/.bash_profile of whichever user runs the build step (no USER instruction is
# visible before this point, so presumably root — confirm).
RUN echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bash_profile 
# Register the Debian jessie main and security repositories (binary and source)
# in /etc/apt/sources.list.d/jessie.list.
# printf '%s\n' writes one argument per line, so the result no longer depends on
# the shell's echo interpreting "\n" escapes (dash does, bash's builtin does not).
# The appended text is identical to what the previous echo produced under /bin/sh.
# NOTE(review): jessie is end-of-life and httpredir.debian.org has been retired;
# these URLs may need to point at the Debian archive mirror — confirm before
# relying on this step.
RUN printf '%s\n' \
        'deb http://httpredir.debian.org/debian jessie main contrib non-free' \
        'deb-src http://httpredir.debian.org/debian jessie main contrib non-free' \
        '' \
        'deb http://security.debian.org/ jessie/updates main contrib non-free' \
        'deb-src http://security.debian.org/ jessie/updates main contrib non-free' \
        >> /etc/apt/sources.list.d/jessie.list

# Install libssl1.0.0 after refreshing the package index.
# apt-get is used instead of apt because apt's command-line interface is meant
# for interactive use, and -yqq answers prompts automatically so the build
# cannot stall or abort on a confirmation question (same flags as the
# mssql-tools install step in this file).
RUN apt-get update -yqq \
    && apt-get install -yqq libssl1.0.0

# Trust Microsoft's package signing key and register Microsoft's apt repository.
# Each download goes to a file with `curl -fsSL`, so an HTTP error or a failed
# transfer fails the build instead of silently feeding an empty or HTML error
# page into apt-key or the sources list (the previous pipelines hid curl's exit
# status, and /bin/sh here may not support `pipefail`).
# NOTE(review): the repository list is the ubuntu/16.04 one although the base
# image is Debian-based — confirm this is intentional.
RUN curl -fsSL https://packages.microsoft.com/keys/microsoft.asc -o /tmp/microsoft.asc \
    && apt-key add /tmp/microsoft.asc \
    && rm -f /tmp/microsoft.asc \
    && curl -fsSL https://packages.microsoft.com/config/ubuntu/16.04/prod.list \
        -o /etc/apt/sources.list.d/msprod.list

# Install the mssql-tools and unixodbc-dev packages. The package index is
# refreshed first so that repositories registered by earlier steps are visible.
# NOTE(review): ACCEPT_EULA=Y is set via ENV earlier in this file; presumably
# the Microsoft packages read it to skip the licence prompt — confirm.
RUN apt-get update -yqq \
    && apt-get install -yqq mssql-tools unixodbc-dev

RUN pip install -U pip setuptools wheel \
    && pip install pytz \
    && pip install pyOpenSSL \
    && pip install ndg-httpsclient \
    && pip install pyasn1 \
    && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \
    && pip install apache-airflow[crypto,celery,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \
    && pip install 'redis>=2.10.5,<3' \
    && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \
    && apt-get purge --auto-remove -yqq $buildDeps \
    && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi

RUN apt-get purge --auto-remove -yqq $buildDeps \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf \


@@ 74,6 98,12 @@ COPY script/entrypoint.sh /entrypoint.sh
COPY config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg

RUN chown -R airflow: ${AIRFLOW_HOME}
# RUN echo "[ODBC Driver 17 for SQL Server]\n\
# Description=Microsoft ODBC Driver 17 for SQL Server\n\
# Driver=/opt/microsoft/msodbcsql17/lib64/libmsodbcsql-17.2.so.0.1" >> /etc/odbcinst.ini

# Make the mssql-tools binaries available on PATH.
# ENV applies to every later build step and to every process started in the
# container, whatever the user or shell. The shell startup files alone only
# affect bash sessions of the user that ran this build step.
ENV PATH="${PATH}:/opt/mssql-tools/bin"

# Keep the startup-file entries as well: a bash login shell may rebuild PATH
# from the system profile, in which case these lines re-add the directory.
RUN echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bash_profile \
    && echo 'export PATH="$PATH:/opt/mssql-tools/bin"' >> ~/.bashrc

EXPOSE 8080 5555 8793


M docker-compose-CeleryExecutor.yml => docker-compose-CeleryExecutor.yml +38 -40
@@ 1,38 1,40 @@
version: '2.1'
services:
    redis:
        image: 'redis:3.2.7'
        # command: redis-server --requirepass redispass
version: '3'

    postgres:
        image: postgres:9.6
services:
    rabbitmq:
        image: rabbitmq:3-management
        hostname: rabbitmq
        environment:
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
        # Uncomment these lines to persist data on the local filesystem.
        #     - PGDATA=/var/lib/postgresql/data/pgdata
        # volumes:
        #     - ./pgdata:/var/lib/postgresql/data/pgdata
        - RABBITMQ_ERLANG_COOKIE=${RABBITMQ_ERLANG_COOKIE}
        - RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER}
        - RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS}
        - RABBITMQ_DEFAULT_VHOST=${RABBITMQ_DEFAULT_VHOST}

    # SQL Server container; the entrypoint script waits for host "mssql" on
    # port 1433 before starting Airflow with a non-Sequential executor.
    mssql:
        # NOTE(review): ":latest" makes the stack non-reproducible; consider
        # pinning a specific image tag once the target version is known.
        image: microsoft/mssql-server-linux:latest
        environment:
            - ACCEPT_EULA=Y
            # Taken from the environment / .env file like the other settings
            # in this file; falls back to the previous hard-coded value so
            # existing setups keep working. Override it outside of local demos.
            - SA_PASSWORD=${SA_PASSWORD:-YourStrong!Passw0rd}
        ports:
            # Quoted like the other port mappings in this file, so YAML never
            # parses HOST:CONTAINER as a number.
            - "1433:1433"
        volumes:
            # Anonymous volume for the SQL Server data directory.
            - /var/opt/mssql

    webserver:
        image: puckel/docker-airflow:1.10.1
        image: docker-airflow:latest
        restart: always
        depends_on:
            - postgres
            - redis
            - mssql
            - rabbitmq
        environment:
            - LOAD_EX=n
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            # - POSTGRES_USER=airflow
            # - POSTGRES_PASSWORD=airflow
            # - POSTGRES_DB=airflow
            # - REDIS_PASSWORD=redispass
            - AIRFLOW__CELERY__BROKER_URL=${CELERY_RABBIT_BROKER}
            - AIRFLOW__CORE__SQL_ALCHEMY_CONN=${SQL_ALCHEMY_CONN}
            - AIRFLOW__CELERY__RESULT_BACKEND=${CELERY_RESULTS_BACKEND}
        volumes:
            - ./dags:/usr/local/airflow/dags
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        ports:
            - "8080:8080"
        command: webserver


@@ 43,50 45,46 @@ services:
            retries: 3

    flower:
        image: puckel/docker-airflow:1.10.1
        image: docker-airflow:latest
        restart: always
        depends_on:
            - redis
            - rabbitmq
        environment:
            - EXECUTOR=Celery
            # - REDIS_PASSWORD=redispass
            - AIRFLOW__CELERY__BROKER_URL=${CELERY_RABBIT_BROKER}
            - AIRFLOW__CORE__SQL_ALCHEMY_CONN=${SQL_ALCHEMY_CONN}
            - AIRFLOW__CELERY__RESULT_BACKEND=${CELERY_RESULTS_BACKEND}
        ports:
            - "5555:5555"
        command: flower

    scheduler:
        image: puckel/docker-airflow:1.10.1
        image: docker-airflow:latest
        restart: always
        depends_on:
            - webserver
        volumes:
            - ./dags:/usr/local/airflow/dags
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        environment:
            - LOAD_EX=n
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            # - POSTGRES_USER=airflow
            # - POSTGRES_PASSWORD=airflow
            # - POSTGRES_DB=airflow
            # - REDIS_PASSWORD=redispass
            - AIRFLOW__CELERY__BROKER_URL=${CELERY_RABBIT_BROKER}
            - AIRFLOW__CORE__SQL_ALCHEMY_CONN=${SQL_ALCHEMY_CONN}
            - AIRFLOW__CELERY__RESULT_BACKEND=${CELERY_RESULTS_BACKEND}
        command: scheduler

    worker:
        image: puckel/docker-airflow:1.10.1
        image: docker-airflow:latest
        restart: always
        depends_on:
            - scheduler
        volumes:
            - ./dags:/usr/local/airflow/dags
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        environment:
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            # - POSTGRES_USER=airflow
            # - POSTGRES_PASSWORD=airflow
            # - POSTGRES_DB=airflow
            # - REDIS_PASSWORD=redispass
            - AIRFLOW__CELERY__BROKER_URL=${CELERY_RABBIT_BROKER}
            - AIRFLOW__CORE__SQL_ALCHEMY_CONN=${SQL_ALCHEMY_CONN}
            - AIRFLOW__CELERY__RESULT_BACKEND=${CELERY_RESULTS_BACKEND}
        command: worker

M docker-compose-LocalExecutor.yml => docker-compose-LocalExecutor.yml +15 -11
@@ 1,24 1,28 @@
version: '2.1'
version: '3'

services:
    postgres:
        image: postgres:9.6
        environment:
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
    # SQL Server container; the entrypoint script waits for host "mssql" on
    # port 1433 before starting Airflow with a non-Sequential executor.
    mssql:
        # NOTE(review): ":latest" makes the stack non-reproducible; consider
        # pinning a specific image tag once the target version is known.
        image: microsoft/mssql-server-linux:latest
        environment:
            - ACCEPT_EULA=Y
            # Taken from the environment / .env file like the other settings
            # in this file; falls back to the previous hard-coded value so
            # existing setups keep working. Override it outside of local demos.
            - SA_PASSWORD=${SA_PASSWORD:-YourStrong!Passw0rd}
        ports:
            # Quoted like the webserver port mapping in this file, so YAML
            # never parses HOST:CONTAINER as a number.
            - "1433:1433"
        volumes:
            # Anonymous volume for the SQL Server data directory.
            - /var/opt/mssql

    webserver:
        image: puckel/docker-airflow:1.10.1
        image: docker-airflow:latest
        restart: always
        depends_on:
            - postgres
            - mssql
        environment:
            - LOAD_EX=n
            - EXECUTOR=Local
            - AIRFLOW__CORE__SQL_ALCHEMY_CONN=${SQL_ALCHEMY_CONN}
            - AIRFLOW__CELERY__RESULT_BACKEND=${CELERY_RESULTS_BACKEND}
        volumes:
            - ./dags:/usr/local/airflow/dags
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        ports:
            - "8080:8080"
        command: webserver

M script/entrypoint.sh => script/entrypoint.sh +8 -26
@@ 1,29 1,14 @@
#!/usr/bin/env bash

TRY_LOOP="20"

: "${REDIS_HOST:="redis"}"
: "${REDIS_PORT:="6379"}"
: "${REDIS_PASSWORD:=""}"

: "${POSTGRES_HOST:="postgres"}"
: "${POSTGRES_PORT:="5432"}"
: "${POSTGRES_USER:="airflow"}"
: "${POSTGRES_PASSWORD:="airflow"}"
: "${POSTGRES_DB:="airflow"}"

# Defaults and back-compat
: "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}"
: "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}"

export \
  AIRFLOW__CELERY__BROKER_URL \
  AIRFLOW__CELERY__RESULT_BACKEND \
  AIRFLOW__CORE__EXECUTOR \
  AIRFLOW__CORE__FERNET_KEY \
  AIRFLOW__CORE__LOAD_EXAMPLES \
  AIRFLOW__CORE__SQL_ALCHEMY_CONN \


# Load DAGs examples (default: Yes)
if [[ -z "$AIRFLOW__CORE__LOAD_EXAMPLES" && "${LOAD_EX:=n}" == n ]]


@@ 36,12 21,6 @@ if [ -e "/requirements.txt" ]; then
    $(which pip) install --user -r /requirements.txt
fi

if [ -n "$REDIS_PASSWORD" ]; then
    REDIS_PREFIX=:${REDIS_PASSWORD}@
else
    REDIS_PREFIX=
fi

wait_for_port() {
  local name="$1" host="$2" port="$3"
  local j=0


@@ 57,14 36,11 @@ wait_for_port() {
}

if [ "$AIRFLOW__CORE__EXECUTOR" != "SequentialExecutor" ]; then
  AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB"
  AIRFLOW__CELERY__RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB"
  wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT"
  wait_for_port "SQL Server" "mssql" "1433"
fi

if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then
  AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1"
  wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT"
  wait_for_port "Rabbit MQ" "rabbitmq" "5672"
fi

case "$1" in


@@ 85,6 61,12 @@ case "$1" in
    sleep 10
    exec airflow "$@"
    ;;
  resetdb)
    exec airflow "$@"
    ;;
  initdb)
    exec airflow "$@"
    ;;
  version)
    exec airflow "$@"
    ;;