from typing import Optional, Dict
from phi.app.db_app import DbApp
from phi.k8s.app.base import (
K8sApp,
AppVolumeType, # noqa: F401
ContainerContext,
ServiceType, # noqa: F401
RestartPolicy, # noqa: F401
ImagePullPolicy, # noqa: F401
)
from phi.utils.common import str_to_int
from phi.utils.log import logger


class AirflowBase(K8sApp):
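    """Base app for running Airflow on Kubernetes via phidata's K8sApp."""
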
# -*- App Name
name: str = "airflow"
# -*- Image Configuration
image_name: str = "phidata/airflow"
image_tag: str = "2.7.1"
# -*- App Ports
# Open a container port if open_port=True
open_port: bool = False
port_number: int = 8080
# -*- Workspace Configuration
# Path to the parent directory of the workspace inside the container
# When using git-sync, the git repo is cloned inside this directory
# i.e. this is the parent directory of the workspace
workspace_parent_dir_container_path: str = "/usr/local/workspace"
# -*- Airflow Configuration
# airflow_env sets the AIRFLOW_ENV env var and can be used by
# DAGs to separate dev/stg/prd code
airflow_env: Optional[str] = None
# Set the AIRFLOW_HOME env variable
# Defaults to: /usr/local/airflow
airflow_home: Optional[str] = None
# Set the AIRFLOW__CORE__DAGS_FOLDER env variable to the workspace_root/{airflow_dags_dir}
# By default, airflow_dags_dir is set to the "dags" folder in the workspace
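    # e.g. with the default "dags", AIRFLOW__CORE__DAGS_FOLDER becomes "<workspace_root>/dags"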
airflow_dags_dir: str = "dags"
    # Create an airflow admin user with username: admin, password: admin
create_airflow_admin_user: bool = False
# Airflow Executor
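    # e.g. "SequentialExecutor", "LocalExecutor" or "CeleryExecutor"
    # ("CeleryExecutor" also requires the redis settings below)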
executor: str = "SequentialExecutor"
# -*- Airflow Database Configuration
# Set as True to wait for db before starting airflow
wait_for_db: bool = False
# Set as True to delay start by 60 seconds to wait for db migrations
wait_for_db_migrate: bool = False
# Connect to the database using a DbApp
db_app: Optional[DbApp] = None
# Provide database connection details manually
# db_user can be provided here or as the
# DB_USER env var in the secrets_file
db_user: Optional[str] = None
# db_password can be provided here or as the
# DB_PASSWORD env var in the secrets_file
db_password: Optional[str] = None
# db_database can be provided here or as the
# DB_DATABASE env var in the secrets_file
db_database: Optional[str] = None
# db_host can be provided here or as the
# DB_HOST env var in the secrets_file
db_host: Optional[str] = None
# db_port can be provided here or as the
# DB_PORT env var in the secrets_file
db_port: Optional[int] = None
# db_driver can be provided here or as the
# DB_DRIVER env var in the secrets_file
db_driver: str = "postgresql+psycopg2"
db_result_backend_driver: str = "db+postgresql"
    # Airflow db connections in the format { conn_id: conn_url }
    # converted to env vars: AIRFLOW_CONN_{CONN_ID} = conn_url (conn_id is upper-cased)
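    # e.g. {"local_pg": "postgresql+psycopg2://user:pass@host:5432/db"} is exported
    # as AIRFLOW_CONN_LOCAL_PG="postgresql+psycopg2://user:pass@host:5432/db"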
db_connections: Optional[Dict] = None
# Set as True to migrate (initialize/upgrade) the airflow_db
db_migrate: bool = False
# -*- Airflow Redis Configuration
# Set as True to wait for redis before starting airflow
wait_for_redis: bool = False
# Connect to redis using a DbApp
redis_app: Optional[DbApp] = None
# Provide redis connection details manually
# redis_password can be provided here or as the
# REDIS_PASSWORD env var in the secrets_file
redis_password: Optional[str] = None
# redis_schema can be provided here or as the
# REDIS_SCHEMA env var in the secrets_file
redis_schema: Optional[str] = None
# redis_host can be provided here or as the
# REDIS_HOST env var in the secrets_file
redis_host: Optional[str] = None
# redis_port can be provided here or as the
# REDIS_PORT env var in the secrets_file
redis_port: Optional[int] = None
# redis_driver can be provided here or as the
# REDIS_DRIVER env var in the secrets_file
redis_driver: str = "redis"
# -*- Other args
load_examples: bool = False
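
    # The getters below resolve each connection detail in priority order:
    # explicit field on this app -> matching env var in the secrets_file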
    def get_db_user(self) -> Optional[str]:
        return self.db_user or self.get_secret_from_file("DATABASE_USER") or self.get_secret_from_file("DB_USER")

    def get_db_password(self) -> Optional[str]:
        return (
            self.db_password
            or self.get_secret_from_file("DATABASE_PASSWORD")
            or self.get_secret_from_file("DB_PASSWORD")
        )

    def get_db_database(self) -> Optional[str]:
        return self.db_database or self.get_secret_from_file("DATABASE_DB") or self.get_secret_from_file("DB_DATABASE")

    def get_db_driver(self) -> Optional[str]:
        return self.db_driver or self.get_secret_from_file("DATABASE_DRIVER") or self.get_secret_from_file("DB_DRIVER")

    def get_db_host(self) -> Optional[str]:
        return self.db_host or self.get_secret_from_file("DATABASE_HOST") or self.get_secret_from_file("DB_HOST")

    def get_db_port(self) -> Optional[int]:
        return (
            self.db_port
            or str_to_int(self.get_secret_from_file("DATABASE_PORT"))
            or str_to_int(self.get_secret_from_file("DB_PORT"))
        )

    def get_redis_password(self) -> Optional[str]:
        return self.redis_password or self.get_secret_from_file("REDIS_PASSWORD")

    def get_redis_schema(self) -> Optional[str]:
        return self.redis_schema or self.get_secret_from_file("REDIS_SCHEMA")

    def get_redis_host(self) -> Optional[str]:
        return self.redis_host or self.get_secret_from_file("REDIS_HOST")

    def get_redis_port(self) -> Optional[int]:
        return self.redis_port or str_to_int(self.get_secret_from_file("REDIS_PORT"))

    def get_redis_driver(self) -> Optional[str]:
        return self.redis_driver or self.get_secret_from_file("REDIS_DRIVER")

    def get_airflow_home(self) -> str:
        return self.airflow_home or "/usr/local/airflow"

    def get_container_env(self, container_context: ContainerContext) -> Dict[str, str]:
from phi.constants import (
PHI_RUNTIME_ENV_VAR,
PYTHONPATH_ENV_VAR,
REQUIREMENTS_FILE_PATH_ENV_VAR,
SCRIPTS_DIR_ENV_VAR,
STORAGE_DIR_ENV_VAR,
WORKFLOWS_DIR_ENV_VAR,
WORKSPACE_DIR_ENV_VAR,
WORKSPACE_HASH_ENV_VAR,
WORKSPACE_ID_ENV_VAR,
WORKSPACE_ROOT_ENV_VAR,
INIT_AIRFLOW_ENV_VAR,
AIRFLOW_ENV_ENV_VAR,
AIRFLOW_HOME_ENV_VAR,
AIRFLOW_DAGS_FOLDER_ENV_VAR,
AIRFLOW_EXECUTOR_ENV_VAR,
AIRFLOW_DB_CONN_URL_ENV_VAR,
)
# Container Environment
container_env: Dict[str, str] = self.container_env or {}
container_env.update(
{
"INSTALL_REQUIREMENTS": str(self.install_requirements),
"MOUNT_WORKSPACE": str(self.mount_workspace),
"PRINT_ENV_ON_LOAD": str(self.print_env_on_load),
PHI_RUNTIME_ENV_VAR: "kubernetes",
REQUIREMENTS_FILE_PATH_ENV_VAR: container_context.requirements_file or "",
SCRIPTS_DIR_ENV_VAR: container_context.scripts_dir or "",
STORAGE_DIR_ENV_VAR: container_context.storage_dir or "",
WORKFLOWS_DIR_ENV_VAR: container_context.workflows_dir or "",
WORKSPACE_DIR_ENV_VAR: container_context.workspace_dir or "",
WORKSPACE_ROOT_ENV_VAR: container_context.workspace_root or "",
# Env variables used by Airflow
# INIT_AIRFLOW env var is required for phidata to generate DAGs from workflows
INIT_AIRFLOW_ENV_VAR: str(True),
"DB_MIGRATE": str(self.db_migrate),
"WAIT_FOR_DB": str(self.wait_for_db),
"WAIT_FOR_DB_MIGRATE": str(self.wait_for_db_migrate),
"WAIT_FOR_REDIS": str(self.wait_for_redis),
"CREATE_AIRFLOW_ADMIN_USER": str(self.create_airflow_admin_user),
AIRFLOW_EXECUTOR_ENV_VAR: str(self.executor),
"AIRFLOW__CORE__LOAD_EXAMPLES": str(self.load_examples),
# Airflow Navbar color
"AIRFLOW__WEBSERVER__NAVBAR_COLOR": "#d1fae5",
}
)
try:
if container_context.workspace_schema is not None:
if container_context.workspace_schema.id_workspace is not None:
                    container_env[WORKSPACE_ID_ENV_VAR] = str(container_context.workspace_schema.id_workspace)
if container_context.workspace_schema.ws_hash is not None:
container_env[WORKSPACE_HASH_ENV_VAR] = container_context.workspace_schema.ws_hash
        except Exception:
            # Workspace metadata is optional; ignore failures reading it
            pass
if self.set_python_path:
python_path = self.python_path
if python_path is None:
python_path = f"{container_context.workspace_root}:{self.get_airflow_home()}"
if self.add_python_paths is not None:
python_path = "{}:{}".format(python_path, ":".join(self.add_python_paths))
if python_path is not None:
container_env[PYTHONPATH_ENV_VAR] = python_path
# Set aws region and profile
self.set_aws_env_vars(env_dict=container_env)
# Set the AIRFLOW__CORE__DAGS_FOLDER
container_env[AIRFLOW_DAGS_FOLDER_ENV_VAR] = f"{container_context.workspace_root}/{self.airflow_dags_dir}"
# Set the AIRFLOW_ENV
if self.airflow_env is not None:
container_env[AIRFLOW_ENV_ENV_VAR] = self.airflow_env
# Set the AIRFLOW_HOME
if self.airflow_home is not None:
container_env[AIRFLOW_HOME_ENV_VAR] = self.get_airflow_home()
# Set the AIRFLOW__CONN_ variables
if self.db_connections is not None:
for conn_id, conn_url in self.db_connections.items():
try:
                    af_conn_id = f"AIRFLOW_CONN_{conn_id}".upper()
container_env[af_conn_id] = conn_url
except Exception as e:
logger.exception(e)
continue
# Airflow db connection
db_user = self.get_db_user()
db_password = self.get_db_password()
db_database = self.get_db_database()
db_host = self.get_db_host()
db_port = self.get_db_port()
db_driver = self.get_db_driver()
if self.db_app is not None and isinstance(self.db_app, DbApp):
logger.debug(f"Reading db connection details from: {self.db_app.name}")
if db_user is None:
db_user = self.db_app.get_db_user()
if db_password is None:
db_password = self.db_app.get_db_password()
if db_database is None:
db_database = self.db_app.get_db_database()
if db_host is None:
db_host = self.db_app.get_db_host()
if db_port is None:
db_port = self.db_app.get_db_port()
if db_driver is None:
db_driver = self.db_app.get_db_driver()
db_connection_url = f"{db_driver}://{db_user}:{db_password}@{db_host}:{db_port}/{db_database}"
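        # e.g. "postgresql+psycopg2://airflow:***@airflow-db:5432/airflow"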
        # Set the AIRFLOW__DATABASE__SQL_ALCHEMY_CONN only if every component resolved
        # (a missing component would appear as the literal string "None" in the URL)
        if "None" not in db_connection_url:
logger.debug(f"AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: {db_connection_url}")
container_env[AIRFLOW_DB_CONN_URL_ENV_VAR] = db_connection_url
# Set the database connection details in the container env
if db_host is not None:
container_env["DATABASE_HOST"] = db_host
if db_port is not None:
container_env["DATABASE_PORT"] = str(db_port)
# Airflow redis connection
if self.executor == "CeleryExecutor":
# Airflow celery result backend
celery_result_backend_driver = self.db_result_backend_driver or db_driver
celery_result_backend_url = (
f"{celery_result_backend_driver}://{db_user}:{db_password}@{db_host}:{db_port}/{db_database}"
)
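            # e.g. "db+postgresql://airflow:***@airflow-db:5432/airflow"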
            # Set the AIRFLOW__CELERY__RESULT_BACKEND only if every component resolved
            if "None" not in celery_result_backend_url:
container_env["AIRFLOW__CELERY__RESULT_BACKEND"] = celery_result_backend_url
# Airflow celery broker url
            _redis_pass = self.get_redis_password()
            redis_schema = self.get_redis_schema()
            redis_host = self.get_redis_host()
            redis_port = self.get_redis_port()
            redis_driver = self.get_redis_driver()
            if self.redis_app is not None and isinstance(self.redis_app, DbApp):
                logger.debug(f"Reading redis connection details from: {self.redis_app.name}")
                if _redis_pass is None:
                    _redis_pass = self.redis_app.get_db_password()
                if redis_schema is None:
                    redis_schema = self.redis_app.get_db_database() or "0"
                if redis_host is None:
                    redis_host = self.redis_app.get_db_host()
                if redis_port is None:
                    redis_port = self.redis_app.get_db_port()
                if redis_driver is None:
                    redis_driver = self.redis_app.get_db_driver()
            # Build the "<password>@" url segment only after all fallbacks have been applied
            redis_password = f"{_redis_pass}@" if _redis_pass else ""
            # Set the AIRFLOW__CELERY__BROKER_URL
celery_broker_url = f"{redis_driver}://{redis_password}{redis_host}:{redis_port}/{redis_schema}"
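            # e.g. "redis://mypassword@airflow-redis:6379/0" (the "@" segment is dropped when no password is set)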
if "None" not in celery_broker_url:
logger.debug(f"AIRFLOW__CELERY__BROKER_URL: {celery_broker_url}")
container_env["AIRFLOW__CELERY__BROKER_URL"] = celery_broker_url
# Set the redis connection details in the container env
if redis_host is not None:
container_env["REDIS_HOST"] = redis_host
if redis_port is not None:
container_env["REDIS_PORT"] = str(redis_port)
# Update the container env using env_file
env_data_from_file = self.get_env_file_data()
if env_data_from_file is not None:
container_env.update({k: str(v) for k, v in env_data_from_file.items() if v is not None})
# Update the container env with user provided env_vars
# this overwrites any existing variables with the same key
if self.env_vars is not None and isinstance(self.env_vars, dict):
container_env.update({k: str(v) for k, v in self.env_vars.items() if v is not None})
# logger.debug("Container Environment: {}".format(container_env))
return container_env
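

# Usage sketch (illustrative only, not executed by this module). The database
# app below is an assumption: swap in whichever DbApp subclass your workspace
# defines (e.g. phidata's PostgresDb) and adjust the fields to match your setup.
#
# airflow_db = PostgresDb(
#     name="airflow-db",
#     db_user="airflow",
#     db_password="airflow",
#     db_database="airflow",
# )
# airflow = AirflowBase(
#     name="airflow",
#     open_port=True,                  # expose the webserver port 8080
#     db_app=airflow_db,               # connection details read via the DbApp getters
#     wait_for_db=True,                # block start-up until the db is reachable
#     db_migrate=True,                 # initialize/upgrade the airflow db
#     create_airflow_admin_user=True,  # admin/admin login for local use
#     executor="LocalExecutor",
# )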