from typing import Optional, Dict

from phi.app.db_app import DbApp
from phi.k8s.app.base import (
    K8sApp,
    AppVolumeType,  # noqa: F401
    ContainerContext,
    ServiceType,  # noqa: F401
    RestartPolicy,  # noqa: F401
    ImagePullPolicy,  # noqa: F401
)
from phi.utils.common import str_to_int
from phi.utils.log import logger


class AirflowBase(K8sApp):
    # -*- App Name
    name: str = "airflow"

    # -*- Image Configuration
    image_name: str = "phidata/airflow"
    image_tag: str = "2.7.1"

    # -*- App Ports
    # Open a container port if open_port=True
    open_port: bool = False
    port_number: int = 8080

    # -*- Workspace Configuration
    # Path to the parent directory of the workspace inside the container
    # When using git-sync, the git repo is cloned inside this directory
    # i.e. this is the parent directory of the workspace
    workspace_parent_dir_container_path: str = "/usr/local/workspace"

    # -*- Airflow Configuration
    # airflow_env sets the AIRFLOW_ENV env var and can be used by
    # DAGs to separate dev/stg/prd code
    airflow_env: Optional[str] = None
    # Set the AIRFLOW_HOME env variable
    # Defaults to: /usr/local/airflow
    airflow_home: Optional[str] = None
    # Set the AIRFLOW__CORE__DAGS_FOLDER env variable to the workspace_root/{airflow_dags_dir}
    # By default, airflow_dags_dir is set to the "dags" folder in the workspace
    airflow_dags_dir: str = "dags"
    # Creates an airflow admin with username: admin, pass: admin
    create_airflow_admin_user: bool = False
    # Airflow Executor
    executor: str = "SequentialExecutor"

    # -*- Airflow Database Configuration
    # Set as True to wait for db before starting airflow
    wait_for_db: bool = False
    # Set as True to delay start by 60 seconds to wait for db migrations
    wait_for_db_migrate: bool = False
    # Connect to the database using a DbApp
    db_app: Optional[DbApp] = None
    # Provide database connection details manually
    # db_user can be provided here or as the
    # DB_USER env var in the secrets_file
    db_user: Optional[str] = None
    # db_password can be provided here or as the
    # DB_PASSWORD env var in the secrets_file
    db_password: Optional[str] = None
    # db_database can be provided here or as the
    # DB_DATABASE env var in the secrets_file
    db_database: Optional[str] = None
    # db_host can be provided here or as the
    # DB_HOST env var in the secrets_file
    db_host: Optional[str] = None
    # db_port can be provided here or as the
    # DB_PORT env var in the secrets_file
    db_port: Optional[int] = None
    # db_driver can be provided here or as the
    # DB_DRIVER env var in the secrets_file
    db_driver: str = "postgresql+psycopg2"
    db_result_backend_driver: str = "db+postgresql"
    # Airflow db connections in the format { conn_id: conn_url }
    # converted to env var: AIRFLOW_CONN__conn_id = conn_url
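    # Illustrative example (values are placeholders):
    #   db_connections={"my_pg": "postgresql://user:pass@pg-host:5432/db"}
    # becomes the env var AIRFLOW_CONN_MY_PG=postgresql://user:pass@pg-host:5432/db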
    db_connections: Optional[Dict] = None
    # Set as True to migrate (initialize/upgrade) the airflow_db
    db_migrate: bool = False

    # -*- Airflow Redis Configuration
    # Set as True to wait for redis before starting airflow
    wait_for_redis: bool = False
    # Connect to redis using a DbApp
    redis_app: Optional[DbApp] = None
    # Provide redis connection details manually
    # redis_password can be provided here or as the
    # REDIS_PASSWORD env var in the secrets_file
    redis_password: Optional[str] = None
    # redis_schema can be provided here or as the
    # REDIS_SCHEMA env var in the secrets_file
    redis_schema: Optional[str] = None
    # redis_host can be provided here or as the
    # REDIS_HOST env var in the secrets_file
    redis_host: Optional[str] = None
    # redis_port can be provided here or as the
    # REDIS_PORT env var in the secrets_file
    redis_port: Optional[int] = None
    # redis_driver can be provided here or as the
    # REDIS_DRIVER env var in the secrets_file
    redis_driver: str = "redis"

    # -*- Other args
    load_examples: bool = False

    def get_db_user(self) -> Optional[str]:
        return self.db_user or self.get_secret_from_file("DATABASE_USER") or self.get_secret_from_file("DB_USER")

    def get_db_password(self) -> Optional[str]:
        return (
            self.db_password
            or self.get_secret_from_file("DATABASE_PASSWORD")
            or self.get_secret_from_file("DB_PASSWORD")
        )

    def get_db_database(self) -> Optional[str]:
        return self.db_database or self.get_secret_from_file("DATABASE_DB") or self.get_secret_from_file("DB_DATABASE")

    def get_db_driver(self) -> Optional[str]:
        return self.db_driver or self.get_secret_from_file("DATABASE_DRIVER") or self.get_secret_from_file("DB_DRIVER")

    def get_db_host(self) -> Optional[str]:
        return self.db_host or self.get_secret_from_file("DATABASE_HOST") or self.get_secret_from_file("DB_HOST")

    def get_db_port(self) -> Optional[int]:
        return (
            self.db_port
            or str_to_int(self.get_secret_from_file("DATABASE_PORT"))
            or str_to_int(self.get_secret_from_file("DB_PORT"))
        )

    def get_redis_password(self) -> Optional[str]:
        return self.redis_password or self.get_secret_from_file("REDIS_PASSWORD")

    def get_redis_schema(self) -> Optional[str]:
        return self.redis_schema or self.get_secret_from_file("REDIS_SCHEMA")

    def get_redis_host(self) -> Optional[str]:
        return self.redis_host or self.get_secret_from_file("REDIS_HOST")

    def get_redis_port(self) -> Optional[int]:
        return self.redis_port or str_to_int(self.get_secret_from_file("REDIS_PORT"))

    def get_redis_driver(self) -> Optional[str]:
        return self.redis_driver or self.get_secret_from_file("REDIS_DRIVER")

    def get_airflow_home(self) -> str:
        return self.airflow_home or "/usr/local/airflow"

    def get_container_env(self, container_context: ContainerContext) -> Dict[str, str]:
        from phi.constants import (
            PHI_RUNTIME_ENV_VAR,
            PYTHONPATH_ENV_VAR,
            REQUIREMENTS_FILE_PATH_ENV_VAR,
            SCRIPTS_DIR_ENV_VAR,
            STORAGE_DIR_ENV_VAR,
            WORKFLOWS_DIR_ENV_VAR,
            WORKSPACE_DIR_ENV_VAR,
            WORKSPACE_HASH_ENV_VAR,
            WORKSPACE_ID_ENV_VAR,
            WORKSPACE_ROOT_ENV_VAR,
            INIT_AIRFLOW_ENV_VAR,
            AIRFLOW_ENV_ENV_VAR,
            AIRFLOW_HOME_ENV_VAR,
            AIRFLOW_DAGS_FOLDER_ENV_VAR,
            AIRFLOW_EXECUTOR_ENV_VAR,
            AIRFLOW_DB_CONN_URL_ENV_VAR,
        )

        # Container Environment
        container_env: Dict[str, str] = self.container_env or {}
        container_env.update(
            {
                "INSTALL_REQUIREMENTS": str(self.install_requirements),
                "MOUNT_WORKSPACE": str(self.mount_workspace),
                "PRINT_ENV_ON_LOAD": str(self.print_env_on_load),
                PHI_RUNTIME_ENV_VAR: "kubernetes",
                REQUIREMENTS_FILE_PATH_ENV_VAR: container_context.requirements_file or "",
                SCRIPTS_DIR_ENV_VAR: container_context.scripts_dir or "",
                STORAGE_DIR_ENV_VAR: container_context.storage_dir or "",
                WORKFLOWS_DIR_ENV_VAR: container_context.workflows_dir or "",
                WORKSPACE_DIR_ENV_VAR: container_context.workspace_dir or "",
                WORKSPACE_ROOT_ENV_VAR: container_context.workspace_root or "",
                # Env variables used by Airflow
                # INIT_AIRFLOW env var is required for phidata to generate DAGs from workflows
                INIT_AIRFLOW_ENV_VAR: str(True),
                "DB_MIGRATE": str(self.db_migrate),
                "WAIT_FOR_DB": str(self.wait_for_db),
                "WAIT_FOR_DB_MIGRATE": str(self.wait_for_db_migrate),
                "WAIT_FOR_REDIS": str(self.wait_for_redis),
                "CREATE_AIRFLOW_ADMIN_USER": str(self.create_airflow_admin_user),
                AIRFLOW_EXECUTOR_ENV_VAR: str(self.executor),
                "AIRFLOW__CORE__LOAD_EXAMPLES": str(self.load_examples),
                # Airflow Navbar color
                "AIRFLOW__WEBSERVER__NAVBAR_COLOR": "#d1fae5",
            }
        )

        try:
            if container_context.workspace_schema is not None:
                if container_context.workspace_schema.id_workspace is not None:
                    container_env[WORKSPACE_ID_ENV_VAR] = str(container_context.workspace_schema.id_workspace) or ""
                if container_context.workspace_schema.ws_hash is not None:
                    container_env[WORKSPACE_HASH_ENV_VAR] = container_context.workspace_schema.ws_hash
        except Exception:
            pass
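
        # Illustrative note: when set_python_path=True and python_path is not provided,
        # PYTHONPATH defaults to "<workspace_root>:<airflow_home>",
        # e.g. "/usr/local/workspace/my-ws:/usr/local/airflow" (workspace name is a placeholder),
        # so both workspace code and AIRFLOW_HOME are importable inside the container.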
        if self.set_python_path:
            python_path = self.python_path
            if python_path is None:
                python_path = f"{container_context.workspace_root}:{self.get_airflow_home()}"
                if self.add_python_paths is not None:
                    python_path = "{}:{}".format(python_path, ":".join(self.add_python_paths))
            if python_path is not None:
                container_env[PYTHONPATH_ENV_VAR] = python_path

        # Set aws region and profile
        self.set_aws_env_vars(env_dict=container_env)

        # Set the AIRFLOW__CORE__DAGS_FOLDER
        container_env[AIRFLOW_DAGS_FOLDER_ENV_VAR] = f"{container_context.workspace_root}/{self.airflow_dags_dir}"

        # Set the AIRFLOW_ENV
        if self.airflow_env is not None:
            container_env[AIRFLOW_ENV_ENV_VAR] = self.airflow_env

        # Set the AIRFLOW_HOME
        if self.airflow_home is not None:
            container_env[AIRFLOW_HOME_ENV_VAR] = self.get_airflow_home()

        # Set the AIRFLOW_CONN_ variables
        if self.db_connections is not None:
            for conn_id, conn_url in self.db_connections.items():
                try:
                    af_conn_id = str("AIRFLOW_CONN_{}".format(conn_id)).upper()
                    container_env[af_conn_id] = conn_url
                except Exception as e:
                    logger.exception(e)
                    continue

        # Airflow db connection
        db_user = self.get_db_user()
        db_password = self.get_db_password()
        db_database = self.get_db_database()
        db_host = self.get_db_host()
        db_port = self.get_db_port()
        db_driver = self.get_db_driver()
        if self.db_app is not None and isinstance(self.db_app, DbApp):
            logger.debug(f"Reading db connection details from: {self.db_app.name}")
            if db_user is None:
                db_user = self.db_app.get_db_user()
            if db_password is None:
                db_password = self.db_app.get_db_password()
            if db_database is None:
                db_database = self.db_app.get_db_database()
            if db_host is None:
                db_host = self.db_app.get_db_host()
            if db_port is None:
                db_port = self.db_app.get_db_port()
            if db_driver is None:
                db_driver = self.db_app.get_db_driver()
        db_connection_url = f"{db_driver}://{db_user}:{db_password}@{db_host}:{db_port}/{db_database}"

        # Set the AIRFLOW__DATABASE__SQL_ALCHEMY_CONN only if every part of the url is available
        # (a missing component would appear as the literal string "None")
        if "None" not in db_connection_url:
            logger.debug(f"AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: {db_connection_url}")
            container_env[AIRFLOW_DB_CONN_URL_ENV_VAR] = db_connection_url

        # Set the database connection details in the container env
        if db_host is not None:
            container_env["DATABASE_HOST"] = db_host
        if db_port is not None:
            container_env["DATABASE_PORT"] = str(db_port)
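
        # Illustrative note: with executor="CeleryExecutor" the broker url built below takes the form
        # "<redis_driver>://[<password>@]<host>:<port>/<schema>", e.g. "redis://airflow-redis:6379/0"
        # (host name is a placeholder).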
        # Airflow redis connection
        if self.executor == "CeleryExecutor":
            # Airflow celery result backend
            celery_result_backend_driver = self.db_result_backend_driver or db_driver
            celery_result_backend_url = (
                f"{celery_result_backend_driver}://{db_user}:{db_password}@{db_host}:{db_port}/{db_database}"
            )
            # Set the AIRFLOW__CELERY__RESULT_BACKEND
            if "None" not in celery_result_backend_url:
                container_env["AIRFLOW__CELERY__RESULT_BACKEND"] = celery_result_backend_url

            # Airflow celery broker url
            _redis_pass = self.get_redis_password()
            redis_schema = self.get_redis_schema()
            redis_host = self.get_redis_host()
            redis_port = self.get_redis_port()
            redis_driver = self.get_redis_driver()
            if self.redis_app is not None and isinstance(self.redis_app, DbApp):
                logger.debug(f"Reading redis connection details from: {self.redis_app.name}")
                if _redis_pass is None:
                    _redis_pass = self.redis_app.get_db_password()
                if redis_schema is None:
                    redis_schema = self.redis_app.get_db_database() or "0"
                if redis_host is None:
                    redis_host = self.redis_app.get_db_host()
                if redis_port is None:
                    redis_port = self.redis_app.get_db_port()
                if redis_driver is None:
                    redis_driver = self.redis_app.get_db_driver()
            # Format the password segment only after all sources have been checked,
            # otherwise the "" default would mask a password provided by redis_app
            redis_password = f"{_redis_pass}@" if _redis_pass else ""

            # Set the AIRFLOW__CELERY__BROKER_URL
            celery_broker_url = f"{redis_driver}://{redis_password}{redis_host}:{redis_port}/{redis_schema}"
            if "None" not in celery_broker_url:
                logger.debug(f"AIRFLOW__CELERY__BROKER_URL: {celery_broker_url}")
                container_env["AIRFLOW__CELERY__BROKER_URL"] = celery_broker_url

            # Set the redis connection details in the container env
            if redis_host is not None:
                container_env["REDIS_HOST"] = redis_host
            if redis_port is not None:
                container_env["REDIS_PORT"] = str(redis_port)

        # Update the container env using env_file
        env_data_from_file = self.get_env_file_data()
        if env_data_from_file is not None:
            container_env.update({k: str(v) for k, v in env_data_from_file.items() if v is not None})

        # Update the container env with user provided env_vars
        # this overwrites any existing variables with the same key
        if self.env_vars is not None and isinstance(self.env_vars, dict):
            container_env.update({k: str(v) for k, v in self.env_vars.items() if v is not None})

        # logger.debug("Container Environment: {}".format(container_env))
        return container_env
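
# Illustrative usage (not part of the module) -- a sketch of configuring the app with
# manual db connection details; the credentials and host below are placeholders:
#
#   airflow = AirflowBase(
#       airflow_env="dev",
#       executor="LocalExecutor",
#       wait_for_db=True,
#       db_migrate=True,
#       create_airflow_admin_user=True,
#       db_user="airflow",
#       db_password="airflow",
#       db_database="airflow",
#       db_host="airflow-db-svc",
#       db_port=5432,
#   )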