stages:
  # Stage: runs the dataset-creation script; its declared output is the
  # data/raw path.
  download_data:
    cmd: python src/data/make_dataset.py
    deps:
      # The script and its parameter file are both tracked as dependencies.
      - src/data/make_dataset.py
      - data_params.yml
    outs:
      - data/raw
  # Stage: runs the processing script against data/raw and declares the three
  # CSV splits under data/processed as outputs.
  process_data:
    cmd: python src/data/process_data.py
    deps:
      - src/data/process_data.py
      - data_params.yml
      - data/raw
    outs:
      # Every split is stored in the DVC cache (cache: true) and is kept on
      # disk instead of being removed before the stage re-runs (persist: true).
      - data/processed/train.csv:
          cache: true
          persist: true
      - data/processed/validation.csv:
          cache: true
          persist: true
      - data/processed/test.csv:
          cache: true
          persist: true
  # Stage: runs the training script on the train/validation splits and
  # declares the models path as its output.
  train:
    cmd: python src/models/train_model.py
    deps:
      - model_params.yml
      - data/processed/train.csv
      - data/processed/validation.csv
      - src/models/train_model.py
    outs:
      # persist: true keeps the existing models path in place when the stage
      # is re-run instead of deleting it first.
      - models:
          persist: true
    # The training report is a CSV file. DVC metrics files must be
    # JSON/YAML/TOML/Python, so a CSV declared under `metrics` cannot be read
    # by `dvc metrics show`; tabular CSV output is declared under `plots`.
    # cache: false keeps the report out of the DVC cache, as before.
    plots:
      - reports/training_metrics.csv:
          cache: false
  # Stage: runs the evaluation script against the test split and the models
  # path declared by the train stage.
  eval:
    cmd: python src/models/evaluate_model.py
    deps:
      - model_params.yml
      - data/processed/test.csv
      - models
      - src/models/evaluate_model.py
    # The evaluation report is a CSV file. DVC metrics files must be
    # JSON/YAML/TOML/Python, so a CSV declared under `metrics` cannot be read
    # by `dvc metrics show`; tabular CSV output is declared under `plots`.
    # cache: false keeps the report out of the DVC cache, as before.
    plots:
      - reports/evaluation_metrics.csv:
          cache: false
  # Stage: launches the Streamlit visualization script. No outputs are
  # declared for this stage.
  # NOTE(review): `streamlit run` normally starts a long-lived server, so a
  # plain `dvc repro` would presumably block here until it is stopped --
  # confirm this is intended.
  visualize:
    cmd: streamlit run src/visualization/visualize.py
    deps:
      - src/visualization/visualize.py
      - models
  # Stage: runs the upload script. It declares no outputs; its dependencies
  # are the script, the model parameter file and the models path.
  push_to_hf_hub:
    cmd: python src/models/hf_upload.py
    deps:
      - src/models/hf_upload.py
      - model_params.yml
      - models