# Hugging Face page header captured with the file (not part of the config):
#   uploader: lovodkin93
#   commit: ab08cd3 (verified) - "Upload config.yaml with huggingface_hub"
# Model identifiers: the base checkpoint and the ID of the fine-tuned model.
BASE_REPO_ID: google/paligemma-3b-pt-448
FINETUNED_MODEL_ID: lovodkin93/paligemma-3b-pt-448_lora_mmnt_TV_concat_vertical-tgt-bottom-black_balanced
# Absolute, machine-specific path.
# NOTE(review): presumably where training artifacts are written - adjust when
# running on another host.
OUTPUT_DIR_PATH: /home/slobodkin/reference_vnli/train_ref_vnli_autorater/models/paligemma-3b-pt-448/lora_mmnt_TV_concat_vertical-tgt-bottom-black_balanced
# Prompt template. It contains the ENTITY_INSTRUCTION_PLACEHOLDER token twice
# and ends with a newline followed by " Caption: " (trailing space included).
# Double quotes are required for the "\n" escape; each trailing "\" joins the
# next line without inserting whitespace, and the leading "\ " is the literal
# space between words. Continuation lines must stay indented deeper than the
# key, as YAML requires for multi-line quoted scalars inside a block mapping.
# NOTE(review): "containes" is a typo, but this is a runtime string that
# presumably matches the prompt the model was fine-tuned with - keep it
# byte-for-byte unless the model is retrained.
INSTRUCTIONS: "You are presented with two images and a caption. The caption containes\
  \ markups (<u> and </u>) around a mention of some {{ENTITY}}, whose image is the\
  \ BOTTOM one. Your goal is to determine if the TOP image aligns with the caption,\
  \ and also if it matches the {{ENTITY}} in the BOTTOM image within the caption's\
  \ context. Your output should be '3' if the TOP image aligns with both the caption\
  \ and the BOTTOM image, '2' if it only aligns with the caption, '1' if it only aligns\
  \ with the BOTTOM image, and '0' if it aligns with neither.\n Caption: "
# Same token that appears inside INSTRUCTIONS.
# NOTE(review): presumably replaced with the entity type at runtime - verify
# against the consuming script.
ENTITY_INSTRUCTION_PLACEHOLDER: '{{ENTITY}}'
DEBUG: false
RESUME_FROM_CHECKPOINT: false
# NOTE(review): unit not shown here (presumably tokens) - TODO confirm.
MAX_LENGTH: 300
# NOTE(review): the value below presumably means the two images are stacked
# vertically with the target at the bottom, which matches the TOP/BOTTOM
# wording of INSTRUCTIONS - confirm in the image-concatenation code.
CONCAT_REF_TGT_IMAGES: true
CONCAT_IMAGES_TYPE: vertical-tgt-bottom-black
# Training settings. The children must stay indented under TRAIN_PARAMS: at
# column 0 they become top-level keys and TRAIN_PARAMS itself parses as null.
# NOTE(review): the extent of this group is inferred from the key names -
# confirm against the script that loads this file.
TRAIN_PARAMS:
  BALANCE_TRAIN_DATASETS: true
  BATCH_SIZE: 4
  # Switches for which parts of the model are trained; only FINETUNE_LLM is on.
  # NOTE(review): the fine-tuned model ID contains "lora", so the LLM is
  # presumably adapted via LoRA rather than fully updated - verify.
  FULL_FINETUNE: false
  FINETUNE_LLM: true
  FINETUNE_MM_PROJECTOR: false
  FINETUNE_VISION_MODEL: false
  GRADIENT_ACCUMULATION_STEPS: 2
  # NOTE(review): the intervals are presumably counted in optimizer steps -
  # TODO confirm.
  EVAL_INTERVAL: 200
  SAVE_INTERVAL: 200
  SAVE_TOTAL_LIMIT: 20
  METRIC_FOR_BEST_MODEL: Eval/accuracy
# Dataset sources, one entry per corpus. Nesting is significant: without it
# DATASETS_CSV_BASE_DIR, VERSION, TRAIN, EVAL, DATASETS and task all repeat at
# a single level (duplicate keys, last value silently wins in most parsers).
# NOTE(review): hierarchy reconstructed from the key names - confirm against
# the data-loading code.
DATA:
  MEMENTOS:
    IMG_DIR: /home/slobodkin/reference_vnli/datasets/mementos/images
    DATASETS_CSV_BASE_DIR: /home/slobodkin/reference_vnli/train_ref_vnli_autorater/data/mementos/multiclass
    VERSION: 03Nov2024
    TRAIN:
      DATASETS:
        - name: train_min_balanced_multi_classification
          task: mementos
    EVAL:
      DATASETS:
        - name: dev_2000_instances
          task: mementos
  # NOTE(review): unlike MEMENTOS, no IMG_DIR is defined for TVQA - confirm
  # the loader does not require one.
  TVQA:
    DATASETS_CSV_BASE_DIR: /home/slobodkin/reference_vnli/train_ref_vnli_autorater/data/TVQA_plus
    VERSION: 15Dec2024
    TRAIN:
      DATASETS:
        - name: train_balanced_full_200000_instances
          task: TVQA
    EVAL:
      DATASETS:
        - name: val_balanced_full_2000_instances
          task: TVQA