# ACE-M3_Conclusion / config.yaml
# NOTE(review): the lines below are web-page/commit-listing artifacts
# ("AIUSRTMP's picture", "init", commit c37ce5b), not YAML content;
# commented out so the document parses.
# AIUSRTMP's picture
# init
# c37ce5b
# Core experiment, dataset, and batching settings.
exp_name: mme
subdomain: conclusion
# Medical VQA training datasets (names suggest PathVQA, SLAKE, VQA-RAD —
# confirm against the dataset loader).
datasets:
- path-vqa
- slake
- vqa-rad
dataset_directory: Model_Training/Dataset/train
# Preference order for the model used as the evaluation judge (gpt4 first,
# presumably falling back to gpt35 — confirm against the evaluation code).
evaluation_model_priority:
- gpt4
- gpt35
n_epochs: 1
# null appears to mean "use all training examples" — TODO confirm in trainer.
n_examples: null
n_eval_examples: 10
batch_size: 128
gradient_accumulation_steps: 64
eval_batch_size: 2
# Token budgets: whole sequence vs. the prompt-only portion.
max_length: 3520
max_prompt_length: 2924
reward_token: Unified
# Layer-freezing configuration. The child keys below were flush-left in the
# original, which made `frozen_layers` parse as null and promoted `enable`,
# `language_model_layers`, and `vision_tower_layers` to top-level keys;
# re-indented so they nest under frozen_layers as intended.
frozen_layers:
  enable: true
  # Decoder-layer indices to freeze, quoted so YAML keeps them as strings
  # rather than retyping them as integers (presumably matched against
  # parameter names — confirm against the freezing code).
  language_model_layers:
  - '0'
  - '1'
  - '2'
  - '3'
  - '4'
  - '5'
  - '6'
  - '7'
  - '8'
  - '9'
  - '10'
  - '11'
  - '12'
  - '13'
  - '14'
  - '15'
  - '16'
  - '17'
  - '18'
  - '19'
  - '20'
  - '21'
  - '22'
  - '23'
  # -1 looks like a sentinel (freeze none or all vision layers — NOTE(review):
  # confirm its meaning in the trainer).
  vision_tower_layers: -1
debug: false
# Port for FSDP distributed initialization (presumably the rendezvous port
# used by FSDPTrainer — confirm against the launcher).
fsdp_port: 46031
# Weights & Biases logging. The child keys were flush-left in the original,
# which made `wandb` parse as null; re-indented to nest correctly.
wandb:
  enabled: true
  entity: null  # null presumably means the default W&B account — confirm
  project: direct-preference-optimization
# Candidate scratch directories for caches and run outputs (presumably the
# first usable one wins — confirm against the run-dir helper).
local_dirs:
- /scr-ssd
- /scr
- .cache
sample_during_eval: false
n_eval_model_samples: 2
do_first_eval: true
local_run_dir: .cache/lhe_exc/mme_2024-04-23_01-31-43_653905
lr: 1.0e-06
max_grad_norm: 10.0
trainer: FSDPTrainer
optimizer: RMSprop
warmup_steps: 6
activation_checkpointing: false
# Evaluation interval — unit (examples vs. steps) not visible here;
# NOTE(review): confirm against the training loop.
eval_every: 14976
minimum_log_interval_secs: 1.0
seed: 0
# Model checkpoints, FSDP block names, and dtypes. The child keys were
# flush-left in the original, which made `model` parse as null; re-indented
# to nest correctly.
model:
  name_or_path: ./LM/llava-1.5-7b-hf-medllama2
  # null presumably falls back to name_or_path's tokenizer — confirm
  tokenizer_name_or_path: null
  # SFT checkpoint loaded as the starting policy (path suggests SFT-only
  # weights — confirm against the loading code).
  archive: ./LM/mme/mme_conclusion/sft_only/Unified/policy.pt
  block_name: LlamaDecoderLayer
  policy_dtype: bfloat16
  fsdp_policy_mp: bfloat16
  reference_dtype: bfloat16
  vision_encoder_block: CLIPEncoderLayer
  language_model_block: LlamaDecoderLayer
# Preference-loss settings. The child keys were flush-left in the original,
# which made `loss` parse as null; re-indented to nest correctly.
loss:
  name: dpo
  beta: 0.1  # DPO temperature controlling deviation from the reference model
  reference_free: false  # false -> presumably use reference log-probs; confirm