# zephyr-gemma-dpo-no-gen-prompt / train-config.yaml
# Uploaded by alvarobartt (HF staff) with huggingface_hub.
# Commit 8c6ee2b (verified).
# Dataset selection.
# NOTE(review): `path` looks like a Hugging Face Hub dataset id — confirm
# against the loader that consumes this file.
dataset_args:
  path: argilla/dpo-mix-7k
# Prompt formatting options.
# NOTE(review): the W&B run name in this file ends in `no-gen-prompt`, so
# `add_generation_prompt: false` appears to be the variable under test —
# confirm before changing it.
format_args:
  prompt_format: zephyr-gemma
  add_generation_prompt: false
# Base checkpoint and the dtype requested for it.
# NOTE(review): key names match `from_pretrained` keyword arguments;
# presumably they are forwarded as-is — verify in the training script.
model_args:
  pretrained_model_name_or_path: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
  torch_dtype: bfloat16
# Weights & Biases run identification (entity / project / run name).
wandb_args:
  entity: argilla-io
  project: zephyr-gemma-dpo
  name: 16bit-no-gen-prompt
# Arguments for the DPO training run, grouped by the in-file section
# labels: DPOTrainer-specific keys first, then Trainer train / eval / save.
training_args:
  # DPOTrainer
  beta: 0.05
  loss_type: sigmoid
  max_length: 1024
  max_prompt_length: 512
  # Trainer (train)
  bf16: true
  do_train: true
  gradient_accumulation_steps: 8
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  learning_rate: 5.0e-7
  logging_steps: 10
  lr_scheduler_type: cosine
  num_train_epochs: 2
  optim: adamw_torch
  # NOTE(review): directory name says `gemma-7b-it-dpo` while the base model
  # is the zephyr-7b-gemma SFT checkpoint — confirm this path is intended.
  output_dir: data/gemma-7b-it-dpo
  per_device_train_batch_size: 2
  seed: 42
  # `warmup_ratio: 0.1` used to be set next to `warmup_steps`. In Hugging
  # Face `TrainingArguments` a non-zero `warmup_steps` overrides
  # `warmup_ratio`, so the ratio had no effect and was removed. The effective
  # warmup (100 steps) is unchanged. If a ratio-based warmup was the intent,
  # replace `warmup_steps` with `warmup_ratio` instead of setting both.
  warmup_steps: 100
  report_to:
    - wandb
    - tensorboard
  # Trainer (eval)
  do_eval: true
  # NOTE(review): newer Transformers releases rename this key to
  # `eval_strategy`; keep as-is unless the pinned version requires the rename.
  evaluation_strategy: steps
  eval_steps: 100
  per_device_eval_batch_size: 4
  # Trainer (save)
  # NOTE(review): repo name ends in `-faithful` while the W&B run is named
  # `16bit-no-gen-prompt` — confirm this is the intended Hub target.
  hub_model_id: alvarobartt/zephyr-gemma-dpo-faithful
  hub_private_repo: true
  push_to_hub: true
  save_strategy: "no"  # Quoted; otherwise YAML 1.1 loaders cast it to `False`
  save_total_limit: null
# NOTE(review): these two keys are not Trainer arguments; they appear to be
# script-level switches selecting the Accelerate / Unsloth code paths —
# confirm where the consuming script expects them.
use_accelerate: true
use_unsloth: false