# Viewer metadata (not part of the config): revision 8c6ee2b, original file size 1,234 bytes, 57 lines.
# Preference dataset used for the run.
# NOTE(review): presumably a Hugging Face Hub dataset id — confirm against the loader.
dataset_args:
  path: argilla/dpo-mix-7k
# Prompt/chat formatting applied to the dataset examples.
# NOTE(review): `add_generation_prompt` is assumed to belong to this section;
# the flattened source does not show its nesting — confirm against the
# consumer's schema.
format_args:
  prompt_format: zephyr-gemma
  add_generation_prompt: false
# Starting checkpoint and the torch dtype requested for it.
model_args:
  pretrained_model_name_or_path: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
  torch_dtype: bfloat16
# Weights & Biases run identification (entity, project, and run name).
wandb_args:
  entity: argilla-io
  project: zephyr-gemma-dpo
  name: 16bit-no-gen-prompt
# Trainer arguments, grouped by the component that consumes them.
training_args:
  # DPOTrainer
  beta: 0.05
  loss_type: sigmoid
  max_length: 1024
  max_prompt_length: 512
  # Trainer (train)
  bf16: true
  do_train: true
  gradient_accumulation_steps: 8
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  learning_rate: 5.0e-7
  logging_steps: 10
  lr_scheduler_type: cosine
  num_train_epochs: 2
  optim: adamw_torch
  output_dir: data/gemma-7b-it-dpo
  per_device_train_batch_size: 2
  seed: 42
  # NOTE(review): `warmup_ratio: 0.1` used to be set alongside this key.
  # Assuming these values feed transformers' TrainingArguments, a positive
  # `warmup_steps` overrides `warmup_ratio`, so only the effective setting is
  # kept. If a 10% warmup was the intent, replace this key with
  # `warmup_ratio: 0.1` instead.
  warmup_steps: 100
  report_to:
    - wandb
    - tensorboard
  # Trainer (eval)
  do_eval: true
  # NOTE(review): newer transformers releases rename this key to
  # `eval_strategy` — confirm against the pinned version before upgrading.
  evaluation_strategy: steps
  eval_steps: 100
  per_device_eval_batch_size: 4
  # Trainer (save)
  hub_model_id: alvarobartt/zephyr-gemma-dpo-faithful
  hub_private_repo: true
  push_to_hub: true
  save_strategy: "no"  # Quoted; an unquoted `no` would be cast to boolean `False`
  save_total_limit: null
# Boolean runtime switches.
# NOTE(review): the names suggest Accelerate is enabled and Unsloth is
# disabled; what each flag actually toggles, and whether they are meant to be
# top-level keys, is not visible here — confirm against the consumer.
use_accelerate: true
use_unsloth: false
|