base_model: /raid/HUB_LLM/080225_vi_test_llama33_70b_instruct/checkpoint-2568/dpo32_epoch2/
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer

dataset_processes: 128
datasets:
  - path: json
    data_files:
      - /raid/instruction_tuning_060225/data/data_to_train/0103_136k.jsonl
    type: chat_template
    field_messages: messages
    message_field_role: role
    message_field_content: content

val_set_size: 0.0
output_dir: /raid/HUB_LLM/010325_VAI_v27_70b_instruct

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true

gradient_accumulation_steps: 16
micro_batch_size: 1
num_epochs: 3
optimizer: paged_adamw_8bit
lr_scheduler: cosine_with_min_lr
learning_rate: 3e-6
lr_scheduler_kwargs:
  min_lr: 0.0000007

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 0
evals_per_epoch: 1
eval_table_size:
saves_per_epoch: 2
save_total_limit: 10
debug:
deepspeed: /raid/instruction_tuning_060225/training/accelerate_config/zero3_bf16_cpuoffload_params.json
weight_decay: 0.1
max_grad_norm: 1.0
special_tokens:
  pad_token: <|end_of_text|>
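
# A minimal launch sketch (an assumption, not part of the original config):
# if this file is saved as config.yaml and the Axolotl CLI is installed, a
# multi-GPU DeepSpeed run is typically started with Accelerate, e.g.:
#
#   accelerate launch -m axolotl.cli.train config.yaml
#
# The filename config.yaml is a placeholder; the deepspeed JSON referenced
# above must exist at the given path for the run to start.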