---
# Supervised fine-tuning (stage: sft) run configuration: full-parameter
# tuning of Qwen/Qwen2.5-0.5B-Instruct on two Hugging Face datasets.
# One key per line, kept in alphabetical order so diffs stay small.
# NOTE(review): key names match LLaMA-Factory / Hugging Face Trainer
# arguments - confirm against the tool that actually loads this file.

bf16: true
cutoff_len: 2048
# Comma-separated list of dataset identifiers (no spaces around the comma).
dataset: prithivMLmods/Deepthink-Reasoning-Ins,open-thoughts/OpenThoughts-114k
dataset_dir: data
ddp_timeout: 180000000
do_train: true
eval_steps: 200
eval_strategy: steps
finetuning_type: full
flash_attn: auto
gradient_accumulation_steps: 8
include_num_input_tokens_seen: true
# NOTE(review): plain SGD with a 0.0001 learning rate (see `optim` below)
# is an unusual pairing for full fine-tuning - confirm it is intended.
learning_rate: 0.0001
logging_steps: 1
lr_scheduler_type: polynomial
max_grad_norm: 1.0
max_samples: 57000000
model_name_or_path: Qwen/Qwen2.5-0.5B-Instruct
num_train_epochs: 1.0
optim: sgd
# Windows-style path; single quotes keep the backslashes literal.
output_dir: 'saves\Qwen2.5-0.5B-Instruct\full\20-02-2025'
packing: false
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
plot_loss: true
preprocessing_num_workers: 16
# The plain word `none` is a YAML string here, not a null value.
report_to: none
save_steps: 200
stage: sft
template: qwen
trust_remote_code: true
# NOTE(review): integer 1, not a fraction - presumably a single held-out
# example; confirm how the consumer interprets a value of exactly 1.
val_size: 1
warmup_steps: 0