hoang14 committed
Commit ed9b068 · verified · 1 Parent(s): b02eaaa

Update config.yaml

Files changed (1)
  1. config.yaml +58 -0
config.yaml CHANGED
@@ -0,0 +1,58 @@
+ base_model: /raid/HUB_LLM/080225_vi_test_llama33_70b_instruct/checkpoint-2568/dpo32_epoch2/
+ model_type: LlamaForCausalLM
+ tokenizer_type: AutoTokenizer
+
+ dataset_processes: 128
+ datasets:
+   - path: json
+     data_files:
+       - /raid/instruction_tuning_060225/data/data_to_train/0103_136k.jsonl
+
+     type: chat_template
+     field_messages: messages
+     message_field_role: role
+     message_field_content: content
+
+ val_set_size: 0.0
+ output_dir: /raid/HUB_LLM/010325_VAI_v27_70b_instruct
+
+ sequence_len: 4096
+ sample_packing: true
+ pad_to_sequence_len: true
+
+ gradient_accumulation_steps: 16
+ micro_batch_size: 1
+ num_epochs: 3
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine_with_min_lr
+ learning_rate: 3e-6
+ lr_scheduler_kwargs:
+   min_lr: 0.0000007
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: true
+ gradient_checkpointing_kwargs:
+   use_reentrant: false
+ early_stopping_patience:
+ resume_from_checkpoint:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 0
+ evals_per_epoch: 1
+ eval_table_size:
+ saves_per_epoch: 2
+ save_total_limit: 10
+ debug:
+ deepspeed: /raid/instruction_tuning_060225/training/accelerate_config/zero3_bf16_cpuoffload_params.json
+ weight_decay: 0.1
+
+ max_grad_norm: 1.0
+ special_tokens:
+   pad_token: <|end_of_text|>
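
For reference, the chat_template dataset keys above (field_messages, message_field_role, message_field_content) mean each JSONL line in the training file carries a top-level "messages" list of role/content turns. Below is a minimal Python sketch of one such record; the file name and the example turns are hypothetical, not taken from the actual training data:

import json

# Hypothetical record (not from 0103_136k.jsonl) in the layout the config
# declares: field_messages -> "messages", message_field_role -> "role",
# message_field_content -> "content".
record = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What does sample_packing do?"},
        {"role": "assistant", "content": "It packs several short conversations into one fixed-length training sequence."},
    ]
}

# One JSON object per line; Axolotl loads the file through the json loader
# named by path: json above.
with open("sample.jsonl", "w", encoding="utf-8") as f:
    f.write(json.dumps(record, ensure_ascii=False) + "\n")

With train_on_inputs: false, loss is computed only on response turns rather than prompts, and sample_packing: true packs conversations into 4096-token sequences. The effective batch size works out to micro_batch_size × gradient_accumulation_steps × number of GPUs, i.e. 16 × world size here. A config like this is typically launched with the Axolotl CLI, e.g. accelerate launch -m axolotl.cli.train config.yaml.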