Commit: Update README.md

File changed: `README.md`

Diff hunk: `@@ -16,23 +16,35 @@` (section following "Base Model: *upstage/SOLAR-10.7B-v1.0*")
Before (README lines 16-38):

***hyper params I***

batch_size : 16
num_epochs : 1
micro_batch : 1
gradient_accumulation_steps : batch_size // micro_batch

***hyper params II***

cutoff_len : 4096
lr_scheduler : 'cosine'
warmup_ratio : 0.06
learning_rate : 4e-4
optimizer : 'adamw_torch'
weight_decay : 0.01
max_grad_norm : 1.0

***LoRA config***

lora_r : 64
lora_alpha : 16
lora_dropout : 0.05
lora_target_modules : ["gate_proj", "down_proj", "up_proj"]
After (README lines 16-50; the change inserts a blank line between each hyperparameter entry):

***hyper params I***

batch_size : 16

num_epochs : 1

micro_batch : 1

gradient_accumulation_steps : batch_size // micro_batch

***hyper params II***

cutoff_len : 4096

lr_scheduler : 'cosine'

warmup_ratio : 0.06

learning_rate : 4e-4

optimizer : 'adamw_torch'

weight_decay : 0.01

max_grad_norm : 1.0

***LoRA config***

lora_r : 64

lora_alpha : 16

lora_dropout : 0.05

lora_target_modules : ["gate_proj", "down_proj", "up_proj"]