End of training

Files changed:
- README.md +7 -7
- adapter_config.json +6 -6
- adapter_model.safetensors +2 -2
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +73 -73
- runs/Jan13_23-59-25_b5637b3f66b9/events.out.tfevents.1705190375.b5637b3f66b9.92446.0 +3 -0
- runs/Jan14_00-01-54_b5637b3f66b9/events.out.tfevents.1705190524.b5637b3f66b9.97198.0 +3 -0
- runs/Jan14_00-16-13_b5637b3f66b9/events.out.tfevents.1705191389.b5637b3f66b9.101961.0 +3 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -50,8 +50,8 @@ sequence_len: 2048
 sample_packing: false
 pad_to_sequence_len: false

-lora_r:
-lora_alpha:
+lora_r: 256
+lora_alpha: 128
 lora_dropout: 0.05
 lora_target_linear: true
 lora_modules_to_save:
@@ -69,11 +69,11 @@ wandb_project: dpo-zephyr-deita-nectar
 wandb_entity: oaaic
 wandb_watch:
 wandb_run_id:
-wandb_name: kto-3ep-
+wandb_name: kto-3ep-v3-r256
 wandb_log_model:

 gradient_accumulation_steps: 1
-micro_batch_size:
+micro_batch_size: 2
 num_epochs: 3
 optimizer: paged_adamw_8bit
 adam_beta2: 0.95
@@ -139,17 +139,17 @@ More information needed

 The following hyperparameters were used during training:
 - learning_rate: 1e-05
-- train_batch_size:
+- train_batch_size: 2
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 4
-- total_train_batch_size:
+- total_train_batch_size: 8
 - total_eval_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 10
-- training_steps:
+- training_steps: 3230

 ### Training results
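The key change above is that lora_r and lora_alpha, previously unset, are now 256 and 128, which gives a LoRA scaling factor of lora_alpha / lora_r = 128 / 256 = 0.5. As a point of reference (a minimal sketch, not the actual training script), these axolotl-style values would map onto a PEFT LoraConfig roughly as follows, with the target module list taken from the adapter_config.json diff below:

```python
# Sketch of how the LoRA values in this commit map onto a PEFT LoraConfig.
from peft import LoraConfig

lora_config = LoraConfig(
    r=256,            # lora_r in the axolotl config
    lora_alpha=128,   # scaling = lora_alpha / r = 128 / 256 = 0.5
    lora_dropout=0.05,
    target_modules=[  # lora_target_linear: true -> all linear projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
)
```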
adapter_config.json
CHANGED
@@ -9,23 +9,23 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha":
+  "lora_alpha": 128,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r":
+  "r": 256,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "q_proj",
     "down_proj",
-    "v_proj",
     "up_proj",
-    "
-    "
-    "
+    "gate_proj",
+    "v_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
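With adapter_config.json now carrying concrete values (r=256, lora_alpha=128, all seven linear projections targeted), the saved adapter can be loaded with PEFT. A minimal sketch, assuming a compatible base checkpoint; BASE_MODEL and ADAPTER_PATH are placeholders, not names from this commit:

```python
# Sketch of loading the adapter saved in this repo with PEFT.
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("BASE_MODEL", torch_dtype=torch.bfloat16)
# Reads adapter_config.json + adapter_model.safetensors from ADAPTER_PATH.
model = PeftModel.from_pretrained(base, "ADAPTER_PATH")
# Optionally fold the LoRA deltas into the base weights for inference.
model = model.merge_and_unload()
```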
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e197c0323741596d10f600eb6ba058defb470f2491756a549f6930771e5deb70
+size 1342239008
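The *.safetensors entries in this commit are Git LFS pointer files: a spec-version line, the SHA-256 oid of the real object, and its size in bytes. A small illustrative check (the helper name is ours, not part of any LFS tooling) that a downloaded artifact matches its pointer:

```python
# Verify a downloaded file against a Git LFS pointer: oid is the SHA-256
# of the file contents, size is its length in bytes.
import hashlib, os

def verify_lfs_object(path: str, oid: str, size: int) -> bool:
    if os.path.getsize(path) != size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == oid

# Values from the adapter_model.safetensors pointer above:
ok = verify_lfs_object(
    "adapter_model.safetensors",
    "e197c0323741596d10f600eb6ba058defb470f2491756a549f6930771e5deb70",
    1342239008,
)
```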
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3a3c6b5f84edcc9460d29155b6f11844d43a2badb26292b071ea03765c28f1af
+size 4917096408
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:cdc23002d59a5df08a6244df334b1958ae78b426d43a5ebe94a0af2ad8a7b587
+size 4968544640
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0dba996654afa2bc8637f3fb3e8edc8c5ce7c804b9a607dd299a258602588aaf
+size 4946490800
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b71ef791b8de8c7cc7fea4c1d68df265468f25718fade06a8558a12e6967bd92
+size 994136032
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 15826173952
   },
   "weight_map": {
     "lm_head.weight": "model-00004-of-00004.safetensors",
@@ -62,18 +62,18 @@
   "model.layers.10.mlp.up_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
   "model.layers.10.mlp.up_proj.lora_B.default.weight": "model-00002-of-00004.safetensors",
   "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
-  "model.layers.10.self_attn.k_proj.base_layer.weight": "model-
-  "model.layers.10.self_attn.k_proj.lora_A.default.weight": "model-
-  "model.layers.10.self_attn.k_proj.lora_B.default.weight": "model-
-  "model.layers.10.self_attn.o_proj.base_layer.weight": "model-
-  "model.layers.10.self_attn.o_proj.lora_A.default.weight": "model-
-  "model.layers.10.self_attn.o_proj.lora_B.default.weight": "model-
-  "model.layers.10.self_attn.q_proj.base_layer.weight": "model-
-  "model.layers.10.self_attn.q_proj.lora_A.default.weight": "model-
-  "model.layers.10.self_attn.q_proj.lora_B.default.weight": "model-
-  "model.layers.10.self_attn.v_proj.base_layer.weight": "model-
-  "model.layers.10.self_attn.v_proj.lora_A.default.weight": "model-
-  "model.layers.10.self_attn.v_proj.lora_B.default.weight": "model-
+  "model.layers.10.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.o_proj.base_layer.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.v_proj.base_layer.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
+  "model.layers.10.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00004.safetensors",
   "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
   "model.layers.11.mlp.down_proj.base_layer.weight": "model-00002-of-00004.safetensors",
   "model.layers.11.mlp.down_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
@@ -304,23 +304,23 @@
   "model.layers.2.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors",
   "model.layers.2.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors",
   "model.layers.2.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors",
-  "model.layers.20.input_layernorm.weight": "model-
-  "model.layers.20.mlp.down_proj.base_layer.weight": "model-
-  "model.layers.20.mlp.down_proj.lora_A.default.weight": "model-
-  "model.layers.20.mlp.down_proj.lora_B.default.weight": "model-
-  "model.layers.20.mlp.gate_proj.base_layer.weight": "model-
-  "model.layers.20.mlp.gate_proj.lora_A.default.weight": "model-
-  "model.layers.20.mlp.gate_proj.lora_B.default.weight": "model-
-  "model.layers.20.mlp.up_proj.base_layer.weight": "model-
-  "model.layers.20.mlp.up_proj.lora_A.default.weight": "model-
-  "model.layers.20.mlp.up_proj.lora_B.default.weight": "model-
-  "model.layers.20.post_attention_layernorm.weight": "model-
+  "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.down_proj.base_layer.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.down_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.down_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.gate_proj.base_layer.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.gate_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.gate_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.up_proj.base_layer.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.up_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.mlp.up_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
   "model.layers.20.self_attn.k_proj.base_layer.weight": "model-00002-of-00004.safetensors",
   "model.layers.20.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
   "model.layers.20.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00004.safetensors",
-  "model.layers.20.self_attn.o_proj.base_layer.weight": "model-
-  "model.layers.20.self_attn.o_proj.lora_A.default.weight": "model-
-  "model.layers.20.self_attn.o_proj.lora_B.default.weight": "model-
+  "model.layers.20.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.20.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.20.self_attn.q_proj.base_layer.weight": "model-00002-of-00004.safetensors",
   "model.layers.20.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
   "model.layers.20.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00004.safetensors",
@@ -338,18 +338,18 @@
   "model.layers.21.mlp.up_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.21.mlp.up_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
-  "model.layers.21.self_attn.k_proj.base_layer.weight": "model-
-  "model.layers.21.self_attn.k_proj.lora_A.default.weight": "model-
-  "model.layers.21.self_attn.k_proj.lora_B.default.weight": "model-
+  "model.layers.21.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors",
+  "model.layers.21.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.21.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.21.self_attn.o_proj.base_layer.weight": "model-00003-of-00004.safetensors",
   "model.layers.21.self_attn.o_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.21.self_attn.o_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
-  "model.layers.21.self_attn.q_proj.base_layer.weight": "model-
-  "model.layers.21.self_attn.q_proj.lora_A.default.weight": "model-
-  "model.layers.21.self_attn.q_proj.lora_B.default.weight": "model-
-  "model.layers.21.self_attn.v_proj.base_layer.weight": "model-
-  "model.layers.21.self_attn.v_proj.lora_A.default.weight": "model-
-  "model.layers.21.self_attn.v_proj.lora_B.default.weight": "model-
+  "model.layers.21.self_attn.q_proj.base_layer.weight": "model-00003-of-00004.safetensors",
+  "model.layers.21.self_attn.q_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.21.self_attn.q_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.21.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors",
+  "model.layers.21.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
+  "model.layers.21.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
   "model.layers.22.mlp.down_proj.base_layer.weight": "model-00003-of-00004.safetensors",
   "model.layers.22.mlp.down_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
@@ -557,17 +557,17 @@
   "model.layers.3.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors",
   "model.layers.3.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors",
   "model.layers.3.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors",
-  "model.layers.30.input_layernorm.weight": "model-
-  "model.layers.30.mlp.down_proj.base_layer.weight": "model-
-  "model.layers.30.mlp.down_proj.lora_A.default.weight": "model-
-  "model.layers.30.mlp.down_proj.lora_B.default.weight": "model-
+  "model.layers.30.input_layernorm.weight": "model-00004-of-00004.safetensors",
+  "model.layers.30.mlp.down_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.30.mlp.down_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.30.mlp.down_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
   "model.layers.30.mlp.gate_proj.base_layer.weight": "model-00003-of-00004.safetensors",
   "model.layers.30.mlp.gate_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.30.mlp.gate_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
-  "model.layers.30.mlp.up_proj.base_layer.weight": "model-
-  "model.layers.30.mlp.up_proj.lora_A.default.weight": "model-
-  "model.layers.30.mlp.up_proj.lora_B.default.weight": "model-
-  "model.layers.30.post_attention_layernorm.weight": "model-
+  "model.layers.30.mlp.up_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.30.mlp.up_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.30.mlp.up_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
   "model.layers.30.self_attn.k_proj.base_layer.weight": "model-00003-of-00004.safetensors",
   "model.layers.30.self_attn.k_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.30.self_attn.k_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
@@ -580,29 +580,29 @@
   "model.layers.30.self_attn.v_proj.base_layer.weight": "model-00003-of-00004.safetensors",
   "model.layers.30.self_attn.v_proj.lora_A.default.weight": "model-00003-of-00004.safetensors",
   "model.layers.30.self_attn.v_proj.lora_B.default.weight": "model-00003-of-00004.safetensors",
-  "model.layers.31.input_layernorm.weight": "model-
-  "model.layers.31.mlp.down_proj.base_layer.weight": "model-
-  "model.layers.31.mlp.down_proj.lora_A.default.weight": "model-
-  "model.layers.31.mlp.down_proj.lora_B.default.weight": "model-
-  "model.layers.31.mlp.gate_proj.base_layer.weight": "model-
-  "model.layers.31.mlp.gate_proj.lora_A.default.weight": "model-
-  "model.layers.31.mlp.gate_proj.lora_B.default.weight": "model-
-  "model.layers.31.mlp.up_proj.base_layer.weight": "model-
-  "model.layers.31.mlp.up_proj.lora_A.default.weight": "model-
-  "model.layers.31.mlp.up_proj.lora_B.default.weight": "model-
-  "model.layers.31.post_attention_layernorm.weight": "model-
-  "model.layers.31.self_attn.k_proj.base_layer.weight": "model-
-  "model.layers.31.self_attn.k_proj.lora_A.default.weight": "model-
-  "model.layers.31.self_attn.k_proj.lora_B.default.weight": "model-
-  "model.layers.31.self_attn.o_proj.base_layer.weight": "model-
-  "model.layers.31.self_attn.o_proj.lora_A.default.weight": "model-
-  "model.layers.31.self_attn.o_proj.lora_B.default.weight": "model-
-  "model.layers.31.self_attn.q_proj.base_layer.weight": "model-
-  "model.layers.31.self_attn.q_proj.lora_A.default.weight": "model-
-  "model.layers.31.self_attn.q_proj.lora_B.default.weight": "model-
-  "model.layers.31.self_attn.v_proj.base_layer.weight": "model-
-  "model.layers.31.self_attn.v_proj.lora_A.default.weight": "model-
-  "model.layers.31.self_attn.v_proj.lora_B.default.weight": "model-
+  "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.down_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.down_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.down_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.gate_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.gate_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.gate_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.up_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.up_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.mlp.up_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.k_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.k_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.k_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.o_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.o_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.o_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.q_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.q_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.q_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.v_proj.base_layer.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.v_proj.lora_A.default.weight": "model-00004-of-00004.safetensors",
+  "model.layers.31.self_attn.v_proj.lora_B.default.weight": "model-00004-of-00004.safetensors",
   "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
   "model.layers.4.mlp.down_proj.base_layer.weight": "model-00001-of-00004.safetensors",
   "model.layers.4.mlp.down_proj.lora_A.default.weight": "model-00001-of-00004.safetensors",
@@ -718,17 +718,17 @@
   "model.layers.8.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors",
   "model.layers.8.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors",
   "model.layers.8.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors",
-  "model.layers.9.input_layernorm.weight": "model-
-  "model.layers.9.mlp.down_proj.base_layer.weight": "model-
-  "model.layers.9.mlp.down_proj.lora_A.default.weight": "model-
-  "model.layers.9.mlp.down_proj.lora_B.default.weight": "model-
+  "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+  "model.layers.9.mlp.down_proj.base_layer.weight": "model-00002-of-00004.safetensors",
+  "model.layers.9.mlp.down_proj.lora_A.default.weight": "model-00002-of-00004.safetensors",
+  "model.layers.9.mlp.down_proj.lora_B.default.weight": "model-00002-of-00004.safetensors",
   "model.layers.9.mlp.gate_proj.base_layer.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.mlp.gate_proj.lora_A.default.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.mlp.gate_proj.lora_B.default.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.mlp.up_proj.base_layer.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.mlp.up_proj.lora_A.default.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.mlp.up_proj.lora_B.default.weight": "model-00001-of-00004.safetensors",
-  "model.layers.9.post_attention_layernorm.weight": "model-
+  "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
   "model.layers.9.self_attn.k_proj.base_layer.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00004.safetensors",
@@ -741,6 +741,6 @@
   "model.layers.9.self_attn.v_proj.base_layer.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00004.safetensors",
   "model.layers.9.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00004.safetensors",
-  "model.norm.weight": "model-
+  "model.norm.weight": "model-00004-of-00004.safetensors"
   }
 }
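model.safetensors.index.json maps each tensor name to the shard file that stores it; this commit reshards several boundary layers (layer 20's MLP and o_proj move to shard 3, layers 30/31 wholly or partly to shard 4). Note that metadata.total_size (15,826,173,952) is slightly smaller than the sum of the four shard sizes above (15,826,267,880), which is expected if total_size counts tensor bytes only while each shard file also carries a safetensors header. A sketch of using the index to load a single tensor without opening every shard:

```python
# Look up which shard holds a tensor, then read just that tensor.
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.31.self_attn.q_proj.lora_A.default.weight"
shard = index["weight_map"][name]  # -> "model-00004-of-00004.safetensors"
with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)
```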
runs/Jan13_23-59-25_b5637b3f66b9/events.out.tfevents.1705190375.b5637b3f66b9.92446.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8874f412db9c87f35d8ed512bcb8b617d88a7fa8ef0146836c21207630f29e23
+size 4830
runs/Jan14_00-01-54_b5637b3f66b9/events.out.tfevents.1705190524.b5637b3f66b9.97198.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fae5e1074485ffd1bf08978beaf2541d44a0ca0ab9aedf3adb717b974d6278eb
+size 5453
runs/Jan14_00-16-13_b5637b3f66b9/events.out.tfevents.1705191389.b5637b3f66b9.101961.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:870e01f6fd1473e804334e6163263ac048b9245808ef14d5523d9bbdeda2c9ae
+size 2051221
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f35dd2da756435a461f292de7f41f8f794f5bb87c3b8cf175f6493702e2c93f1
 size 4283
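training_args.bin is the Trainer's pickled TrainingArguments object; here only its hash changed while its size (4283 bytes) stayed the same. A hedged sketch of inspecting it; unpickling executes arbitrary code, so only do this with checkpoints you trust:

```python
# training_args.bin is saved with torch.save, so torch.load unpickles it.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```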