TTTXXX01 committed
Commit fc1c8f2 · verified · 1 Parent(s): 1ce79b7

Model save

README.md ADDED
@@ -0,0 +1,60 @@
+ ---
+ license: mit
+ base_model: TTTXXX01/My-Zephyr-7B-iter-1
+ tags:
+ - trl
+ - dpo
+ - generated_from_trainer
+ model-index:
+ - name: My-Zephyr-7B-iter-2
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # My-Zephyr-7B-iter-2
+
+ This model is a fine-tuned version of [TTTXXX01/My-Zephyr-7B-iter-1](https://huggingface.co/TTTXXX01/My-Zephyr-7B-iter-1) on an unknown dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-07
+ - train_batch_size: 3
+ - eval_batch_size: 3
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 3
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 36
+ - total_eval_batch_size: 9
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.41.2
+ - Pytorch 2.3.0+cu121
+ - Datasets 2.19.1
+ - Tokenizers 0.19.1
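
The tags above mark this as a TRL DPO run, and the hyperparameters pin down the effective batch size: 3 per device × 3 GPUs × 4 gradient-accumulation steps = 36. The training script itself is not part of this commit, so the following is only a minimal sketch of a matching setup; the `DPOConfig` fields mirror the card, while the dataset and DPO-specific settings (`beta`, prompt formatting) are unknown and left as placeholders.

```python
# Minimal sketch of a TRL DPO setup matching the card's hyperparameters.
# NOT the author's script: the dataset and DPO-specific knobs are not
# recorded in this commit and are left unspecified here.
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

base = "TTTXXX01/My-Zephyr-7B-iter-1"  # base_model from the card metadata
model = AutoModelForCausalLM.from_pretrained(base)
tokenizer = AutoTokenizer.from_pretrained(base)

args = DPOConfig(
    output_dir="My-Zephyr-7B-iter-2",
    learning_rate=5e-7,
    per_device_train_batch_size=3,   # x 3 GPUs x 4 accumulation = 36 total
    per_device_eval_batch_size=3,
    gradient_accumulation_steps=4,
    seed=42,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    num_train_epochs=1,
    logging_steps=10,                # matches trainer_state.json below
    save_steps=100,
    bf16=True,                       # assumption; the dtype is not recorded
)
trainer = DPOTrainer(model, args=args, tokenizer=tokenizer,
                     train_dataset=...)  # preference dataset unknown
trainer.train()
```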
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 0.998234255444379,
+   "total_flos": 0.0,
+   "train_loss": 0.387972928302468,
+   "train_runtime": 9484.4674,
+   "train_samples": 15283,
+   "train_samples_per_second": 1.611,
+   "train_steps_per_second": 0.045
+ }
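
The throughput figures here are internally consistent with the run length, which a few lines of arithmetic confirm (`global_step` 424 comes from trainer_state.json further down):

```python
# Consistency check on all_results.json; values are copied from the files
# in this commit, no external assumptions.
train_samples = 15283
train_runtime = 9484.4674      # seconds
global_step = 424              # from trainer_state.json

print(round(train_samples / train_runtime, 3))  # 1.611 -> train_samples_per_second
print(round(global_step / train_runtime, 3))    # 0.045 -> train_steps_per_second
```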
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.41.2"
+ }
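
This generation config only pins the BOS/EOS token ids (1 and 2, the usual Mistral/Llama-style special tokens) and is picked up automatically at load time. A hedged usage sketch; the prompt and sampling settings here are illustrative, not from the commit:

```python
# Sketch: generation_config.json is loaded automatically by from_pretrained,
# so no extra wiring is needed.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "TTTXXX01/My-Zephyr-7B-iter-2"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, device_map="auto")

print(model.generation_config.bos_token_id,  # 1
      model.generation_config.eos_token_id)  # 2

inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```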
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58dd6b9d56e576aa1e3ab7798619b56c8a1de7e5c10f4c6b706525bc31235643
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:786e6441ec3b70ad264c210512f28d009108f33c30092e122606005461d1f3d6
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:91ba2bf89b573c5283d35013cb1413bffe3ddb3c70b3c52b5bd7f31ff5a932ef
+ size 4540516344
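
The three entries above are Git LFS pointer files, not the weights themselves: each records only the spec version, the SHA-256 of the real shard, and its size in bytes. A small sketch of how such a pointer can be parsed and a downloaded shard verified against it (the pointer text is the first shard's, verbatim; file handling is illustrative):

```python
# Sketch: parse a Git LFS pointer like the three above and verify a local
# shard file against it.
import hashlib

def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    algo, digest = fields["oid"].split(":", 1)
    assert fields["version"] == "https://git-lfs.github.com/spec/v1"
    assert algo == "sha256"
    return {"oid": digest, "size": int(fields["size"])}

def verify_shard(path: str, pointer: dict) -> bool:
    # Stream the file in 1 MiB chunks so multi-GB shards fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == pointer["oid"]

ptr = parse_lfs_pointer("""version https://git-lfs.github.com/spec/v1
oid sha256:58dd6b9d56e576aa1e3ab7798619b56c8a1de7e5c10f4c6b706525bc31235643
size 4943162336""")
print(ptr["size"])  # 4943162336 bytes (~4.9 GB)
```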
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 14483464192
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00003-of-00003.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.norm.weight": "model-00003-of-00003.safetensors"
+   }
+ }
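
Two things are worth noting about this index. First, `total_size` (14,483,464,192 bytes) counts tensor data only; at 2 bytes per parameter (the commit does not record the dtype, but bf16/fp16 is the norm for such checkpoints) that is ~7.24B parameters, consistent with the 7B architecture. It is therefore slightly smaller than the sum of the three shard file sizes above, which also include each file's safetensors header. Second, `weight_map` is what lets `from_pretrained` fetch only the shard holding a given tensor; note how layer 22's attention projections land in shard 2 while the rest of the layer spills into shard 3. A small sketch of reading the map directly:

```python
# Sketch: inspect the shard layout from model.safetensors.index.json,
# assuming the file has been downloaded into the working directory.
import json
from collections import Counter

with open("model.safetensors.index.json") as f:
    index = json.load(f)

weight_map = index["weight_map"]
# Layer 22 straddles the shard 2/3 boundary:
print(weight_map["model.layers.22.self_attn.q_proj.weight"])  # model-00002-of-00003.safetensors
print(weight_map["model.layers.22.mlp.up_proj.weight"])       # model-00003-of-00003.safetensors

print(Counter(weight_map.values()))  # number of tensors stored in each shard

# total_size counts raw tensor bytes; at 2 bytes/param this is ~7.24B params.
print(index["metadata"]["total_size"] / 2 / 1e9)
```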
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 0.998234255444379,
+   "total_flos": 0.0,
+   "train_loss": 0.387972928302468,
+   "train_runtime": 9484.4674,
+   "train_samples": 15283,
+   "train_samples_per_second": 1.611,
+   "train_steps_per_second": 0.045
+ }
trainer_state.json ADDED
@@ -0,0 +1,773 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.998234255444379,
+   "eval_steps": 500,
+   "global_step": 424,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.002354326074161271,
+       "grad_norm": 387.14608138046685,
+       "learning_rate": 1.1627906976744186e-08,
+       "logits/chosen": 0.9922162890434265,
+       "logits/rejected": 2.5945053100585938,
+       "logps/chosen": -671.7749633789062,
+       "logps/pi_response": -362.8890380859375,
+       "logps/ref_response": -362.8890380859375,
+       "logps/rejected": -872.0834350585938,
+       "loss": 0.6931,
+       "rewards/accuracies": 0.0,
+       "rewards/chosen": 0.0,
+       "rewards/margins": 0.0,
+       "rewards/rejected": 0.0,
+       "step": 1
+     },
+     {
+       "epoch": 0.023543260741612712,
+       "grad_norm": 416.60085993891914,
+       "learning_rate": 1.1627906976744186e-07,
+       "logits/chosen": 0.5027725696563721,
+       "logits/rejected": 1.1612285375595093,
+       "logps/chosen": -615.636474609375,
+       "logps/pi_response": -373.8717346191406,
+       "logps/ref_response": -371.5905456542969,
+       "logps/rejected": -773.8812255859375,
+       "loss": 0.68,
+       "rewards/accuracies": 0.5185185074806213,
+       "rewards/chosen": 0.0004265241150278598,
+       "rewards/margins": 0.02714625373482704,
+       "rewards/rejected": -0.026719728484749794,
+       "step": 10
+     },
+     {
+       "epoch": 0.047086521483225424,
+       "grad_norm": 146.4782737758625,
+       "learning_rate": 2.3255813953488372e-07,
+       "logits/chosen": 1.0533249378204346,
+       "logits/rejected": 1.2860088348388672,
+       "logps/chosen": -583.3096923828125,
+       "logps/pi_response": -664.9867553710938,
+       "logps/ref_response": -384.66943359375,
+       "logps/rejected": -1088.1510009765625,
+       "loss": 0.5167,
+       "rewards/accuracies": 0.8500000834465027,
+       "rewards/chosen": -0.05585538223385811,
+       "rewards/margins": 2.6645126342773438,
+       "rewards/rejected": -2.7203681468963623,
+       "step": 20
+     },
+     {
+       "epoch": 0.07062978222483814,
+       "grad_norm": 193.36310820542772,
+       "learning_rate": 3.4883720930232557e-07,
+       "logits/chosen": 1.5033495426177979,
+       "logits/rejected": 2.0693583488464355,
+       "logps/chosen": -675.2313232421875,
+       "logps/pi_response": -1286.485107421875,
+       "logps/ref_response": -382.0600280761719,
+       "logps/rejected": -1842.0443115234375,
+       "loss": 0.3477,
+       "rewards/accuracies": 0.8333333730697632,
+       "rewards/chosen": -1.2492175102233887,
+       "rewards/margins": 8.74349594116211,
+       "rewards/rejected": -9.99271297454834,
+       "step": 30
+     },
+     {
+       "epoch": 0.09417304296645085,
+       "grad_norm": 161.8114972801858,
+       "learning_rate": 4.6511627906976743e-07,
+       "logits/chosen": 2.231173515319824,
+       "logits/rejected": 2.243076801300049,
+       "logps/chosen": -888.1588745117188,
+       "logps/pi_response": -1944.935546875,
+       "logps/ref_response": -385.018798828125,
+       "logps/rejected": -2694.44189453125,
+       "loss": 0.5111,
+       "rewards/accuracies": 0.8666666746139526,
+       "rewards/chosen": -3.3236842155456543,
+       "rewards/margins": 15.406656265258789,
+       "rewards/rejected": -18.7303409576416,
+       "step": 40
+     },
+     {
+       "epoch": 0.11771630370806356,
+       "grad_norm": 73.27022369826706,
+       "learning_rate": 4.995836722963699e-07,
+       "logits/chosen": 1.7006012201309204,
+       "logits/rejected": 2.5465452671051025,
+       "logps/chosen": -736.0626220703125,
+       "logps/pi_response": -1658.806640625,
+       "logps/ref_response": -418.013671875,
+       "logps/rejected": -2036.8658447265625,
+       "loss": 0.3367,
+       "rewards/accuracies": 0.8999999761581421,
+       "rewards/chosen": -1.4946272373199463,
+       "rewards/margins": 11.121105194091797,
+       "rewards/rejected": -12.615732192993164,
+       "step": 50
+     },
+     {
+       "epoch": 0.14125956444967627,
+       "grad_norm": 197.275331081517,
+       "learning_rate": 4.975478535699678e-07,
+       "logits/chosen": 2.0557973384857178,
+       "logits/rejected": 2.7715961933135986,
+       "logps/chosen": -786.7219848632812,
+       "logps/pi_response": -1516.2132568359375,
+       "logps/ref_response": -400.8934326171875,
+       "logps/rejected": -1846.4228515625,
+       "loss": 0.3929,
+       "rewards/accuracies": 0.800000011920929,
+       "rewards/chosen": -1.6042267084121704,
+       "rewards/margins": 9.362323760986328,
+       "rewards/rejected": -10.966550827026367,
+       "step": 60
+     },
+     {
+       "epoch": 0.164802825191289,
+       "grad_norm": 125.32869675723494,
+       "learning_rate": 4.938298919762906e-07,
+       "logits/chosen": 2.2630465030670166,
+       "logits/rejected": 2.313655138015747,
+       "logps/chosen": -874.7604370117188,
+       "logps/pi_response": -1775.0299072265625,
+       "logps/ref_response": -406.5614013671875,
+       "logps/rejected": -2185.577392578125,
+       "loss": 0.3204,
+       "rewards/accuracies": 0.875,
+       "rewards/chosen": -2.6973066329956055,
+       "rewards/margins": 11.22050666809082,
+       "rewards/rejected": -13.917811393737793,
+       "step": 70
+     },
+     {
+       "epoch": 0.1883460859329017,
+       "grad_norm": 183.2640323450364,
+       "learning_rate": 4.884550518934592e-07,
+       "logits/chosen": 2.544085741043091,
+       "logits/rejected": 2.2697596549987793,
+       "logps/chosen": -814.3087158203125,
+       "logps/pi_response": -1254.173583984375,
+       "logps/ref_response": -385.601318359375,
+       "logps/rejected": -1756.57421875,
+       "loss": 0.4552,
+       "rewards/accuracies": 0.8500000238418579,
+       "rewards/chosen": -2.785598039627075,
+       "rewards/margins": 6.5690412521362305,
+       "rewards/rejected": -9.354639053344727,
+       "step": 80
+     },
+     {
+       "epoch": 0.21188934667451442,
+       "grad_norm": 117.0357298014428,
+       "learning_rate": 4.814598565584062e-07,
+       "logits/chosen": 3.02203369140625,
+       "logits/rejected": 3.5366241931915283,
+       "logps/chosen": -908.7738037109375,
+       "logps/pi_response": -1195.170166015625,
+       "logps/ref_response": -383.25390625,
+       "logps/rejected": -1815.823974609375,
+       "loss": 0.3526,
+       "rewards/accuracies": 0.85833340883255,
+       "rewards/chosen": -3.5764949321746826,
+       "rewards/margins": 6.11873722076416,
+       "rewards/rejected": -9.695232391357422,
+       "step": 90
+     },
+     {
+       "epoch": 0.23543260741612712,
+       "grad_norm": 398.94556345700823,
+       "learning_rate": 4.7289183988333603e-07,
+       "logits/chosen": 2.4340739250183105,
+       "logits/rejected": 3.0718626976013184,
+       "logps/chosen": -979.5059814453125,
+       "logps/pi_response": -1823.2095947265625,
+       "logps/ref_response": -389.8714294433594,
+       "logps/rejected": -2347.225341796875,
+       "loss": 0.5772,
+       "rewards/accuracies": 0.8500000238418579,
+       "rewards/chosen": -4.193310260772705,
+       "rewards/margins": 10.396108627319336,
+       "rewards/rejected": -14.5894193649292,
+       "step": 100
+     },
+     {
+       "epoch": 0.2589758681577399,
+       "grad_norm": 122.8282681969783,
+       "learning_rate": 4.6280922345219255e-07,
+       "logits/chosen": 2.0978169441223145,
+       "logits/rejected": 2.4143967628479004,
+       "logps/chosen": -942.2001953125,
+       "logps/pi_response": -1763.4075927734375,
+       "logps/ref_response": -404.2528381347656,
+       "logps/rejected": -2334.311279296875,
+       "loss": 0.2675,
+       "rewards/accuracies": 0.8916667103767395,
+       "rewards/chosen": -3.5824508666992188,
+       "rewards/margins": 11.474252700805664,
+       "rewards/rejected": -15.056703567504883,
+       "step": 110
+     },
+     {
+       "epoch": 0.28251912889935255,
+       "grad_norm": 114.42881059618138,
+       "learning_rate": 4.512805208920118e-07,
+       "logits/chosen": 1.876939058303833,
+       "logits/rejected": 1.8963384628295898,
+       "logps/chosen": -1116.2161865234375,
+       "logps/pi_response": -2194.660888671875,
+       "logps/ref_response": -382.1685485839844,
+       "logps/rejected": -2665.172607421875,
+       "loss": 0.3802,
+       "rewards/accuracies": 0.8833333849906921,
+       "rewards/chosen": -5.01099157333374,
+       "rewards/margins": 14.220196723937988,
+       "rewards/rejected": -19.231189727783203,
+       "step": 120
+     },
+     {
+       "epoch": 0.30606238964096527,
+       "grad_norm": 289.6702886782038,
+       "learning_rate": 4.383840723075488e-07,
+       "logits/chosen": 0.6433286070823669,
+       "logits/rejected": 1.8898694515228271,
+       "logps/chosen": -1148.931884765625,
+       "logps/pi_response": -1483.739990234375,
+       "logps/ref_response": -384.0348815917969,
+       "logps/rejected": -2114.025634765625,
+       "loss": 0.5347,
+       "rewards/accuracies": 0.875,
+       "rewards/chosen": -5.955471515655518,
+       "rewards/margins": 6.998837947845459,
+       "rewards/rejected": -12.954309463500977,
+       "step": 130
+     },
+     {
+       "epoch": 0.329605650382578,
+       "grad_norm": 68.08348369771358,
+       "learning_rate": 4.2420751194279604e-07,
+       "logits/chosen": -0.47634777426719666,
+       "logits/rejected": 0.15769393742084503,
+       "logps/chosen": -1069.734619140625,
+       "logps/pi_response": -1077.26025390625,
+       "logps/ref_response": -385.5350646972656,
+       "logps/rejected": -1771.5843505859375,
+       "loss": 0.2586,
+       "rewards/accuracies": 0.9083333015441895,
+       "rewards/chosen": -4.997823238372803,
+       "rewards/margins": 4.333145618438721,
+       "rewards/rejected": -9.330968856811523,
+       "step": 140
+     },
+     {
+       "epoch": 0.3531489111241907,
+       "grad_norm": 163.9369240446969,
+       "learning_rate": 4.0884717268675306e-07,
+       "logits/chosen": -0.20242293179035187,
+       "logits/rejected": 0.2356901615858078,
+       "logps/chosen": -1135.7840576171875,
+       "logps/pi_response": -1698.245361328125,
+       "logps/ref_response": -393.395263671875,
+       "logps/rejected": -2191.13671875,
+       "loss": 0.2882,
+       "rewards/accuracies": 0.8999999761581421,
+       "rewards/chosen": -5.520671367645264,
+       "rewards/margins": 8.618199348449707,
+       "rewards/rejected": -14.138870239257812,
+       "step": 150
+     },
+     {
+       "epoch": 0.3766921718658034,
+       "grad_norm": 150.38185986907246,
+       "learning_rate": 3.9240743146996427e-07,
+       "logits/chosen": -0.22249138355255127,
+       "logits/rejected": 0.3604862689971924,
+       "logps/chosen": -981.8104248046875,
+       "logps/pi_response": -1485.242919921875,
+       "logps/ref_response": -374.290283203125,
+       "logps/rejected": -2042.9921875,
+       "loss": 0.3641,
+       "rewards/accuracies": 0.8250001072883606,
+       "rewards/chosen": -3.9730944633483887,
+       "rewards/margins": 8.374366760253906,
+       "rewards/rejected": -12.347461700439453,
+       "step": 160
+     },
+     {
+       "epoch": 0.4002354326074161,
+       "grad_norm": 80.54150444296407,
+       "learning_rate": 3.75e-07,
+       "logits/chosen": -0.11508075892925262,
+       "logits/rejected": 0.29167068004608154,
+       "logps/chosen": -1000.4583740234375,
+       "logps/pi_response": -1468.6123046875,
+       "logps/ref_response": -418.64288330078125,
+       "logps/rejected": -2010.297607421875,
+       "loss": 0.3769,
+       "rewards/accuracies": 0.875,
+       "rewards/chosen": -4.033980369567871,
+       "rewards/margins": 7.807894229888916,
+       "rewards/rejected": -11.841875076293945,
+       "step": 170
+     },
+     {
+       "epoch": 0.42377869334902885,
+       "grad_norm": 157.0982901424276,
+       "learning_rate": 3.5674316565549227e-07,
+       "logits/chosen": 0.3534625172615051,
+       "logits/rejected": 0.6805320382118225,
+       "logps/chosen": -1109.6680908203125,
+       "logps/pi_response": -1332.754638671875,
+       "logps/ref_response": -410.44232177734375,
+       "logps/rejected": -1955.057861328125,
+       "loss": 0.2771,
+       "rewards/accuracies": 0.8666666746139526,
+       "rewards/chosen": -5.245041370391846,
+       "rewards/margins": 6.281018257141113,
+       "rewards/rejected": -11.526060104370117,
+       "step": 180
+     },
+     {
+       "epoch": 0.44732195409064157,
+       "grad_norm": 138.9366989622839,
+       "learning_rate": 3.377609876970194e-07,
+       "logits/chosen": -0.06883997470140457,
+       "logits/rejected": 0.5686254501342773,
+       "logps/chosen": -1120.6253662109375,
+       "logps/pi_response": -1725.830810546875,
+       "logps/ref_response": -391.46697998046875,
+       "logps/rejected": -2330.28564453125,
+       "loss": 0.3201,
+       "rewards/accuracies": 0.85833340883255,
+       "rewards/chosen": -5.111655235290527,
+       "rewards/margins": 10.163948059082031,
+       "rewards/rejected": -15.275602340698242,
+       "step": 190
+     },
+     {
+       "epoch": 0.47086521483225424,
+       "grad_norm": 74.34016252556022,
+       "learning_rate": 3.1818245425676556e-07,
+       "logits/chosen": 0.038237761706113815,
+       "logits/rejected": 0.7730900049209595,
+       "logps/chosen": -1035.312744140625,
+       "logps/pi_response": -1930.384765625,
+       "logps/ref_response": -400.8553771972656,
+       "logps/rejected": -2589.25439453125,
+       "loss": 0.3094,
+       "rewards/accuracies": 0.9250000715255737,
+       "rewards/chosen": -4.300232410430908,
+       "rewards/margins": 12.978002548217773,
+       "rewards/rejected": -17.278234481811523,
+       "step": 200
+     },
+     {
+       "epoch": 0.49440847557386697,
+       "grad_norm": 135.89742493179222,
+       "learning_rate": 2.981406058353988e-07,
+       "logits/chosen": 1.6664092540740967,
+       "logits/rejected": 1.5270541906356812,
+       "logps/chosen": -1253.89013671875,
+       "logps/pi_response": -3124.22021484375,
+       "logps/ref_response": -380.658447265625,
+       "logps/rejected": -3518.0625,
+       "loss": 0.31,
+       "rewards/accuracies": 0.8916667103767395,
+       "rewards/chosen": -6.066841125488281,
+       "rewards/margins": 21.756336212158203,
+       "rewards/rejected": -27.823177337646484,
+       "step": 210
+     },
+     {
+       "epoch": 0.5179517363154797,
+       "grad_norm": 367.00195352780696,
+       "learning_rate": 2.7777163126220113e-07,
+       "logits/chosen": 1.1289093494415283,
+       "logits/rejected": 1.753485918045044,
+       "logps/chosen": -1161.3385009765625,
+       "logps/pi_response": -2382.952880859375,
+       "logps/ref_response": -397.78643798828125,
+       "logps/rejected": -2924.198974609375,
+       "loss": 0.3399,
+       "rewards/accuracies": 0.8416668176651001,
+       "rewards/chosen": -5.906828880310059,
+       "rewards/margins": 15.091941833496094,
+       "rewards/rejected": -20.998775482177734,
+       "step": 220
+     },
+     {
+       "epoch": 0.5414949970570924,
+       "grad_norm": 429.21214818334255,
+       "learning_rate": 2.5721394226160456e-07,
+       "logits/chosen": 0.7405129671096802,
+       "logits/rejected": 1.092500925064087,
+       "logps/chosen": -1127.665771484375,
+       "logps/pi_response": -3084.66748046875,
+       "logps/ref_response": -385.5183410644531,
+       "logps/rejected": -3698.11083984375,
+       "loss": 0.3142,
+       "rewards/accuracies": 0.824999988079071,
+       "rewards/chosen": -5.245604515075684,
+       "rewards/margins": 23.772628784179688,
+       "rewards/rejected": -29.018238067626953,
+       "step": 230
+     },
+     {
+       "epoch": 0.5650382577987051,
+       "grad_norm": 143.78300847624018,
+       "learning_rate": 2.3660723291465753e-07,
+       "logits/chosen": 0.8486520648002625,
+       "logits/rejected": 1.15015709400177,
+       "logps/chosen": -1044.281982421875,
+       "logps/pi_response": -2387.38623046875,
+       "logps/ref_response": -387.73004150390625,
+       "logps/rejected": -2920.59375,
+       "loss": 0.2255,
+       "rewards/accuracies": 0.9583333730697632,
+       "rewards/chosen": -4.885871887207031,
+       "rewards/margins": 15.968719482421875,
+       "rewards/rejected": -20.854591369628906,
+       "step": 240
+     },
+     {
+       "epoch": 0.5885815185403178,
+       "grad_norm": 125.17444301250116,
+       "learning_rate": 2.1609153040659357e-07,
+       "logits/chosen": 1.349198341369629,
+       "logits/rejected": 1.477452278137207,
+       "logps/chosen": -1202.511962890625,
+       "logps/pi_response": -4804.65576171875,
+       "logps/ref_response": -403.5562438964844,
+       "logps/rejected": -4992.322265625,
+       "loss": 0.2998,
+       "rewards/accuracies": 0.8333333134651184,
+       "rewards/chosen": -5.66055154800415,
+       "rewards/margins": 36.13991928100586,
+       "rewards/rejected": -41.800472259521484,
+       "step": 250
+     },
+     {
+       "epoch": 0.6121247792819305,
+       "grad_norm": 75.45331010998015,
+       "learning_rate": 1.9580624351088174e-07,
+       "logits/chosen": 1.8510982990264893,
+       "logits/rejected": 1.8215391635894775,
+       "logps/chosen": -1237.7900390625,
+       "logps/pi_response": -4366.1884765625,
+       "logps/ref_response": -402.7124328613281,
+       "logps/rejected": -4625.4326171875,
+       "loss": 0.2256,
+       "rewards/accuracies": 0.908333420753479,
+       "rewards/chosen": -6.537631034851074,
+       "rewards/margins": 31.707489013671875,
+       "rewards/rejected": -38.245121002197266,
+       "step": 260
+     },
+     {
+       "epoch": 0.6356680400235433,
+       "grad_norm": 121.70692272560186,
+       "learning_rate": 1.7588921527552315e-07,
+       "logits/chosen": 1.641945242881775,
+       "logits/rejected": 1.5603419542312622,
+       "logps/chosen": -1264.4483642578125,
+       "logps/pi_response": -4920.81396484375,
+       "logps/ref_response": -420.87274169921875,
+       "logps/rejected": -5146.81005859375,
+       "loss": 0.7912,
+       "rewards/accuracies": 0.9000000953674316,
+       "rewards/chosen": -6.559077262878418,
+       "rewards/margins": 36.41227722167969,
+       "rewards/rejected": -42.97135925292969,
+       "step": 270
+     },
+     {
+       "epoch": 0.659211300765156,
+       "grad_norm": 121.24373782517053,
+       "learning_rate": 1.564757863488017e-07,
+       "logits/chosen": 1.3223627805709839,
+       "logits/rejected": 2.0051231384277344,
+       "logps/chosen": -1246.6629638671875,
+       "logps/pi_response": -4004.585205078125,
+       "logps/ref_response": -386.71160888671875,
+       "logps/rejected": -4359.3427734375,
+       "loss": 0.1897,
+       "rewards/accuracies": 0.9333333969116211,
+       "rewards/chosen": -6.822081089019775,
+       "rewards/margins": 27.98373794555664,
+       "rewards/rejected": -34.80581283569336,
+       "step": 280
+     },
+     {
+       "epoch": 0.6827545615067687,
+       "grad_norm": 830.0337931185638,
+       "learning_rate": 1.3769787530939818e-07,
+       "logits/chosen": 1.528591275215149,
+       "logits/rejected": 2.117349147796631,
+       "logps/chosen": -1309.528564453125,
+       "logps/pi_response": -5731.9384765625,
+       "logps/ref_response": -398.12322998046875,
+       "logps/rejected": -5667.63916015625,
+       "loss": 0.297,
+       "rewards/accuracies": 0.9166668057441711,
+       "rewards/chosen": -7.466167449951172,
+       "rewards/margins": 41.12438201904297,
+       "rewards/rejected": -48.590553283691406,
+       "step": 290
+     },
+     {
+       "epoch": 0.7062978222483814,
+       "grad_norm": 152.99540073883847,
+       "learning_rate": 1.19683082250231e-07,
+       "logits/chosen": 1.3115044832229614,
+       "logits/rejected": 1.6584991216659546,
+       "logps/chosen": -1225.9017333984375,
+       "logps/pi_response": -4866.2646484375,
+       "logps/ref_response": -388.7275085449219,
+       "logps/rejected": -4971.64306640625,
+       "loss": 0.2874,
+       "rewards/accuracies": 0.8833333253860474,
+       "rewards/chosen": -6.632820129394531,
+       "rewards/margins": 34.09852600097656,
+       "rewards/rejected": -40.731346130371094,
+       "step": 300
+     },
+     {
+       "epoch": 0.7298410829899941,
+       "grad_norm": 119.56456897387905,
+       "learning_rate": 1.0255382170737015e-07,
+       "logits/chosen": 1.3735281229019165,
+       "logits/rejected": 1.32486891746521,
+       "logps/chosen": -1208.898193359375,
+       "logps/pi_response": -5530.7060546875,
+       "logps/ref_response": -391.98046875,
+       "logps/rejected": -6051.8076171875,
+       "loss": 0.591,
+       "rewards/accuracies": 0.8666666746139526,
+       "rewards/chosen": -6.081249713897705,
+       "rewards/margins": 46.203948974609375,
+       "rewards/rejected": -52.28520584106445,
+       "step": 310
+     },
+     {
+       "epoch": 0.7533843437316068,
+       "grad_norm": 108.85771777397696,
+       "learning_rate": 8.642649082596692e-08,
+       "logits/chosen": 1.5552794933319092,
+       "logits/rejected": 1.771057367324829,
+       "logps/chosen": -1233.845947265625,
+       "logps/pi_response": -5965.13623046875,
+       "logps/ref_response": -382.88055419921875,
+       "logps/rejected": -6220.9912109375,
+       "loss": 0.4376,
+       "rewards/accuracies": 0.8916667699813843,
+       "rewards/chosen": -6.246255397796631,
+       "rewards/margins": 48.06293487548828,
+       "rewards/rejected": -54.30919647216797,
+       "step": 320
+     },
+     {
+       "epoch": 0.7769276044732195,
+       "grad_norm": 64.98595778509755,
+       "learning_rate": 7.141067841569633e-08,
+       "logits/chosen": 1.0594632625579834,
+       "logits/rejected": 1.5874229669570923,
+       "logps/chosen": -1108.4306640625,
+       "logps/pi_response": -3888.35400390625,
+       "logps/ref_response": -377.4826354980469,
+       "logps/rejected": -4457.27685546875,
+       "loss": 0.6749,
+       "rewards/accuracies": 0.908333420753479,
+       "rewards/chosen": -5.92837381362915,
+       "rewards/margins": 30.60651206970215,
+       "rewards/rejected": -36.534889221191406,
+       "step": 330
+     },
+     {
+       "epoch": 0.8004708652148322,
+       "grad_norm": 114.35171172829054,
+       "learning_rate": 5.7608420270357614e-08,
+       "logits/chosen": 1.8669030666351318,
+       "logits/rejected": 2.0596258640289307,
+       "logps/chosen": -1213.849609375,
+       "logps/pi_response": -4912.9462890625,
+       "logps/ref_response": -396.6376037597656,
+       "logps/rejected": -4539.1298828125,
+       "loss": 0.435,
+       "rewards/accuracies": 0.9000000953674316,
+       "rewards/chosen": -6.412614345550537,
+       "rewards/margins": 30.942264556884766,
+       "rewards/rejected": -37.354881286621094,
+       "step": 340
+     },
+     {
+       "epoch": 0.824014125956445,
+       "grad_norm": 179.13336254225945,
+       "learning_rate": 4.511350581190129e-08,
+       "logits/chosen": 1.3449863195419312,
+       "logits/rejected": 1.5248844623565674,
+       "logps/chosen": -1232.9034423828125,
+       "logps/pi_response": -6418.89697265625,
+       "logps/ref_response": -406.46099853515625,
+       "logps/rejected": -5906.890625,
+       "loss": 0.2574,
+       "rewards/accuracies": 0.85833340883255,
+       "rewards/chosen": -6.55679178237915,
+       "rewards/margins": 44.44318771362305,
+       "rewards/rejected": -50.99998092651367,
+       "step": 350
+     },
+     {
+       "epoch": 0.8475573866980577,
+       "grad_norm": 100.07236020794019,
+       "learning_rate": 3.401084077039293e-08,
+       "logits/chosen": 1.0567926168441772,
+       "logits/rejected": 1.0804252624511719,
+       "logps/chosen": -1290.2373046875,
+       "logps/pi_response": -5767.3994140625,
+       "logps/ref_response": -370.0479431152344,
+       "logps/rejected": -6238.93115234375,
+       "loss": 0.182,
+       "rewards/accuracies": 0.925000011920929,
+       "rewards/chosen": -6.525579929351807,
+       "rewards/margins": 48.529579162597656,
+       "rewards/rejected": -55.0551643371582,
+       "step": 360
+     },
+     {
+       "epoch": 0.8711006474396704,
+       "grad_norm": 154.01389939852425,
+       "learning_rate": 2.4375870230643413e-08,
+       "logits/chosen": 1.2523785829544067,
+       "logits/rejected": 0.6351985931396484,
+       "logps/chosen": -1255.387451171875,
+       "logps/pi_response": -7876.88427734375,
+       "logps/ref_response": -419.7919006347656,
+       "logps/rejected": -7547.12353515625,
+       "loss": 0.8119,
+       "rewards/accuracies": 0.8999999761581421,
+       "rewards/chosen": -6.398593902587891,
+       "rewards/margins": 61.322898864746094,
+       "rewards/rejected": -67.72149658203125,
+       "step": 370
+     },
+     {
+       "epoch": 0.8946439081812831,
+       "grad_norm": 78.3832326659616,
+       "learning_rate": 1.627406596603359e-08,
+       "logits/chosen": 1.103989839553833,
+       "logits/rejected": 1.358469843864441,
+       "logps/chosen": -1181.985595703125,
+       "logps/pi_response": -6574.1669921875,
+       "logps/ref_response": -373.62188720703125,
+       "logps/rejected": -7001.06103515625,
+       "loss": 0.1925,
+       "rewards/accuracies": 0.89166659116745,
+       "rewards/chosen": -6.272666931152344,
+       "rewards/margins": 55.82216262817383,
+       "rewards/rejected": -62.094825744628906,
+       "step": 380
+     },
+     {
+       "epoch": 0.9181871689228959,
+       "grad_norm": 127.93583130286875,
+       "learning_rate": 9.760481543214128e-09,
+       "logits/chosen": 1.224596619606018,
+       "logits/rejected": 1.153881311416626,
+       "logps/chosen": -1271.9290771484375,
+       "logps/pi_response": -6838.2158203125,
+       "logps/ref_response": -396.5231628417969,
+       "logps/rejected": -7279.1904296875,
+       "loss": 0.8634,
+       "rewards/accuracies": 0.85833340883255,
+       "rewards/chosen": -6.516213417053223,
+       "rewards/margins": 58.329864501953125,
+       "rewards/rejected": -64.84608459472656,
+       "step": 390
+     },
+     {
+       "epoch": 0.9417304296645085,
+       "grad_norm": 70.22000102721056,
+       "learning_rate": 4.879378220843666e-09,
+       "logits/chosen": 0.9867501258850098,
+       "logits/rejected": 1.4171621799468994,
+       "logps/chosen": -1219.076416015625,
+       "logps/pi_response": -6917.1083984375,
+       "logps/ref_response": -392.57037353515625,
+       "logps/rejected": -6101.5478515625,
+       "loss": 0.225,
+       "rewards/accuracies": 0.9166666865348816,
+       "rewards/chosen": -6.3670525550842285,
+       "rewards/margins": 46.422969818115234,
+       "rewards/rejected": -52.7900276184082,
+       "step": 400
+     },
+     {
+       "epoch": 0.9652736904061212,
+       "grad_norm": 138.07733003631637,
+       "learning_rate": 1.6639241844659535e-09,
+       "logits/chosen": 1.2070242166519165,
+       "logits/rejected": 1.469334363937378,
+       "logps/chosen": -1205.38525390625,
+       "logps/pi_response": -5451.77392578125,
+       "logps/ref_response": -356.38177490234375,
+       "logps/rejected": -5381.77587890625,
+       "loss": 0.2645,
+       "rewards/accuracies": 0.8999999761581421,
+       "rewards/chosen": -6.463148593902588,
+       "rewards/margins": 39.27529525756836,
+       "rewards/rejected": -45.73843765258789,
+       "step": 410
+     },
+     {
+       "epoch": 0.9888169511477339,
+       "grad_norm": 52.54825181887789,
+       "learning_rate": 1.359691612926872e-10,
+       "logits/chosen": 1.1104196310043335,
+       "logits/rejected": 0.860076904296875,
+       "logps/chosen": -1356.6370849609375,
+       "logps/pi_response": -6781.0595703125,
+       "logps/ref_response": -390.4653015136719,
+       "logps/rejected": -7239.3955078125,
+       "loss": 0.3053,
+       "rewards/accuracies": 0.9333333969116211,
+       "rewards/chosen": -7.076499938964844,
+       "rewards/margins": 57.4049072265625,
+       "rewards/rejected": -64.48140716552734,
+       "step": 420
+     },
+     {
+       "epoch": 0.998234255444379,
+       "step": 424,
+       "total_flos": 0.0,
+       "train_loss": 0.387972928302468,
+       "train_runtime": 9484.4674,
+       "train_samples_per_second": 1.611,
+       "train_steps_per_second": 0.045
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 424,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 100,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 0.0,
+   "train_batch_size": 3,
+   "trial_name": null,
+   "trial_params": null
+ }
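
The `log_history` above is the most informative part of the commit: the loss starts at 0.6931 (ln 2, i.e. chance for a preference classifier) and settles in the 0.2-0.4 range, while reward margins grow from ~0.03 at step 10 to over 57 by step 420, with the rejected-completion log-probs dropping far faster than the chosen ones. A short sketch for extracting that curve:

```python
# Sketch: extract the DPO training curve from trainer_state.json. Uses only
# keys present in the log entries above; the final summary entry (which has
# train_loss instead of loss) is skipped by the guard.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for rec in state["log_history"]:
    if "loss" not in rec:
        continue
    print(f"step {rec['step']:>3}  loss {rec['loss']:.4f}  "
          f"margin {rec['rewards/margins']:6.2f}  "
          f"acc {rec['rewards/accuracies']:.3f}")
```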