BraylonDash committed on
Commit
40141dd
·
verified ·
1 Parent(s): 37f342d

Model save

Browse files
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ library_name: peft
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ base_model: DUAL-GPO/phi-2-dpo-chatml-merged
9
+ model-index:
10
+ - name: phi-2-dpo-chatml-lora-i1
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # phi-2-dpo-chatml-lora-i1
18
+
19
+ This model is a fine-tuned version of [DUAL-GPO/phi-2-dpo-chatml-merged](https://huggingface.co/DUAL-GPO/phi-2-dpo-chatml-merged) on an unspecified dataset (no dataset name was recorded by the Trainer).
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - learning_rate: 5e-06
39
+ - train_batch_size: 4
40
+ - eval_batch_size: 4
41
+ - seed: 42
42
+ - distributed_type: multi-GPU
43
+ - num_devices: 4
44
+ - gradient_accumulation_steps: 4
45
+ - total_train_batch_size: 64
46
+ - total_eval_batch_size: 16
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: cosine
49
+ - lr_scheduler_warmup_ratio: 0.1
50
+ - num_epochs: 1
51
+
52
+ ### Training results
53
+
54
+
55
+
56
+ ### Framework versions
57
+
58
+ - PEFT 0.7.1
59
+ - Transformers 4.36.2
60
+ - PyTorch 2.1.2
61
+ - Datasets 2.14.6
62
+ - Tokenizers 0.15.2
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4e5809bfc80035436a4e26758583729cb5e4a62d46126d717629da2e65a071f
3
  size 167807296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fd983aaf64c3d7928e0b40894ee454af401bde5ca0008895b9e981b9672d2d
3
  size 167807296
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.6665561169198474,
4
+ "train_runtime": 4180.8924,
5
+ "train_samples": 20378,
6
+ "train_samples_per_second": 4.874,
7
+ "train_steps_per_second": 0.076
8
+ }
emissions.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2
+ 2024-09-10T17:05:50,codecarbon,17448811-59dc-446c-b419-8adc8b1216c5,4180.905763626099,0.003907930783756967,9.347091287624764e-07,42.5,1134.73,188.74309015274048,0.049357304409808546,1.3756144027881374,0.21893714960478902,1.6439088568027338,Canada,CAN,quebec,,,Linux-5.15.0-84-generic-x86_64-with-glibc2.35,3.10.14,2.2.3,32,Intel(R) Xeon(R) W-3335 CPU @ 3.40GHz,4,4 x NVIDIA GeForce RTX 4090,-71.2,46.8,503.3149070739746,machine,N,1.0
runs/Sep10_15-50-31_gpu4-119-5/events.out.tfevents.1725947769.gpu4-119-5.605072.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:998b396e55e9e6c0ff15ea9968f883b0a1b11c9fc8e67a28d36cbe877e6ab258
3
- size 24413
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dfeb098eee7b325e7c38e173ee8339ba273e5456794156550fe8c7226d0f295
3
+ size 25401
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.6665561169198474,
4
+ "train_runtime": 4180.8924,
5
+ "train_samples": 20378,
6
+ "train_samples_per_second": 4.874,
7
+ "train_steps_per_second": 0.076
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9984301412872841,
5
+ "eval_steps": 500,
6
+ "global_step": 318,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 1.5625e-07,
14
+ "logits/chosen": 0.1065371111035347,
15
+ "logits/rejected": 0.2458750307559967,
16
+ "logps/chosen": -576.7586669921875,
17
+ "logps/rejected": -601.521240234375,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.03,
27
+ "learning_rate": 1.5625e-06,
28
+ "logits/chosen": 0.2651256322860718,
29
+ "logits/rejected": 0.24446943402290344,
30
+ "logps/chosen": -421.94403076171875,
31
+ "logps/rejected": -430.0181884765625,
32
+ "loss": 0.6932,
33
+ "rewards/accuracies": 0.3819444477558136,
34
+ "rewards/chosen": 0.00013627602311316878,
35
+ "rewards/margins": -4.138438089285046e-05,
36
+ "rewards/rejected": 0.00017766041855793446,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.06,
41
+ "learning_rate": 3.125e-06,
42
+ "logits/chosen": 0.19487406313419342,
43
+ "logits/rejected": 0.27559036016464233,
44
+ "logps/chosen": -469.13458251953125,
45
+ "logps/rejected": -477.4700622558594,
46
+ "loss": 0.693,
47
+ "rewards/accuracies": 0.550000011920929,
48
+ "rewards/chosen": -0.0018459655111655593,
49
+ "rewards/margins": 0.0005293375579640269,
50
+ "rewards/rejected": -0.00237530330196023,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.09,
55
+ "learning_rate": 4.6875000000000004e-06,
56
+ "logits/chosen": 0.25393253564834595,
57
+ "logits/rejected": 0.32423219084739685,
58
+ "logps/chosen": -524.4780883789062,
59
+ "logps/rejected": -515.9963989257812,
60
+ "loss": 0.6924,
61
+ "rewards/accuracies": 0.581250011920929,
62
+ "rewards/chosen": -0.010388225317001343,
63
+ "rewards/margins": 0.0017380230128765106,
64
+ "rewards/rejected": -0.012126248329877853,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.13,
69
+ "learning_rate": 4.9903533134293035e-06,
70
+ "logits/chosen": 0.188622385263443,
71
+ "logits/rejected": 0.23987522721290588,
72
+ "logps/chosen": -524.7830810546875,
73
+ "logps/rejected": -532.3538208007812,
74
+ "loss": 0.6904,
75
+ "rewards/accuracies": 0.5625,
76
+ "rewards/chosen": -0.038737986236810684,
77
+ "rewards/margins": 0.007083290722221136,
78
+ "rewards/rejected": -0.04582127556204796,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.16,
83
+ "learning_rate": 4.95129120635556e-06,
84
+ "logits/chosen": 0.2065356969833374,
85
+ "logits/rejected": 0.21540746092796326,
86
+ "logps/chosen": -557.65283203125,
87
+ "logps/rejected": -602.3500366210938,
88
+ "loss": 0.6886,
89
+ "rewards/accuracies": 0.5249999761581421,
90
+ "rewards/chosen": -0.081453338265419,
91
+ "rewards/margins": 0.008762759156525135,
92
+ "rewards/rejected": -0.09021610021591187,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.19,
97
+ "learning_rate": 4.882681251368549e-06,
98
+ "logits/chosen": 0.1865241825580597,
99
+ "logits/rejected": 0.25268620252609253,
100
+ "logps/chosen": -651.5568237304688,
101
+ "logps/rejected": -646.779296875,
102
+ "loss": 0.6882,
103
+ "rewards/accuracies": 0.543749988079071,
104
+ "rewards/chosen": -0.10856065899133682,
105
+ "rewards/margins": 0.00851230975240469,
106
+ "rewards/rejected": -0.11707296222448349,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.22,
111
+ "learning_rate": 4.785350472409792e-06,
112
+ "logits/chosen": 0.12973304092884064,
113
+ "logits/rejected": 0.15445570647716522,
114
+ "logps/chosen": -573.20361328125,
115
+ "logps/rejected": -620.1366577148438,
116
+ "loss": 0.6862,
117
+ "rewards/accuracies": 0.581250011920929,
118
+ "rewards/chosen": -0.11544144153594971,
119
+ "rewards/margins": 0.014589125290513039,
120
+ "rewards/rejected": -0.130030557513237,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.25,
125
+ "learning_rate": 4.660472094042121e-06,
126
+ "logits/chosen": 0.13113337755203247,
127
+ "logits/rejected": 0.16033154726028442,
128
+ "logps/chosen": -604.9552001953125,
129
+ "logps/rejected": -617.4078369140625,
130
+ "loss": 0.6848,
131
+ "rewards/accuracies": 0.5687500238418579,
132
+ "rewards/chosen": -0.12029469013214111,
133
+ "rewards/margins": 0.01667841710150242,
134
+ "rewards/rejected": -0.13697311282157898,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.28,
139
+ "learning_rate": 4.509551399408598e-06,
140
+ "logits/chosen": 0.09812867641448975,
141
+ "logits/rejected": 0.18707698583602905,
142
+ "logps/chosen": -654.7269897460938,
143
+ "logps/rejected": -692.5885009765625,
144
+ "loss": 0.6818,
145
+ "rewards/accuracies": 0.59375,
146
+ "rewards/chosen": -0.16067926585674286,
147
+ "rewards/margins": 0.029583226889371872,
148
+ "rewards/rejected": -0.19026246666908264,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.31,
153
+ "learning_rate": 4.33440758555951e-06,
154
+ "logits/chosen": 0.09072402864694595,
155
+ "logits/rejected": 0.11204621940851212,
156
+ "logps/chosen": -659.927734375,
157
+ "logps/rejected": -693.5866088867188,
158
+ "loss": 0.6779,
159
+ "rewards/accuracies": 0.59375,
160
+ "rewards/chosen": -0.20416970551013947,
161
+ "rewards/margins": 0.037873029708862305,
162
+ "rewards/rejected": -0.24204275012016296,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.35,
167
+ "learning_rate": 4.137151834863213e-06,
168
+ "logits/chosen": 0.07093264162540436,
169
+ "logits/rejected": 0.03249276801943779,
170
+ "logps/chosen": -743.2816162109375,
171
+ "logps/rejected": -755.5554809570312,
172
+ "loss": 0.6775,
173
+ "rewards/accuracies": 0.550000011920929,
174
+ "rewards/chosen": -0.2279289960861206,
175
+ "rewards/margins": 0.02968643605709076,
176
+ "rewards/rejected": -0.25761544704437256,
177
+ "step": 110
178
+ },
179
+ {
180
+ "epoch": 0.38,
181
+ "learning_rate": 3.92016186682789e-06,
182
+ "logits/chosen": 0.003220717655494809,
183
+ "logits/rejected": 0.05324220657348633,
184
+ "logps/chosen": -742.3509521484375,
185
+ "logps/rejected": -809.758544921875,
186
+ "loss": 0.6673,
187
+ "rewards/accuracies": 0.581250011920929,
188
+ "rewards/chosen": -0.2514348328113556,
189
+ "rewards/margins": 0.04873809963464737,
190
+ "rewards/rejected": -0.30017292499542236,
191
+ "step": 120
192
+ },
193
+ {
194
+ "epoch": 0.41,
195
+ "learning_rate": 3.686053277086401e-06,
196
+ "logits/chosen": 0.015957411378622055,
197
+ "logits/rejected": 0.034166835248470306,
198
+ "logps/chosen": -800.9042358398438,
199
+ "logps/rejected": -877.7219848632812,
200
+ "loss": 0.6688,
201
+ "rewards/accuracies": 0.5687500238418579,
202
+ "rewards/chosen": -0.3339093327522278,
203
+ "rewards/margins": 0.05545445531606674,
204
+ "rewards/rejected": -0.3893638253211975,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.44,
209
+ "learning_rate": 3.437648009023905e-06,
210
+ "logits/chosen": -0.06793640553951263,
211
+ "logits/rejected": 0.012782419100403786,
212
+ "logps/chosen": -928.662109375,
213
+ "logps/rejected": -970.1724853515625,
214
+ "loss": 0.6673,
215
+ "rewards/accuracies": 0.543749988079071,
216
+ "rewards/chosen": -0.40538254380226135,
217
+ "rewards/margins": 0.05219917744398117,
218
+ "rewards/rejected": -0.45758169889450073,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.47,
223
+ "learning_rate": 3.177940338091043e-06,
224
+ "logits/chosen": -0.07451646029949188,
225
+ "logits/rejected": 0.017870059236884117,
226
+ "logps/chosen": -945.6070556640625,
227
+ "logps/rejected": -1063.8753662109375,
228
+ "loss": 0.6552,
229
+ "rewards/accuracies": 0.6000000238418579,
230
+ "rewards/chosen": -0.47644931077957153,
231
+ "rewards/margins": 0.11226899921894073,
232
+ "rewards/rejected": -0.5887182950973511,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 0.5,
237
+ "learning_rate": 2.9100607788275547e-06,
238
+ "logits/chosen": -0.013520196080207825,
239
+ "logits/rejected": 0.03256853669881821,
240
+ "logps/chosen": -1152.884521484375,
241
+ "logps/rejected": -1294.5550537109375,
242
+ "loss": 0.6459,
243
+ "rewards/accuracies": 0.5562499761581421,
244
+ "rewards/chosen": -0.629438579082489,
245
+ "rewards/margins": 0.12635770440101624,
246
+ "rewards/rejected": -0.7557963132858276,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 0.53,
251
+ "learning_rate": 2.637238349660819e-06,
252
+ "logits/chosen": -0.03625180199742317,
253
+ "logits/rejected": 0.10911421477794647,
254
+ "logps/chosen": -1153.629638671875,
255
+ "logps/rejected": -1299.8240966796875,
256
+ "loss": 0.6485,
257
+ "rewards/accuracies": 0.5874999761581421,
258
+ "rewards/chosen": -0.6664064526557922,
259
+ "rewards/margins": 0.15015219151973724,
260
+ "rewards/rejected": -0.8165585398674011,
261
+ "step": 170
262
+ },
263
+ {
264
+ "epoch": 0.57,
265
+ "learning_rate": 2.3627616503391813e-06,
266
+ "logits/chosen": 0.02950824238359928,
267
+ "logits/rejected": 0.05250490829348564,
268
+ "logps/chosen": -1152.2685546875,
269
+ "logps/rejected": -1324.5938720703125,
270
+ "loss": 0.6529,
271
+ "rewards/accuracies": 0.550000011920929,
272
+ "rewards/chosen": -0.6731899380683899,
273
+ "rewards/margins": 0.13111469149589539,
274
+ "rewards/rejected": -0.8043045997619629,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.6,
279
+ "learning_rate": 2.089939221172446e-06,
280
+ "logits/chosen": 0.04129552096128464,
281
+ "logits/rejected": 0.1471545547246933,
282
+ "logps/chosen": -1160.955810546875,
283
+ "logps/rejected": -1316.416259765625,
284
+ "loss": 0.6554,
285
+ "rewards/accuracies": 0.59375,
286
+ "rewards/chosen": -0.6231328845024109,
287
+ "rewards/margins": 0.14521253108978271,
288
+ "rewards/rejected": -0.7683453559875488,
289
+ "step": 190
290
+ },
291
+ {
292
+ "epoch": 0.63,
293
+ "learning_rate": 1.8220596619089576e-06,
294
+ "logits/chosen": 0.06950052827596664,
295
+ "logits/rejected": 0.114873506128788,
296
+ "logps/chosen": -1067.883056640625,
297
+ "logps/rejected": -1257.954345703125,
298
+ "loss": 0.6662,
299
+ "rewards/accuracies": 0.5687500238418579,
300
+ "rewards/chosen": -0.580407977104187,
301
+ "rewards/margins": 0.15041552484035492,
302
+ "rewards/rejected": -0.7308235168457031,
303
+ "step": 200
304
+ },
305
+ {
306
+ "epoch": 0.66,
307
+ "learning_rate": 1.5623519909760953e-06,
308
+ "logits/chosen": 0.05648995563387871,
309
+ "logits/rejected": 0.09691180288791656,
310
+ "logps/chosen": -965.3287963867188,
311
+ "logps/rejected": -1081.1578369140625,
312
+ "loss": 0.651,
313
+ "rewards/accuracies": 0.5375000238418579,
314
+ "rewards/chosen": -0.5061975717544556,
315
+ "rewards/margins": 0.0916539654135704,
316
+ "rewards/rejected": -0.5978515148162842,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 0.69,
321
+ "learning_rate": 1.3139467229135999e-06,
322
+ "logits/chosen": 0.051130689680576324,
323
+ "logits/rejected": 0.04953103885054588,
324
+ "logps/chosen": -984.7945556640625,
325
+ "logps/rejected": -1096.2000732421875,
326
+ "loss": 0.6587,
327
+ "rewards/accuracies": 0.53125,
328
+ "rewards/chosen": -0.5363761186599731,
329
+ "rewards/margins": 0.09475774317979813,
330
+ "rewards/rejected": -0.6311338543891907,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.72,
335
+ "learning_rate": 1.079838133172111e-06,
336
+ "logits/chosen": 0.05980740860104561,
337
+ "logits/rejected": 0.09383749216794968,
338
+ "logps/chosen": -1028.142822265625,
339
+ "logps/rejected": -1146.3697509765625,
340
+ "loss": 0.6478,
341
+ "rewards/accuracies": 0.581250011920929,
342
+ "rewards/chosen": -0.537701427936554,
343
+ "rewards/margins": 0.12060447037220001,
344
+ "rewards/rejected": -0.6583058834075928,
345
+ "step": 230
346
+ },
347
+ {
348
+ "epoch": 0.75,
349
+ "learning_rate": 8.628481651367876e-07,
350
+ "logits/chosen": 0.014723904430866241,
351
+ "logits/rejected": 0.06809697300195694,
352
+ "logps/chosen": -1050.84716796875,
353
+ "logps/rejected": -1169.0181884765625,
354
+ "loss": 0.6523,
355
+ "rewards/accuracies": 0.550000011920929,
356
+ "rewards/chosen": -0.5698201060295105,
357
+ "rewards/margins": 0.10493580996990204,
358
+ "rewards/rejected": -0.6747559309005737,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 0.78,
363
+ "learning_rate": 6.655924144404907e-07,
364
+ "logits/chosen": 0.058697450906038284,
365
+ "logits/rejected": 0.18674160540103912,
366
+ "logps/chosen": -1031.38330078125,
367
+ "logps/rejected": -1151.7845458984375,
368
+ "loss": 0.6503,
369
+ "rewards/accuracies": 0.5562499761581421,
370
+ "rewards/chosen": -0.5323927998542786,
371
+ "rewards/margins": 0.12208826839923859,
372
+ "rewards/rejected": -0.654481053352356,
373
+ "step": 250
374
+ },
375
+ {
376
+ "epoch": 0.82,
377
+ "learning_rate": 4.904486005914027e-07,
378
+ "logits/chosen": 0.038037996739149094,
379
+ "logits/rejected": 0.1984243094921112,
380
+ "logps/chosen": -1104.4632568359375,
381
+ "logps/rejected": -1201.5025634765625,
382
+ "loss": 0.6687,
383
+ "rewards/accuracies": 0.5375000238418579,
384
+ "rewards/chosen": -0.6090129613876343,
385
+ "rewards/margins": 0.08677474409341812,
386
+ "rewards/rejected": -0.6957876086235046,
387
+ "step": 260
388
+ },
389
+ {
390
+ "epoch": 0.85,
391
+ "learning_rate": 3.3952790595787986e-07,
392
+ "logits/chosen": 0.08488737046718597,
393
+ "logits/rejected": 0.08265082538127899,
394
+ "logps/chosen": -1128.706787109375,
395
+ "logps/rejected": -1274.176513671875,
396
+ "loss": 0.6428,
397
+ "rewards/accuracies": 0.5874999761581421,
398
+ "rewards/chosen": -0.5869961977005005,
399
+ "rewards/margins": 0.14979645609855652,
400
+ "rewards/rejected": -0.7367926836013794,
401
+ "step": 270
402
+ },
403
+ {
404
+ "epoch": 0.88,
405
+ "learning_rate": 2.1464952759020857e-07,
406
+ "logits/chosen": 0.0698360875248909,
407
+ "logits/rejected": 0.20901212096214294,
408
+ "logps/chosen": -1054.747802734375,
409
+ "logps/rejected": -1169.133544921875,
410
+ "loss": 0.6518,
411
+ "rewards/accuracies": 0.5625,
412
+ "rewards/chosen": -0.5615522265434265,
413
+ "rewards/margins": 0.09707958251237869,
414
+ "rewards/rejected": -0.6586318612098694,
415
+ "step": 280
416
+ },
417
+ {
418
+ "epoch": 0.91,
419
+ "learning_rate": 1.1731874863145143e-07,
420
+ "logits/chosen": 0.051002971827983856,
421
+ "logits/rejected": 0.1305128037929535,
422
+ "logps/chosen": -1106.5140380859375,
423
+ "logps/rejected": -1212.6392822265625,
424
+ "loss": 0.6598,
425
+ "rewards/accuracies": 0.574999988079071,
426
+ "rewards/chosen": -0.6055802702903748,
427
+ "rewards/margins": 0.09625270962715149,
428
+ "rewards/rejected": -0.7018329501152039,
429
+ "step": 290
430
+ },
431
+ {
432
+ "epoch": 0.94,
433
+ "learning_rate": 4.870879364444109e-08,
434
+ "logits/chosen": 0.07181545346975327,
435
+ "logits/rejected": 0.10806989669799805,
436
+ "logps/chosen": -1016.3053588867188,
437
+ "logps/rejected": -1113.5516357421875,
438
+ "loss": 0.6518,
439
+ "rewards/accuracies": 0.518750011920929,
440
+ "rewards/chosen": -0.551072895526886,
441
+ "rewards/margins": 0.10785824060440063,
442
+ "rewards/rejected": -0.6589311361312866,
443
+ "step": 300
444
+ },
445
+ {
446
+ "epoch": 0.97,
447
+ "learning_rate": 9.646686570697062e-09,
448
+ "logits/chosen": 0.059725649654865265,
449
+ "logits/rejected": 0.14064475893974304,
450
+ "logps/chosen": -1115.263427734375,
451
+ "logps/rejected": -1200.1612548828125,
452
+ "loss": 0.6489,
453
+ "rewards/accuracies": 0.518750011920929,
454
+ "rewards/chosen": -0.5870085954666138,
455
+ "rewards/margins": 0.10603809356689453,
456
+ "rewards/rejected": -0.6930466890335083,
457
+ "step": 310
458
+ },
459
+ {
460
+ "epoch": 1.0,
461
+ "step": 318,
462
+ "total_flos": 0.0,
463
+ "train_loss": 0.6665561169198474,
464
+ "train_runtime": 4180.8924,
465
+ "train_samples_per_second": 4.874,
466
+ "train_steps_per_second": 0.076
467
+ }
468
+ ],
469
+ "logging_steps": 10,
470
+ "max_steps": 318,
471
+ "num_input_tokens_seen": 0,
472
+ "num_train_epochs": 1,
473
+ "save_steps": 100,
474
+ "total_flos": 0.0,
475
+ "train_batch_size": 4,
476
+ "trial_name": null,
477
+ "trial_params": null
478
+ }