silviasapora commited on
Commit
4f54a97
·
verified ·
1 Parent(s): 89d529e

Model save

Browse files
Files changed (4) hide show
  1. README.md +4 -7
  2. all_results.json +3 -3
  3. train_results.json +3 -3
  4. trainer_state.json +689 -689
README.md CHANGED
@@ -1,20 +1,17 @@
1
  ---
2
  base_model: google/gemma-7b
3
- datasets:
4
- - argilla/dpo-mix-7k
5
  library_name: transformers
6
- model_name: google/gemma-7b
7
  tags:
8
  - generated_from_trainer
9
- - alignment-handbook
10
  - trl
11
  - orpo
12
  licence: license
13
  ---
14
 
15
- # Model Card for google/gemma-7b
16
 
17
- This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the [['argilla/dpo-mix-7k']](https://huggingface.co/datasets/['argilla/dpo-mix-7k']) dataset.
18
  It has been trained using [TRL](https://github.com/huggingface/trl).
19
 
20
  ## Quick start
@@ -30,7 +27,7 @@ print(output["generated_text"])
30
 
31
  ## Training procedure
32
 
33
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/silvias/huggingface/runs/jxlh26x2)
34
 
35
 
36
  This model was trained with ORPO, a method introduced in [ORPO: Monolithic Preference Optimization without Reference Model](https://huggingface.co/papers/2403.07691).
 
1
  ---
2
  base_model: google/gemma-7b
 
 
3
  library_name: transformers
4
+ model_name: gemma-7b-silvia-basic-5e-5-v4
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - orpo
9
  licence: license
10
  ---
11
 
12
+ # Model Card for gemma-7b-silvia-basic-5e-5-v4
13
 
14
+ This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/silvias/huggingface/runs/0lavtiko)
31
 
32
 
33
  This model was trained with ORPO, a method introduced in [ORPO: Monolithic Preference Optimization without Reference Model](https://huggingface.co/papers/2403.07691).
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 2.9765925925925925,
3
  "total_flos": 0.0,
4
- "train_loss": -61.895723918127636,
5
- "train_runtime": 9291.6158,
6
  "train_samples": 6750,
7
- "train_samples_per_second": 2.179,
8
  "train_steps_per_second": 0.034
9
  }
 
1
  {
2
  "epoch": 2.9765925925925925,
3
  "total_flos": 0.0,
4
+ "train_loss": -125.51550612676711,
5
+ "train_runtime": 9350.753,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 2.166,
8
  "train_steps_per_second": 0.034
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 2.9765925925925925,
3
  "total_flos": 0.0,
4
- "train_loss": -61.895723918127636,
5
- "train_runtime": 9291.6158,
6
  "train_samples": 6750,
7
- "train_samples_per_second": 2.179,
8
  "train_steps_per_second": 0.034
9
  }
 
1
  {
2
  "epoch": 2.9765925925925925,
3
  "total_flos": 0.0,
4
+ "train_loss": -125.51550612676711,
5
+ "train_runtime": 9350.753,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 2.166,
8
  "train_steps_per_second": 0.034
9
  }
trainer_state.json CHANGED
@@ -10,1019 +10,1019 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.047407407407407405,
13
- "grad_norm": 1309.7264404296875,
14
  "learning_rate": 7.8125e-06,
15
- "log_odds_chosen": -1.0092629194259644,
16
- "log_odds_ratio": -10.180293083190918,
17
- "logps/chosen": -21.700639724731445,
18
- "logps/rejected": -20.691608428955078,
19
- "loss": 710.1368,
20
- "nll_loss": 9.67548656463623,
21
  "rewards/accuracies": 0.484375,
22
- "rewards/chosen": -10.850319862365723,
23
- "rewards/margins": -0.5045148134231567,
24
- "rewards/rejected": -10.345804214477539,
25
  "step": 5
26
  },
27
  {
28
  "epoch": 0.09481481481481481,
29
- "grad_norm": 1233.22119140625,
30
  "learning_rate": 1.5625e-05,
31
- "log_odds_chosen": -2.799683094024658,
32
- "log_odds_ratio": -10.153043746948242,
33
- "logps/chosen": -20.964075088500977,
34
- "logps/rejected": -18.164752960205078,
35
- "loss": 714.4481,
36
- "nll_loss": 8.683107376098633,
37
  "rewards/accuracies": 0.44062501192092896,
38
- "rewards/chosen": -10.482037544250488,
39
- "rewards/margins": -1.3996615409851074,
40
- "rewards/rejected": -9.082376480102539,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 0.14222222222222222,
45
- "grad_norm": 664.7484741210938,
46
  "learning_rate": 2.34375e-05,
47
- "log_odds_chosen": -1.274759292602539,
48
- "log_odds_ratio": -10.432046890258789,
49
- "logps/chosen": -20.83938980102539,
50
- "logps/rejected": -19.563129425048828,
51
- "loss": 686.9441,
52
- "nll_loss": 8.075501441955566,
53
- "rewards/accuracies": 0.515625,
54
- "rewards/chosen": -10.419694900512695,
55
- "rewards/margins": -0.638128936290741,
56
- "rewards/rejected": -9.781564712524414,
57
  "step": 15
58
  },
59
  {
60
  "epoch": 0.18962962962962962,
61
- "grad_norm": 5581.04345703125,
62
  "learning_rate": 3.125e-05,
63
- "log_odds_chosen": -3.752788543701172,
64
- "log_odds_ratio": -8.436421394348145,
65
- "logps/chosen": -15.843017578125,
66
- "logps/rejected": -12.092793464660645,
67
- "loss": 565.9146,
68
- "nll_loss": 6.499529838562012,
69
- "rewards/accuracies": 0.4375,
70
- "rewards/chosen": -7.9215087890625,
71
- "rewards/margins": -1.8751119375228882,
72
- "rewards/rejected": -6.046396732330322,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 0.23703703703703705,
77
- "grad_norm": 271.88677978515625,
78
  "learning_rate": 3.90625e-05,
79
- "log_odds_chosen": -0.09555625915527344,
80
- "log_odds_ratio": -1.5934648513793945,
81
- "logps/chosen": -3.681891918182373,
82
- "logps/rejected": -3.571626663208008,
83
- "loss": 120.1892,
84
- "nll_loss": 3.53655743598938,
85
- "rewards/accuracies": 0.5562499761581421,
86
- "rewards/chosen": -1.8409459590911865,
87
- "rewards/margins": -0.055132925510406494,
88
- "rewards/rejected": -1.785813331604004,
89
  "step": 25
90
  },
91
  {
92
  "epoch": 0.28444444444444444,
93
- "grad_norm": 225.77731323242188,
94
  "learning_rate": 4.6875e-05,
95
- "log_odds_chosen": 0.05429550260305405,
96
- "log_odds_ratio": -0.8730447888374329,
97
- "logps/chosen": -1.855459213256836,
98
- "logps/rejected": -1.9073245525360107,
99
- "loss": 58.9805,
100
- "nll_loss": 2.5173518657684326,
101
- "rewards/accuracies": 0.5249999761581421,
102
- "rewards/chosen": -0.927729606628418,
103
- "rewards/margins": 0.025932852178812027,
104
- "rewards/rejected": -0.9536622762680054,
105
  "step": 30
106
  },
107
  {
108
  "epoch": 0.33185185185185184,
109
- "grad_norm": 330.9626770019531,
110
  "learning_rate": 4.998613757348784e-05,
111
- "log_odds_chosen": 0.21338006854057312,
112
- "log_odds_ratio": -0.8034284710884094,
113
- "logps/chosen": -1.7032153606414795,
114
- "logps/rejected": -1.8913615942001343,
115
- "loss": 52.0844,
116
- "nll_loss": 2.3274359703063965,
117
- "rewards/accuracies": 0.518750011920929,
118
- "rewards/chosen": -0.8516076803207397,
119
- "rewards/margins": 0.09407330304384232,
120
- "rewards/rejected": -0.9456807971000671,
121
  "step": 35
122
  },
123
  {
124
  "epoch": 0.37925925925925924,
125
- "grad_norm": 175.3074493408203,
126
  "learning_rate": 4.990147841143462e-05,
127
- "log_odds_chosen": 0.31794872879981995,
128
- "log_odds_ratio": -0.7034906148910522,
129
- "logps/chosen": -1.5063345432281494,
130
- "logps/rejected": -1.7724323272705078,
131
- "loss": 45.107,
132
- "nll_loss": 2.3017280101776123,
133
- "rewards/accuracies": 0.590624988079071,
134
- "rewards/chosen": -0.7531672716140747,
135
- "rewards/margins": 0.1330489218235016,
136
- "rewards/rejected": -0.8862161636352539,
137
  "step": 40
138
  },
139
  {
140
  "epoch": 0.4266666666666667,
141
- "grad_norm": 148.9108123779297,
142
  "learning_rate": 4.97401218720448e-05,
143
- "log_odds_chosen": 0.32661986351013184,
144
- "log_odds_ratio": -0.695238471031189,
145
- "logps/chosen": -1.4421089887619019,
146
- "logps/rejected": -1.732181191444397,
147
- "loss": 42.7725,
148
- "nll_loss": 2.121166944503784,
149
- "rewards/accuracies": 0.6031249761581421,
150
- "rewards/chosen": -0.7210544943809509,
151
- "rewards/margins": 0.14503604173660278,
152
- "rewards/rejected": -0.8660905957221985,
153
  "step": 45
154
  },
155
  {
156
  "epoch": 0.4740740740740741,
157
- "grad_norm": 168.20802307128906,
158
  "learning_rate": 4.9502564938797946e-05,
159
- "log_odds_chosen": 0.41916948556900024,
160
- "log_odds_ratio": -0.6709465980529785,
161
- "logps/chosen": -1.3388197422027588,
162
- "logps/rejected": -1.7024009227752686,
163
- "loss": 38.517,
164
- "nll_loss": 2.0670645236968994,
165
- "rewards/accuracies": 0.625,
166
- "rewards/chosen": -0.6694098711013794,
167
- "rewards/margins": 0.18179059028625488,
168
- "rewards/rejected": -0.8512004613876343,
169
  "step": 50
170
  },
171
  {
172
  "epoch": 0.5214814814814814,
173
- "grad_norm": 1289.98828125,
174
  "learning_rate": 4.918953929490768e-05,
175
- "log_odds_chosen": 1.8812118768692017,
176
- "log_odds_ratio": -0.7291940450668335,
177
- "logps/chosen": -1.6740214824676514,
178
- "logps/rejected": -3.4779555797576904,
179
- "loss": 26.985,
180
- "nll_loss": 2.3563015460968018,
181
- "rewards/accuracies": 0.625,
182
- "rewards/chosen": -0.8370107412338257,
183
- "rewards/margins": 0.9019671678543091,
184
- "rewards/rejected": -1.7389777898788452,
185
  "step": 55
186
  },
187
  {
188
  "epoch": 0.5688888888888889,
189
- "grad_norm": 846.8933715820312,
190
  "learning_rate": 4.88020090697132e-05,
191
- "log_odds_chosen": 5.034865379333496,
192
- "log_odds_ratio": -0.7896591424942017,
193
- "logps/chosen": -1.8219846487045288,
194
- "logps/rejected": -6.7804718017578125,
195
- "loss": -18.5626,
196
- "nll_loss": 2.4730238914489746,
197
- "rewards/accuracies": 0.653124988079071,
198
- "rewards/chosen": -0.9109923243522644,
199
- "rewards/margins": 2.479243755340576,
200
- "rewards/rejected": -3.3902359008789062,
201
  "step": 60
202
  },
203
  {
204
  "epoch": 0.6162962962962963,
205
- "grad_norm": 1050.5416259765625,
206
  "learning_rate": 4.834116786912897e-05,
207
- "log_odds_chosen": 7.387457370758057,
208
- "log_odds_ratio": -2.9080045223236084,
209
- "logps/chosen": -5.713994026184082,
210
- "logps/rejected": -13.041893005371094,
211
- "loss": 67.5015,
212
- "nll_loss": 5.906378746032715,
213
- "rewards/accuracies": 0.6312500238418579,
214
- "rewards/chosen": -2.856997013092041,
215
- "rewards/margins": 3.6639492511749268,
216
- "rewards/rejected": -6.520946502685547,
217
  "step": 65
218
  },
219
  {
220
  "epoch": 0.6637037037037037,
221
- "grad_norm": 14261.298828125,
222
  "learning_rate": 4.7808435099299045e-05,
223
- "log_odds_chosen": 14.964570999145508,
224
- "log_odds_ratio": -1.2665787935256958,
225
- "logps/chosen": -2.980807304382324,
226
- "logps/rejected": -17.873931884765625,
227
- "loss": -140.3602,
228
- "nll_loss": 3.395730495452881,
229
- "rewards/accuracies": 0.6656249761581421,
230
- "rewards/chosen": -1.490403652191162,
231
- "rewards/margins": 7.446562767028809,
232
- "rewards/rejected": -8.936965942382812,
233
  "step": 70
234
  },
235
  {
236
  "epoch": 0.7111111111111111,
237
- "grad_norm": 3282.506591796875,
238
  "learning_rate": 4.720545159477922e-05,
239
- "log_odds_chosen": 11.172109603881836,
240
- "log_odds_ratio": -1.5487353801727295,
241
- "logps/chosen": -2.9426088333129883,
242
- "logps/rejected": -14.05163860321045,
243
- "loss": -81.785,
244
- "nll_loss": 3.614391326904297,
245
- "rewards/accuracies": 0.643750011920929,
246
- "rewards/chosen": -1.4713044166564941,
247
- "rewards/margins": 5.554513931274414,
248
- "rewards/rejected": -7.025819301605225,
249
  "step": 75
250
  },
251
  {
252
  "epoch": 0.7585185185185185,
253
- "grad_norm": 22710.70703125,
254
  "learning_rate": 4.653407456471222e-05,
255
- "log_odds_chosen": 14.214700698852539,
256
- "log_odds_ratio": -3.5922465324401855,
257
- "logps/chosen": -5.844705104827881,
258
- "logps/rejected": -20.002582550048828,
259
- "loss": -37.6675,
260
- "nll_loss": 6.309117794036865,
261
- "rewards/accuracies": 0.59375,
262
- "rewards/chosen": -2.9223525524139404,
263
- "rewards/margins": 7.078938961029053,
264
- "rewards/rejected": -10.001291275024414,
265
  "step": 80
266
  },
267
  {
268
  "epoch": 0.8059259259259259,
269
- "grad_norm": 4206.48681640625,
270
  "learning_rate": 4.579637187256222e-05,
271
- "log_odds_chosen": 11.754661560058594,
272
- "log_odds_ratio": -2.568394184112549,
273
- "logps/chosen": -5.506086826324463,
274
- "logps/rejected": -17.211210250854492,
275
- "loss": -9.382,
276
- "nll_loss": 5.795596122741699,
277
- "rewards/accuracies": 0.6156250238418579,
278
- "rewards/chosen": -2.7530434131622314,
279
- "rewards/margins": 5.852560997009277,
280
- "rewards/rejected": -8.605605125427246,
281
  "step": 85
282
  },
283
  {
284
  "epoch": 0.8533333333333334,
285
- "grad_norm": 2450.24169921875,
286
  "learning_rate": 4.499461566702685e-05,
287
- "log_odds_chosen": 10.724493980407715,
288
- "log_odds_ratio": -0.7071830034255981,
289
- "logps/chosen": -1.4572128057479858,
290
- "logps/rejected": -12.113273620605469,
291
- "loss": -122.1039,
292
- "nll_loss": 2.4590225219726562,
293
  "rewards/accuracies": 0.6156250238418579,
294
- "rewards/chosen": -0.7286064028739929,
295
- "rewards/margins": 5.328030109405518,
296
- "rewards/rejected": -6.056636810302734,
297
  "step": 90
298
  },
299
  {
300
  "epoch": 0.9007407407407407,
301
- "grad_norm": 2608.7001953125,
302
  "learning_rate": 4.413127538374411e-05,
303
- "log_odds_chosen": 16.54291534423828,
304
- "log_odds_ratio": -3.4148666858673096,
305
- "logps/chosen": -4.583926200866699,
306
- "logps/rejected": -21.081348419189453,
307
- "loss": -115.7481,
308
- "nll_loss": 4.339598655700684,
309
- "rewards/accuracies": 0.5874999761581421,
310
- "rewards/chosen": -2.2919631004333496,
311
- "rewards/margins": 8.248711585998535,
312
- "rewards/rejected": -10.540674209594727,
313
  "step": 95
314
  },
315
  {
316
  "epoch": 0.9481481481481482,
317
- "grad_norm": 1113.737548828125,
318
  "learning_rate": 4.320901013934887e-05,
319
- "log_odds_chosen": 20.849918365478516,
320
- "log_odds_ratio": -2.3039841651916504,
321
- "logps/chosen": -3.297405242919922,
322
- "logps/rejected": -24.073766708374023,
323
- "loss": -224.7481,
324
- "nll_loss": 4.907585144042969,
325
- "rewards/accuracies": 0.6187499761581421,
326
- "rewards/chosen": -1.648702621459961,
327
- "rewards/margins": 10.38818073272705,
328
- "rewards/rejected": -12.036883354187012,
329
  "step": 100
330
  },
331
  {
332
  "epoch": 0.9955555555555555,
333
- "grad_norm": 564.52294921875,
334
  "learning_rate": 4.223066054130568e-05,
335
- "log_odds_chosen": 21.292091369628906,
336
- "log_odds_ratio": -1.3454216718673706,
337
- "logps/chosen": -3.835742473602295,
338
- "logps/rejected": -25.064594268798828,
339
- "loss": -214.725,
340
- "nll_loss": 4.128335475921631,
341
- "rewards/accuracies": 0.609375,
342
- "rewards/chosen": -1.9178712368011475,
343
- "rewards/margins": 10.614426612854004,
344
- "rewards/rejected": -12.532297134399414,
345
  "step": 105
346
  },
347
  {
348
  "epoch": 1.037925925925926,
349
- "grad_norm": 2678.355224609375,
350
  "learning_rate": 4.1199239938743797e-05,
351
- "log_odds_chosen": 17.971054077148438,
352
- "log_odds_ratio": -0.902182936668396,
353
- "logps/chosen": -1.6016716957092285,
354
- "logps/rejected": -19.46338653564453,
355
- "loss": -207.4208,
356
- "nll_loss": 2.815678358078003,
357
  "rewards/accuracies": 0.6538461446762085,
358
- "rewards/chosen": -0.8008358478546143,
359
- "rewards/margins": 8.930856704711914,
360
- "rewards/rejected": -9.731693267822266,
361
  "step": 110
362
  },
363
  {
364
  "epoch": 1.0853333333333333,
365
- "grad_norm": 16211.197265625,
366
  "learning_rate": 4.0117925141242174e-05,
367
- "log_odds_chosen": 32.54540252685547,
368
- "log_odds_ratio": -7.060112953186035,
369
- "logps/chosen": -16.32876205444336,
370
- "logps/rejected": -48.71499252319336,
371
- "loss": 7.6217,
372
- "nll_loss": 15.108541488647461,
373
- "rewards/accuracies": 0.6968749761581421,
374
- "rewards/chosen": -8.16438102722168,
375
- "rewards/margins": 16.193115234375,
376
- "rewards/rejected": -24.35749626159668,
377
  "step": 115
378
  },
379
  {
380
  "epoch": 1.1327407407407408,
381
- "grad_norm": 24504.349609375,
382
  "learning_rate": 3.899004663415084e-05,
383
- "log_odds_chosen": 28.460607528686523,
384
- "log_odds_ratio": -6.060556888580322,
385
- "logps/chosen": -10.85397720336914,
386
- "logps/rejected": -39.16749954223633,
387
- "loss": -102.1427,
388
- "nll_loss": 10.644952774047852,
389
- "rewards/accuracies": 0.6937500238418579,
390
- "rewards/chosen": -5.42698860168457,
391
- "rewards/margins": 14.156761169433594,
392
- "rewards/rejected": -19.583749771118164,
393
  "step": 120
394
  },
395
  {
396
  "epoch": 1.1801481481481482,
397
- "grad_norm": 40890.17578125,
398
  "learning_rate": 3.781907832058587e-05,
399
- "log_odds_chosen": 20.61638832092285,
400
- "log_odds_ratio": -6.12774133682251,
401
- "logps/chosen": -11.241783142089844,
402
- "logps/rejected": -31.751750946044922,
403
- "loss": 34.3804,
404
- "nll_loss": 11.169973373413086,
405
- "rewards/accuracies": 0.675000011920929,
406
- "rewards/chosen": -5.620891571044922,
407
- "rewards/margins": 10.254984855651855,
408
- "rewards/rejected": -15.875875473022461,
409
  "step": 125
410
  },
411
  {
412
  "epoch": 1.2275555555555555,
413
- "grad_norm": 4595.94677734375,
414
  "learning_rate": 3.660862682169282e-05,
415
- "log_odds_chosen": 15.926393508911133,
416
- "log_odds_ratio": -2.1731228828430176,
417
- "logps/chosen": -4.010047912597656,
418
- "logps/rejected": -19.802104949951172,
419
- "loss": -121.6234,
420
- "nll_loss": 4.461260795593262,
421
- "rewards/accuracies": 0.6468750238418579,
422
- "rewards/chosen": -2.005023956298828,
423
- "rewards/margins": 7.896027565002441,
424
- "rewards/rejected": -9.901052474975586,
425
  "step": 130
426
  },
427
  {
428
  "epoch": 1.274962962962963,
429
- "grad_norm": 7289.251953125,
430
  "learning_rate": 3.5362420368134356e-05,
431
- "log_odds_chosen": 23.66111946105957,
432
- "log_odds_ratio": -1.2839603424072266,
433
- "logps/chosen": -2.44439959526062,
434
- "logps/rejected": -25.9698429107666,
435
- "loss": -294.9251,
436
- "nll_loss": 3.672980546951294,
437
- "rewards/accuracies": 0.71875,
438
- "rewards/chosen": -1.22219979763031,
439
- "rewards/margins": 11.762721061706543,
440
- "rewards/rejected": -12.9849214553833,
441
  "step": 135
442
  },
443
  {
444
  "epoch": 1.3223703703703704,
445
- "grad_norm": 35099.68359375,
446
  "learning_rate": 3.408429731701635e-05,
447
- "log_odds_chosen": 22.592227935791016,
448
- "log_odds_ratio": -3.179147958755493,
449
- "logps/chosen": -5.041490077972412,
450
- "logps/rejected": -27.51487159729004,
451
- "loss": -195.6627,
452
- "nll_loss": 5.91623592376709,
453
- "rewards/accuracies": 0.6875,
454
- "rewards/chosen": -2.520745038986206,
455
- "rewards/margins": 11.236689567565918,
456
- "rewards/rejected": -13.75743579864502,
457
  "step": 140
458
  },
459
  {
460
  "epoch": 1.3697777777777778,
461
- "grad_norm": 4246.3798828125,
462
  "learning_rate": 3.2778194329621104e-05,
463
- "log_odds_chosen": 20.304393768310547,
464
- "log_odds_ratio": -2.3136236667633057,
465
- "logps/chosen": -3.2953147888183594,
466
- "logps/rejected": -23.475563049316406,
467
- "loss": -214.76,
468
- "nll_loss": 4.836905002593994,
469
- "rewards/accuracies": 0.675000011920929,
470
- "rewards/chosen": -1.6476573944091797,
471
- "rewards/margins": 10.090123176574707,
472
- "rewards/rejected": -11.737781524658203,
473
  "step": 145
474
  },
475
  {
476
  "epoch": 1.417185185185185,
477
- "grad_norm": 112606.7421875,
478
  "learning_rate": 3.144813424636031e-05,
479
- "log_odds_chosen": 28.99297523498535,
480
- "log_odds_ratio": -4.194870948791504,
481
- "logps/chosen": -7.332457542419434,
482
- "logps/rejected": -36.18639373779297,
483
- "loss": -223.9742,
484
- "nll_loss": 9.322813034057617,
485
- "rewards/accuracies": 0.6625000238418579,
486
- "rewards/chosen": -3.666228771209717,
487
- "rewards/margins": 14.426966667175293,
488
- "rewards/rejected": -18.093196868896484,
489
  "step": 150
490
  },
491
  {
492
  "epoch": 1.4645925925925927,
493
- "grad_norm": 57207.05859375,
494
  "learning_rate": 3.0098213696293542e-05,
495
- "log_odds_chosen": 27.4794864654541,
496
- "log_odds_ratio": -6.622164726257324,
497
- "logps/chosen": -13.51585578918457,
498
- "logps/rejected": -40.87260818481445,
499
- "loss": -2.1912,
500
- "nll_loss": 16.61840057373047,
501
- "rewards/accuracies": 0.6499999761581421,
502
- "rewards/chosen": -6.757927894592285,
503
- "rewards/margins": 13.678375244140625,
504
- "rewards/rejected": -20.436304092407227,
505
  "step": 155
506
  },
507
  {
508
  "epoch": 1.512,
509
- "grad_norm": 22721.037109375,
510
  "learning_rate": 2.8732590479375165e-05,
511
- "log_odds_chosen": 26.662273406982422,
512
- "log_odds_ratio": -6.387181282043457,
513
- "logps/chosen": -10.372232437133789,
514
- "logps/rejected": -36.92156219482422,
515
- "loss": -90.2426,
516
- "nll_loss": 9.191670417785645,
517
- "rewards/accuracies": 0.643750011920929,
518
- "rewards/chosen": -5.1861162185668945,
519
- "rewards/margins": 13.274665832519531,
520
- "rewards/rejected": -18.46078109741211,
521
  "step": 160
522
  },
523
  {
524
  "epoch": 1.5594074074074074,
525
- "grad_norm": 8884.1220703125,
526
  "learning_rate": 2.7355470760292956e-05,
527
- "log_odds_chosen": 22.65861701965332,
528
- "log_odds_ratio": -3.268022060394287,
529
- "logps/chosen": -4.845406532287598,
530
- "logps/rejected": -27.359100341796875,
531
- "loss": -202.281,
532
- "nll_loss": 5.931666374206543,
533
- "rewards/accuracies": 0.684374988079071,
534
- "rewards/chosen": -2.422703266143799,
535
- "rewards/margins": 11.256847381591797,
536
- "rewards/rejected": -13.679550170898438,
537
  "step": 165
538
  },
539
  {
540
  "epoch": 1.6068148148148147,
541
- "grad_norm": 10747.482421875,
542
  "learning_rate": 2.597109611334169e-05,
543
- "log_odds_chosen": 24.835664749145508,
544
- "log_odds_ratio": -5.039122104644775,
545
- "logps/chosen": -8.912939071655273,
546
- "logps/rejected": -33.64850616455078,
547
- "loss": -108.0337,
548
- "nll_loss": 9.482011795043945,
549
- "rewards/accuracies": 0.6468750238418579,
550
- "rewards/chosen": -4.456469535827637,
551
- "rewards/margins": 12.367780685424805,
552
- "rewards/rejected": -16.82425308227539,
553
  "step": 170
554
  },
555
  {
556
  "epoch": 1.6542222222222223,
557
- "grad_norm": 25721.013671875,
558
  "learning_rate": 2.458373045823404e-05,
559
- "log_odds_chosen": 17.206180572509766,
560
- "log_odds_ratio": -5.200994968414307,
561
- "logps/chosen": -10.323943138122559,
562
- "logps/rejected": -27.417028427124023,
563
- "loss": 59.6029,
564
- "nll_loss": 11.46679973602295,
565
- "rewards/accuracies": 0.675000011920929,
566
- "rewards/chosen": -5.161971569061279,
567
- "rewards/margins": 8.546542167663574,
568
- "rewards/rejected": -13.708514213562012,
569
  "step": 175
570
  },
571
  {
572
  "epoch": 1.7016296296296296,
573
- "grad_norm": 5182.34716796875,
574
  "learning_rate": 2.3197646927086697e-05,
575
- "log_odds_chosen": 17.235403060913086,
576
- "log_odds_ratio": -5.827118873596191,
577
- "logps/chosen": -9.327442169189453,
578
- "logps/rejected": -26.453588485717773,
579
- "loss": 26.7508,
580
- "nll_loss": 10.854573249816895,
581
- "rewards/accuracies": 0.6499999761581421,
582
- "rewards/chosen": -4.663721084594727,
583
- "rewards/margins": 8.56307315826416,
584
- "rewards/rejected": -13.226794242858887,
585
  "step": 180
586
  },
587
  {
588
  "epoch": 1.749037037037037,
589
- "grad_norm": 1122.926513671875,
590
  "learning_rate": 2.1817114703032176e-05,
591
- "log_odds_chosen": 12.183832168579102,
592
- "log_odds_ratio": -3.2944283485412598,
593
- "logps/chosen": -4.907401084899902,
594
- "logps/rejected": -17.006296157836914,
595
- "loss": -34.3607,
596
- "nll_loss": 4.914076328277588,
597
- "rewards/accuracies": 0.640625,
598
- "rewards/chosen": -2.453700542449951,
599
- "rewards/margins": 6.049447059631348,
600
- "rewards/rejected": -8.503148078918457,
601
  "step": 185
602
  },
603
  {
604
  "epoch": 1.7964444444444445,
605
- "grad_norm": 8495.58984375,
606
  "learning_rate": 2.0446385870993467e-05,
607
- "log_odds_chosen": 27.890201568603516,
608
- "log_odds_ratio": -4.537432670593262,
609
- "logps/chosen": -6.876814365386963,
610
- "logps/rejected": -34.62470245361328,
611
- "loss": -220.5601,
612
- "nll_loss": 7.243471622467041,
613
- "rewards/accuracies": 0.690625011920929,
614
- "rewards/chosen": -3.4384071826934814,
615
- "rewards/margins": 13.873942375183105,
616
- "rewards/rejected": -17.31235122680664,
617
  "step": 190
618
  },
619
  {
620
  "epoch": 1.8438518518518519,
621
- "grad_norm": 21205.203125,
622
  "learning_rate": 1.9089682321121834e-05,
623
- "log_odds_chosen": 28.1275577545166,
624
- "log_odds_ratio": -10.069475173950195,
625
- "logps/chosen": -13.262001037597656,
626
- "logps/rejected": -41.2625732421875,
627
- "loss": -20.6418,
628
- "nll_loss": 14.18327522277832,
629
- "rewards/accuracies": 0.671875,
630
- "rewards/chosen": -6.631000518798828,
631
- "rewards/margins": 14.000287055969238,
632
- "rewards/rejected": -20.63128662109375,
633
  "step": 195
634
  },
635
  {
636
  "epoch": 1.8912592592592592,
637
- "grad_norm": 74072.71875,
638
  "learning_rate": 1.775118274523545e-05,
639
- "log_odds_chosen": 39.34962844848633,
640
- "log_odds_ratio": -16.783084869384766,
641
- "logps/chosen": -29.612789154052734,
642
- "logps/rejected": -68.8023452758789,
643
- "loss": 324.4527,
644
- "nll_loss": 29.834827423095703,
645
- "rewards/accuracies": 0.7124999761581421,
646
- "rewards/chosen": -14.806394577026367,
647
- "rewards/margins": 19.59477424621582,
648
- "rewards/rejected": -34.40117263793945,
649
  "step": 200
650
  },
651
  {
652
  "epoch": 1.9386666666666668,
653
- "grad_norm": 395353.21875,
654
  "learning_rate": 1.643500976631037e-05,
655
- "log_odds_chosen": 42.678619384765625,
656
- "log_odds_ratio": -18.052345275878906,
657
- "logps/chosen": -42.023616790771484,
658
- "logps/rejected": -84.55274963378906,
659
- "loss": 668.5252,
660
- "nll_loss": 37.75287628173828,
661
  "rewards/accuracies": 0.71875,
662
- "rewards/chosen": -21.011808395385742,
663
- "rewards/margins": 21.264570236206055,
664
- "rewards/rejected": -42.27637481689453,
665
  "step": 205
666
  },
667
  {
668
  "epoch": 1.986074074074074,
669
- "grad_norm": 80640.96875,
670
  "learning_rate": 1.514521724066537e-05,
671
- "log_odds_chosen": 39.148033142089844,
672
- "log_odds_ratio": -19.23377799987793,
673
- "logps/chosen": -32.76858901977539,
674
- "logps/rejected": -71.80208587646484,
675
- "loss": 426.931,
676
- "nll_loss": 26.356372833251953,
677
- "rewards/accuracies": 0.668749988079071,
678
- "rewards/chosen": -16.384294509887695,
679
- "rewards/margins": 19.51674461364746,
680
- "rewards/rejected": -35.90104293823242,
681
  "step": 210
682
  },
683
  {
684
  "epoch": 2.0284444444444443,
685
- "grad_norm": 34031.30859375,
686
  "learning_rate": 1.3885777771950348e-05,
687
- "log_odds_chosen": 39.754032135009766,
688
- "log_odds_ratio": -14.231975555419922,
689
- "logps/chosen": -18.812442779541016,
690
- "logps/rejected": -58.38700485229492,
691
- "loss": -24.6149,
692
- "nll_loss": 17.645992279052734,
693
- "rewards/accuracies": 0.6958041787147522,
694
- "rewards/chosen": -9.406221389770508,
695
- "rewards/margins": 19.787281036376953,
696
- "rewards/rejected": -29.19350242614746,
697
  "step": 215
698
  },
699
  {
700
  "epoch": 2.075851851851852,
701
- "grad_norm": 16462.95703125,
702
  "learning_rate": 1.2660570475395683e-05,
703
- "log_odds_chosen": 28.321569442749023,
704
- "log_odds_ratio": -13.535270690917969,
705
- "logps/chosen": -21.737045288085938,
706
- "logps/rejected": -49.909423828125,
707
- "loss": 248.0299,
708
- "nll_loss": 19.69781494140625,
709
- "rewards/accuracies": 0.6875,
710
- "rewards/chosen": -10.868522644042969,
711
- "rewards/margins": 14.086189270019531,
712
- "rewards/rejected": -24.9547119140625,
713
  "step": 220
714
  },
715
  {
716
  "epoch": 2.1232592592592594,
717
- "grad_norm": 17380.919921875,
718
  "learning_rate": 1.1473369030008974e-05,
719
- "log_odds_chosen": 41.30133819580078,
720
- "log_odds_ratio": -5.992632865905762,
721
- "logps/chosen": -9.869775772094727,
722
- "logps/rejected": -51.008033752441406,
723
- "loss": -338.6483,
724
- "nll_loss": 9.406192779541016,
725
- "rewards/accuracies": 0.6937500238418579,
726
- "rewards/chosen": -4.934887886047363,
727
- "rewards/margins": 20.569129943847656,
728
- "rewards/rejected": -25.504016876220703,
729
  "step": 225
730
  },
731
  {
732
  "epoch": 2.1706666666666665,
733
- "grad_norm": 2604.8720703125,
734
  "learning_rate": 1.0327830055518842e-05,
735
- "log_odds_chosen": 40.362274169921875,
736
- "log_odds_ratio": -9.142515182495117,
737
- "logps/chosen": -14.164266586303711,
738
- "logps/rejected": -54.35051345825195,
739
- "loss": -186.1042,
740
- "nll_loss": 13.237195014953613,
741
- "rewards/accuracies": 0.684374988079071,
742
- "rewards/chosen": -7.0821332931518555,
743
- "rewards/margins": 20.093120574951172,
744
- "rewards/rejected": -27.175256729125977,
745
  "step": 230
746
  },
747
  {
748
  "epoch": 2.218074074074074,
749
- "grad_norm": 164766.734375,
750
  "learning_rate": 9.227481849865235e-06,
751
- "log_odds_chosen": 45.14253234863281,
752
- "log_odds_ratio": -7.5437116622924805,
753
- "logps/chosen": -11.828727722167969,
754
- "logps/rejected": -56.79478073120117,
755
- "loss": -336.8467,
756
- "nll_loss": 12.229296684265137,
757
- "rewards/accuracies": 0.721875011920929,
758
- "rewards/chosen": -5.914363861083984,
759
- "rewards/margins": 22.483028411865234,
760
- "rewards/rejected": -28.397390365600586,
761
  "step": 235
762
  },
763
  {
764
  "epoch": 2.2654814814814817,
765
- "grad_norm": 81824.2890625,
766
  "learning_rate": 8.175713521924978e-06,
767
- "log_odds_chosen": 40.605594635009766,
768
- "log_odds_ratio": -11.402399063110352,
769
- "logps/chosen": -26.724740982055664,
770
- "logps/rejected": -67.15190124511719,
771
- "loss": 212.4567,
772
- "nll_loss": 23.99557113647461,
773
- "rewards/accuracies": 0.721875011920929,
774
- "rewards/chosen": -13.362370491027832,
775
- "rewards/margins": 20.213586807250977,
776
- "rewards/rejected": -33.575950622558594,
777
  "step": 240
778
  },
779
  {
780
  "epoch": 2.3128888888888888,
781
- "grad_norm": 14960.7578125,
782
  "learning_rate": 7.1757645529443665e-06,
783
- "log_odds_chosen": 55.09406661987305,
784
- "log_odds_ratio": -10.749176979064941,
785
- "logps/chosen": -19.03034019470215,
786
- "logps/rejected": -73.93666076660156,
787
- "loss": -264.6577,
788
- "nll_loss": 19.223167419433594,
789
- "rewards/accuracies": 0.7593749761581421,
790
- "rewards/chosen": -9.515170097351074,
791
- "rewards/margins": 27.45315933227539,
792
- "rewards/rejected": -36.96833038330078,
793
  "step": 245
794
  },
795
  {
796
  "epoch": 2.3602962962962963,
797
- "grad_norm": 17807.7265625,
798
  "learning_rate": 6.230714818829733e-06,
799
- "log_odds_chosen": 45.54753875732422,
800
- "log_odds_ratio": -7.856490135192871,
801
- "logps/chosen": -11.773348808288574,
802
- "logps/rejected": -57.12473678588867,
803
- "loss": -344.7895,
804
- "nll_loss": 12.78067684173584,
805
  "rewards/accuracies": 0.7093750238418579,
806
- "rewards/chosen": -5.886674404144287,
807
- "rewards/margins": 22.67569351196289,
808
- "rewards/rejected": -28.562368392944336,
809
  "step": 250
810
  },
811
  {
812
  "epoch": 2.407703703703704,
813
- "grad_norm": 104304.0390625,
814
  "learning_rate": 5.343475104027743e-06,
815
- "log_odds_chosen": 47.276451110839844,
816
- "log_odds_ratio": -8.991453170776367,
817
- "logps/chosen": -13.746038436889648,
818
- "logps/rejected": -60.85129928588867,
819
- "loss": -309.5943,
820
- "nll_loss": 14.252038955688477,
821
- "rewards/accuracies": 0.7281249761581421,
822
- "rewards/chosen": -6.873019218444824,
823
- "rewards/margins": 23.552631378173828,
824
- "rewards/rejected": -30.425649642944336,
825
  "step": 255
826
  },
827
  {
828
  "epoch": 2.455111111111111,
829
- "grad_norm": 38147.03125,
830
  "learning_rate": 4.516778136213037e-06,
831
- "log_odds_chosen": 40.15815734863281,
832
- "log_odds_ratio": -7.543868064880371,
833
- "logps/chosen": -11.01616096496582,
834
- "logps/rejected": -50.988121032714844,
835
- "loss": -282.8974,
836
- "nll_loss": 12.92004108428955,
837
- "rewards/accuracies": 0.731249988079071,
838
- "rewards/chosen": -5.50808048248291,
839
- "rewards/margins": 19.985979080200195,
840
- "rewards/rejected": -25.494060516357422,
841
  "step": 260
842
  },
843
  {
844
  "epoch": 2.5025185185185186,
845
- "grad_norm": 24916.00390625,
846
  "learning_rate": 3.7531701693965554e-06,
847
- "log_odds_chosen": 50.416419982910156,
848
- "log_odds_ratio": -8.888219833374023,
849
- "logps/chosen": -14.691574096679688,
850
- "logps/rejected": -64.95278930664062,
851
- "loss": -330.3818,
852
- "nll_loss": 14.175634384155273,
853
- "rewards/accuracies": 0.6937500238418579,
854
- "rewards/chosen": -7.345787048339844,
855
- "rewards/margins": 25.1306095123291,
856
- "rewards/rejected": -32.47639465332031,
857
  "step": 265
858
  },
859
  {
860
  "epoch": 2.549925925925926,
861
- "grad_norm": 17820.6953125,
862
  "learning_rate": 3.055003141378948e-06,
863
- "log_odds_chosen": 56.49353790283203,
864
- "log_odds_ratio": -4.24954891204834,
865
- "logps/chosen": -7.077002048492432,
866
- "logps/rejected": -63.38737106323242,
867
- "loss": -670.0222,
868
- "nll_loss": 8.986559867858887,
869
- "rewards/accuracies": 0.734375,
870
- "rewards/chosen": -3.538501024246216,
871
- "rewards/margins": 28.155181884765625,
872
- "rewards/rejected": -31.69368553161621,
873
  "step": 270
874
  },
875
  {
876
  "epoch": 2.5973333333333333,
877
- "grad_norm": 21286.701171875,
878
  "learning_rate": 2.424427429704365e-06,
879
- "log_odds_chosen": 45.696632385253906,
880
- "log_odds_ratio": -8.857467651367188,
881
- "logps/chosen": -13.247896194458008,
882
- "logps/rejected": -58.740142822265625,
883
- "loss": -299.5551,
884
- "nll_loss": 12.918680191040039,
885
- "rewards/accuracies": 0.778124988079071,
886
- "rewards/chosen": -6.623948097229004,
887
- "rewards/margins": 22.746124267578125,
888
- "rewards/rejected": -29.370071411132812,
889
  "step": 275
890
  },
891
  {
892
  "epoch": 2.644740740740741,
893
- "grad_norm": 50187.1171875,
894
  "learning_rate": 1.8633852284264508e-06,
895
- "log_odds_chosen": 55.4704704284668,
896
- "log_odds_ratio": -6.3292717933654785,
897
- "logps/chosen": -11.759183883666992,
898
- "logps/rejected": -67.04707336425781,
899
- "loss": -503.517,
900
- "nll_loss": 12.290529251098633,
901
- "rewards/accuracies": 0.734375,
902
- "rewards/chosen": -5.879591941833496,
903
- "rewards/margins": 27.643945693969727,
904
- "rewards/rejected": -33.523536682128906,
905
  "step": 280
906
  },
907
  {
908
  "epoch": 2.6921481481481484,
909
- "grad_norm": 114461.5,
910
  "learning_rate": 1.3736045660864034e-06,
911
- "log_odds_chosen": 41.011329650878906,
912
- "log_odds_ratio": -8.477639198303223,
913
- "logps/chosen": -12.918500900268555,
914
- "logps/rejected": -53.76482391357422,
915
- "loss": -236.1638,
916
- "nll_loss": 13.538793563842773,
917
- "rewards/accuracies": 0.7250000238418579,
918
- "rewards/chosen": -6.459250450134277,
919
- "rewards/margins": 20.423160552978516,
920
- "rewards/rejected": -26.88241195678711,
921
  "step": 285
922
  },
923
  {
924
  "epoch": 2.7395555555555555,
925
- "grad_norm": 26927.990234375,
926
  "learning_rate": 9.565939833279192e-07,
927
- "log_odds_chosen": 40.3164176940918,
928
- "log_odds_ratio": -15.15136432647705,
929
- "logps/chosen": -22.780040740966797,
930
- "logps/rejected": -62.926292419433594,
931
- "loss": 90.1159,
932
- "nll_loss": 20.46670913696289,
933
- "rewards/accuracies": 0.6875,
934
- "rewards/chosen": -11.390020370483398,
935
- "rewards/margins": 20.073129653930664,
936
- "rewards/rejected": -31.463146209716797,
937
  "step": 290
938
  },
939
  {
940
  "epoch": 2.786962962962963,
941
- "grad_norm": 10974.4052734375,
942
  "learning_rate": 6.136378865420872e-07,
943
- "log_odds_chosen": 47.6572380065918,
944
- "log_odds_ratio": -7.081920623779297,
945
- "logps/chosen": -10.659754753112793,
946
- "logps/rejected": -58.14702224731445,
947
- "loss": -414.5048,
948
- "nll_loss": 11.201601028442383,
949
- "rewards/accuracies": 0.7093750238418579,
950
- "rewards/chosen": -5.3298773765563965,
951
- "rewards/margins": 23.743627548217773,
952
- "rewards/rejected": -29.073511123657227,
953
  "step": 295
954
  },
955
  {
956
  "epoch": 2.83437037037037,
957
- "grad_norm": 42624.33203125,
958
  "learning_rate": 3.45792591853214e-07,
959
- "log_odds_chosen": 52.529335021972656,
960
- "log_odds_ratio": -6.441692352294922,
961
- "logps/chosen": -9.135614395141602,
962
- "logps/rejected": -61.50330352783203,
963
- "loss": -541.3402,
964
- "nll_loss": 10.765565872192383,
965
- "rewards/accuracies": 0.7437499761581421,
966
- "rewards/chosen": -4.567807197570801,
967
- "rewards/margins": 26.1838436126709,
968
- "rewards/rejected": -30.751651763916016,
969
  "step": 300
970
  },
971
  {
972
  "epoch": 2.8817777777777778,
973
- "grad_norm": 8348.119140625,
974
  "learning_rate": 1.538830716302092e-07,
975
- "log_odds_chosen": 44.924232482910156,
976
- "log_odds_ratio": -9.396774291992188,
977
- "logps/chosen": -14.924581527709961,
978
- "logps/rejected": -59.64234161376953,
979
- "loss": -233.7471,
980
- "nll_loss": 14.424337387084961,
981
- "rewards/accuracies": 0.7124999761581421,
982
- "rewards/chosen": -7.4622907638549805,
983
- "rewards/margins": 22.3588809967041,
984
- "rewards/rejected": -29.821170806884766,
985
  "step": 305
986
  },
987
  {
988
  "epoch": 2.9291851851851853,
989
- "grad_norm": 8581.84765625,
990
  "learning_rate": 3.8500413544415025e-08,
991
- "log_odds_chosen": 50.05999755859375,
992
- "log_odds_ratio": -14.670018196105957,
993
- "logps/chosen": -20.66399574279785,
994
- "logps/rejected": -70.54561614990234,
995
- "loss": -132.5917,
996
- "nll_loss": 18.822933197021484,
997
- "rewards/accuracies": 0.7437499761581421,
998
- "rewards/chosen": -10.331997871398926,
999
- "rewards/margins": 24.940807342529297,
1000
- "rewards/rejected": -35.27280807495117,
1001
  "step": 310
1002
  },
1003
  {
1004
  "epoch": 2.9765925925925925,
1005
- "grad_norm": 8825.96875,
1006
  "learning_rate": 0.0,
1007
- "log_odds_chosen": 60.088844299316406,
1008
- "log_odds_ratio": -5.067689418792725,
1009
- "logps/chosen": -8.742526054382324,
1010
- "logps/rejected": -68.63340759277344,
1011
- "loss": -673.9993,
1012
- "nll_loss": 10.168830871582031,
1013
  "rewards/accuracies": 0.75,
1014
- "rewards/chosen": -4.371263027191162,
1015
- "rewards/margins": 29.945446014404297,
1016
- "rewards/rejected": -34.31670379638672,
1017
  "step": 315
1018
  },
1019
  {
1020
  "epoch": 2.9765925925925925,
1021
  "step": 315,
1022
  "total_flos": 0.0,
1023
- "train_loss": -61.895723918127636,
1024
- "train_runtime": 9291.6158,
1025
- "train_samples_per_second": 2.179,
1026
  "train_steps_per_second": 0.034
1027
  }
1028
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.047407407407407405,
13
+ "grad_norm": 1346.035400390625,
14
  "learning_rate": 7.8125e-06,
15
+ "log_odds_chosen": -1.0068690776824951,
16
+ "log_odds_ratio": -10.175436019897461,
17
+ "logps/chosen": -21.69235610961914,
18
+ "logps/rejected": -20.685726165771484,
19
+ "loss": 709.8242,
20
+ "nll_loss": 9.674795150756836,
21
  "rewards/accuracies": 0.484375,
22
+ "rewards/chosen": -10.84617805480957,
23
+ "rewards/margins": -0.5033147931098938,
24
+ "rewards/rejected": -10.342863082885742,
25
  "step": 5
26
  },
27
  {
28
  "epoch": 0.09481481481481481,
29
+ "grad_norm": 1455.1102294921875,
30
  "learning_rate": 1.5625e-05,
31
+ "log_odds_chosen": -2.8008556365966797,
32
+ "log_odds_ratio": -10.15751838684082,
33
+ "logps/chosen": -20.97389793395996,
34
+ "logps/rejected": -18.17340660095215,
35
+ "loss": 714.7821,
36
+ "nll_loss": 8.685011863708496,
37
  "rewards/accuracies": 0.44062501192092896,
38
+ "rewards/chosen": -10.48694896697998,
39
+ "rewards/margins": -1.4002447128295898,
40
+ "rewards/rejected": -9.086703300476074,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 0.14222222222222222,
45
+ "grad_norm": 638.308837890625,
46
  "learning_rate": 2.34375e-05,
47
+ "log_odds_chosen": -1.2725921869277954,
48
+ "log_odds_ratio": -10.43528938293457,
49
+ "logps/chosen": -20.847124099731445,
50
+ "logps/rejected": -19.573030471801758,
51
+ "loss": 687.155,
52
+ "nll_loss": 8.07744312286377,
53
+ "rewards/accuracies": 0.512499988079071,
54
+ "rewards/chosen": -10.423562049865723,
55
+ "rewards/margins": -0.6370473504066467,
56
+ "rewards/rejected": -9.786515235900879,
57
  "step": 15
58
  },
59
  {
60
  "epoch": 0.18962962962962962,
61
+ "grad_norm": 5364.12744140625,
62
  "learning_rate": 3.125e-05,
63
+ "log_odds_chosen": -3.7516868114471436,
64
+ "log_odds_ratio": -8.449677467346191,
65
+ "logps/chosen": -15.886372566223145,
66
+ "logps/rejected": -12.137221336364746,
67
+ "loss": 567.2934,
68
+ "nll_loss": 6.516029357910156,
69
+ "rewards/accuracies": 0.4312500059604645,
70
+ "rewards/chosen": -7.943186283111572,
71
+ "rewards/margins": -1.8745750188827515,
72
+ "rewards/rejected": -6.068610668182373,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 0.23703703703703705,
77
+ "grad_norm": 732.103271484375,
78
  "learning_rate": 3.90625e-05,
79
+ "log_odds_chosen": -0.05239830166101456,
80
+ "log_odds_ratio": -1.5554496049880981,
81
+ "logps/chosen": -3.6205036640167236,
82
+ "logps/rejected": -3.5546963214874268,
83
+ "loss": 117.5423,
84
+ "nll_loss": 3.4973304271698,
85
+ "rewards/accuracies": 0.5531250238418579,
86
+ "rewards/chosen": -1.8102518320083618,
87
+ "rewards/margins": -0.03290349990129471,
88
+ "rewards/rejected": -1.7773481607437134,
89
  "step": 25
90
  },
91
  {
92
  "epoch": 0.28444444444444444,
93
+ "grad_norm": 235.78294372558594,
94
  "learning_rate": 4.6875e-05,
95
+ "log_odds_chosen": -0.004375058226287365,
96
+ "log_odds_ratio": -0.9642303586006165,
97
+ "logps/chosen": -2.1026854515075684,
98
+ "logps/rejected": -2.0986106395721436,
99
+ "loss": 67.608,
100
+ "nll_loss": 2.820711851119995,
101
+ "rewards/accuracies": 0.518750011920929,
102
+ "rewards/chosen": -1.0513427257537842,
103
+ "rewards/margins": -0.002037324011325836,
104
+ "rewards/rejected": -1.0493053197860718,
105
  "step": 30
106
  },
107
  {
108
  "epoch": 0.33185185185185184,
109
+ "grad_norm": 201.821533203125,
110
  "learning_rate": 4.998613757348784e-05,
111
+ "log_odds_chosen": 0.23235782980918884,
112
+ "log_odds_ratio": -0.796781063079834,
113
+ "logps/chosen": -1.6877104043960571,
114
+ "logps/rejected": -1.896958589553833,
115
+ "loss": 51.2266,
116
+ "nll_loss": 2.3120052814483643,
117
+ "rewards/accuracies": 0.5093749761581421,
118
+ "rewards/chosen": -0.8438552021980286,
119
+ "rewards/margins": 0.10462428629398346,
120
+ "rewards/rejected": -0.9484792947769165,
121
  "step": 35
122
  },
123
  {
124
  "epoch": 0.37925925925925924,
125
+ "grad_norm": 501.2691345214844,
126
  "learning_rate": 4.990147841143462e-05,
127
+ "log_odds_chosen": 0.3352593779563904,
128
+ "log_odds_ratio": -0.7122059464454651,
129
+ "logps/chosen": -1.5017939805984497,
130
+ "logps/rejected": -1.7836459875106812,
131
+ "loss": 44.764,
132
+ "nll_loss": 2.343039035797119,
133
+ "rewards/accuracies": 0.606249988079071,
134
+ "rewards/chosen": -0.7508969902992249,
135
+ "rewards/margins": 0.1409260481595993,
136
+ "rewards/rejected": -0.8918229937553406,
137
  "step": 40
138
  },
139
  {
140
  "epoch": 0.4266666666666667,
141
+ "grad_norm": 224.84271240234375,
142
  "learning_rate": 4.97401218720448e-05,
143
+ "log_odds_chosen": 0.3514587879180908,
144
+ "log_odds_ratio": -0.6827880144119263,
145
+ "logps/chosen": -1.4357855319976807,
146
+ "logps/rejected": -1.7441604137420654,
147
+ "loss": 42.3725,
148
+ "nll_loss": 2.1640477180480957,
149
+ "rewards/accuracies": 0.606249988079071,
150
+ "rewards/chosen": -0.7178927659988403,
151
+ "rewards/margins": 0.15418751537799835,
152
+ "rewards/rejected": -0.8720802068710327,
153
  "step": 45
154
  },
155
  {
156
  "epoch": 0.4740740740740741,
157
+ "grad_norm": 208.4950714111328,
158
  "learning_rate": 4.9502564938797946e-05,
159
+ "log_odds_chosen": 0.3455984592437744,
160
+ "log_odds_ratio": -0.6899778842926025,
161
+ "logps/chosen": -1.336740493774414,
162
+ "logps/rejected": -1.6336427927017212,
163
+ "loss": 39.3189,
164
+ "nll_loss": 2.020599842071533,
165
+ "rewards/accuracies": 0.6156250238418579,
166
+ "rewards/chosen": -0.668370246887207,
167
+ "rewards/margins": 0.14845120906829834,
168
+ "rewards/rejected": -0.8168213963508606,
169
  "step": 50
170
  },
171
  {
172
  "epoch": 0.5214814814814814,
173
+ "grad_norm": 1214.6304931640625,
174
  "learning_rate": 4.918953929490768e-05,
175
+ "log_odds_chosen": 1.7929474115371704,
176
+ "log_odds_ratio": -0.681054413318634,
177
+ "logps/chosen": -1.559952735900879,
178
+ "logps/rejected": -3.2753818035125732,
179
+ "loss": 24.7409,
180
+ "nll_loss": 2.3052754402160645,
181
+ "rewards/accuracies": 0.628125011920929,
182
+ "rewards/chosen": -0.7799763679504395,
183
+ "rewards/margins": 0.8577146530151367,
184
+ "rewards/rejected": -1.6376909017562866,
185
  "step": 55
186
  },
187
  {
188
  "epoch": 0.5688888888888889,
189
+ "grad_norm": 1253.04541015625,
190
  "learning_rate": 4.88020090697132e-05,
191
+ "log_odds_chosen": 6.016274452209473,
192
+ "log_odds_ratio": -0.6842174530029297,
193
+ "logps/chosen": -2.042940616607666,
194
+ "logps/rejected": -7.971026420593262,
195
+ "loss": -26.6321,
196
+ "nll_loss": 2.7467756271362305,
197
+ "rewards/accuracies": 0.668749988079071,
198
+ "rewards/chosen": -1.021470308303833,
199
+ "rewards/margins": 2.964043140411377,
200
+ "rewards/rejected": -3.985513210296631,
201
  "step": 60
202
  },
203
  {
204
  "epoch": 0.6162962962962963,
205
+ "grad_norm": 22529.046875,
206
  "learning_rate": 4.834116786912897e-05,
207
+ "log_odds_chosen": 13.827421188354492,
208
+ "log_odds_ratio": -6.549749851226807,
209
+ "logps/chosen": -11.553215026855469,
210
+ "logps/rejected": -25.299327850341797,
211
+ "loss": 152.311,
212
+ "nll_loss": 9.787839889526367,
213
+ "rewards/accuracies": 0.6499999761581421,
214
+ "rewards/chosen": -5.776607513427734,
215
+ "rewards/margins": 6.873056888580322,
216
+ "rewards/rejected": -12.649663925170898,
217
  "step": 65
218
  },
219
  {
220
  "epoch": 0.6637037037037037,
221
+ "grad_norm": 6729.6181640625,
222
  "learning_rate": 4.7808435099299045e-05,
223
+ "log_odds_chosen": 9.947161674499512,
224
+ "log_odds_ratio": -1.2594565153121948,
225
+ "logps/chosen": -2.880189895629883,
226
+ "logps/rejected": -12.729331970214844,
227
+ "loss": -62.2908,
228
+ "nll_loss": 3.341439723968506,
229
+ "rewards/accuracies": 0.699999988079071,
230
+ "rewards/chosen": -1.4400949478149414,
231
+ "rewards/margins": 4.9245710372924805,
232
+ "rewards/rejected": -6.364665985107422,
233
  "step": 70
234
  },
235
  {
236
  "epoch": 0.7111111111111111,
237
+ "grad_norm": 9763.5361328125,
238
  "learning_rate": 4.720545159477922e-05,
239
+ "log_odds_chosen": 9.978219985961914,
240
+ "log_odds_ratio": -2.5337777137756348,
241
+ "logps/chosen": -4.444361686706543,
242
+ "logps/rejected": -14.362889289855957,
243
+ "loss": -14.9101,
244
+ "nll_loss": 4.244600296020508,
245
+ "rewards/accuracies": 0.640625,
246
+ "rewards/chosen": -2.2221808433532715,
247
+ "rewards/margins": 4.959264278411865,
248
+ "rewards/rejected": -7.1814446449279785,
249
  "step": 75
250
  },
251
  {
252
  "epoch": 0.7585185185185185,
253
+ "grad_norm": 19205.619140625,
254
  "learning_rate": 4.653407456471222e-05,
255
+ "log_odds_chosen": 9.231340408325195,
256
+ "log_odds_ratio": -3.549084186553955,
257
+ "logps/chosen": -9.340559959411621,
258
+ "logps/rejected": -18.532567977905273,
259
+ "loss": 153.5262,
260
+ "nll_loss": 9.000253677368164,
261
+ "rewards/accuracies": 0.5843750238418579,
262
+ "rewards/chosen": -4.6702799797058105,
263
+ "rewards/margins": 4.596003532409668,
264
+ "rewards/rejected": -9.266283988952637,
265
  "step": 80
266
  },
267
  {
268
  "epoch": 0.8059259259259259,
269
+ "grad_norm": 1391.7034912109375,
270
  "learning_rate": 4.579637187256222e-05,
271
+ "log_odds_chosen": 6.83356237411499,
272
+ "log_odds_ratio": -0.8187046051025391,
273
+ "logps/chosen": -1.6173499822616577,
274
+ "logps/rejected": -8.405004501342773,
275
+ "loss": -55.3577,
276
+ "nll_loss": 2.6606650352478027,
277
+ "rewards/accuracies": 0.609375,
278
+ "rewards/chosen": -0.8086749911308289,
279
+ "rewards/margins": 3.393826961517334,
280
+ "rewards/rejected": -4.202502250671387,
281
  "step": 85
282
  },
283
  {
284
  "epoch": 0.8533333333333334,
285
+ "grad_norm": 714.6627197265625,
286
  "learning_rate": 4.499461566702685e-05,
287
+ "log_odds_chosen": 12.91296100616455,
288
+ "log_odds_ratio": -0.7300605177879333,
289
+ "logps/chosen": -3.667490005493164,
290
+ "logps/rejected": -16.505706787109375,
291
+ "loss": -86.1328,
292
+ "nll_loss": 4.034636974334717,
293
  "rewards/accuracies": 0.6156250238418579,
294
+ "rewards/chosen": -1.833745002746582,
295
+ "rewards/margins": 6.419107913970947,
296
+ "rewards/rejected": -8.252853393554688,
297
  "step": 90
298
  },
299
  {
300
  "epoch": 0.9007407407407407,
301
+ "grad_norm": 5335.16162109375,
302
  "learning_rate": 4.413127538374411e-05,
303
+ "log_odds_chosen": 21.759021759033203,
304
+ "log_odds_ratio": -5.81619930267334,
305
+ "logps/chosen": -8.227999687194824,
306
+ "logps/rejected": -29.910602569580078,
307
+ "loss": -80.9532,
308
+ "nll_loss": 7.397497653961182,
309
+ "rewards/accuracies": 0.637499988079071,
310
+ "rewards/chosen": -4.113999843597412,
311
+ "rewards/margins": 10.841300010681152,
312
+ "rewards/rejected": -14.955301284790039,
313
  "step": 95
314
  },
315
  {
316
  "epoch": 0.9481481481481482,
317
+ "grad_norm": 9169.87890625,
318
  "learning_rate": 4.320901013934887e-05,
319
+ "log_odds_chosen": 21.403528213500977,
320
+ "log_odds_ratio": -4.2520060539245605,
321
+ "logps/chosen": -6.323123931884766,
322
+ "logps/rejected": -27.63702964782715,
323
+ "loss": -135.6809,
324
+ "nll_loss": 5.874188423156738,
325
+ "rewards/accuracies": 0.684374988079071,
326
+ "rewards/chosen": -3.161561965942383,
327
+ "rewards/margins": 10.656951904296875,
328
+ "rewards/rejected": -13.818514823913574,
329
  "step": 100
330
  },
331
  {
332
  "epoch": 0.9955555555555555,
333
+ "grad_norm": 15093.3359375,
334
  "learning_rate": 4.223066054130568e-05,
335
+ "log_odds_chosen": 21.14289093017578,
336
+ "log_odds_ratio": -2.737898349761963,
337
+ "logps/chosen": -6.241971015930176,
338
+ "logps/rejected": -27.327472686767578,
339
+ "loss": -135.2989,
340
+ "nll_loss": 6.4709601402282715,
341
+ "rewards/accuracies": 0.640625,
342
+ "rewards/chosen": -3.120985507965088,
343
+ "rewards/margins": 10.542750358581543,
344
+ "rewards/rejected": -13.663736343383789,
345
  "step": 105
346
  },
347
  {
348
  "epoch": 1.037925925925926,
349
+ "grad_norm": 2860.319580078125,
350
  "learning_rate": 4.1199239938743797e-05,
351
+ "log_odds_chosen": 14.374427795410156,
352
+ "log_odds_ratio": -0.5818507075309753,
353
+ "logps/chosen": -2.059671401977539,
354
+ "logps/rejected": -16.356985092163086,
355
+ "loss": -143.7255,
356
+ "nll_loss": 2.7815299034118652,
357
  "rewards/accuracies": 0.6538461446762085,
358
+ "rewards/chosen": -1.0298357009887695,
359
+ "rewards/margins": 7.148656368255615,
360
+ "rewards/rejected": -8.178492546081543,
361
  "step": 110
362
  },
363
  {
364
  "epoch": 1.0853333333333333,
365
+ "grad_norm": 32810.50390625,
366
  "learning_rate": 4.0117925141242174e-05,
367
+ "log_odds_chosen": 27.462366104125977,
368
+ "log_odds_ratio": -3.54345703125,
369
+ "logps/chosen": -7.290637016296387,
370
+ "logps/rejected": -34.6160888671875,
371
+ "loss": -200.7838,
372
+ "nll_loss": 7.086569786071777,
373
+ "rewards/accuracies": 0.7124999761581421,
374
+ "rewards/chosen": -3.6453185081481934,
375
+ "rewards/margins": 13.662725448608398,
376
+ "rewards/rejected": -17.30804443359375,
377
  "step": 115
378
  },
379
  {
380
  "epoch": 1.1327407407407408,
381
+ "grad_norm": 2398.583984375,
382
  "learning_rate": 3.899004663415084e-05,
383
+ "log_odds_chosen": 17.45090675354004,
384
+ "log_odds_ratio": -4.304950714111328,
385
+ "logps/chosen": -7.110805511474609,
386
+ "logps/rejected": -24.452959060668945,
387
+ "loss": -47.5812,
388
+ "nll_loss": 7.3393707275390625,
389
+ "rewards/accuracies": 0.643750011920929,
390
+ "rewards/chosen": -3.5554027557373047,
391
+ "rewards/margins": 8.671076774597168,
392
+ "rewards/rejected": -12.226479530334473,
393
  "step": 120
394
  },
395
  {
396
  "epoch": 1.1801481481481482,
397
+ "grad_norm": 23893.76171875,
398
  "learning_rate": 3.781907832058587e-05,
399
+ "log_odds_chosen": 24.439823150634766,
400
+ "log_odds_ratio": -5.230821132659912,
401
+ "logps/chosen": -8.570024490356445,
402
+ "logps/rejected": -32.90158462524414,
403
+ "loss": -112.3321,
404
+ "nll_loss": 8.563321113586426,
405
+ "rewards/accuracies": 0.6875,
406
+ "rewards/chosen": -4.285012245178223,
407
+ "rewards/margins": 12.165781021118164,
408
+ "rewards/rejected": -16.45079231262207,
409
  "step": 125
410
  },
411
  {
412
  "epoch": 1.2275555555555555,
413
+ "grad_norm": 2984.347900390625,
414
  "learning_rate": 3.660862682169282e-05,
415
+ "log_odds_chosen": 31.73781967163086,
416
+ "log_odds_ratio": -3.9809155464172363,
417
+ "logps/chosen": -10.971330642700195,
418
+ "logps/rejected": -42.56734085083008,
419
+ "loss": -150.9388,
420
+ "nll_loss": 10.558366775512695,
421
+ "rewards/accuracies": 0.690625011920929,
422
+ "rewards/chosen": -5.485665321350098,
423
+ "rewards/margins": 15.798006057739258,
424
+ "rewards/rejected": -21.28367042541504,
425
  "step": 130
426
  },
427
  {
428
  "epoch": 1.274962962962963,
429
+ "grad_norm": 8554.173828125,
430
  "learning_rate": 3.5362420368134356e-05,
431
+ "log_odds_chosen": 28.130645751953125,
432
+ "log_odds_ratio": -1.710423231124878,
433
+ "logps/chosen": -3.9046826362609863,
434
+ "logps/rejected": -31.89151382446289,
435
+ "loss": -319.8829,
436
+ "nll_loss": 3.7381324768066406,
437
+ "rewards/accuracies": 0.6968749761581421,
438
+ "rewards/chosen": -1.9523413181304932,
439
+ "rewards/margins": 13.993415832519531,
440
+ "rewards/rejected": -15.945756912231445,
441
  "step": 135
442
  },
443
  {
444
  "epoch": 1.3223703703703704,
445
+ "grad_norm": 4571.39404296875,
446
  "learning_rate": 3.408429731701635e-05,
447
+ "log_odds_chosen": 23.899776458740234,
448
+ "log_odds_ratio": -3.7175846099853516,
449
+ "logps/chosen": -5.324652671813965,
450
+ "logps/rejected": -29.102670669555664,
451
+ "loss": -207.4437,
452
+ "nll_loss": 5.260149955749512,
453
+ "rewards/accuracies": 0.6781250238418579,
454
+ "rewards/chosen": -2.6623263359069824,
455
+ "rewards/margins": 11.889008522033691,
456
+ "rewards/rejected": -14.551335334777832,
457
  "step": 140
458
  },
459
  {
460
  "epoch": 1.3697777777777778,
461
+ "grad_norm": 2103.40673828125,
462
  "learning_rate": 3.2778194329621104e-05,
463
+ "log_odds_chosen": 20.076154708862305,
464
+ "log_odds_ratio": -2.8479440212249756,
465
+ "logps/chosen": -3.987905979156494,
466
+ "logps/rejected": -23.93975257873535,
467
+ "loss": -188.7188,
468
+ "nll_loss": 6.649003505706787,
469
+ "rewards/accuracies": 0.684374988079071,
470
+ "rewards/chosen": -1.993952989578247,
471
+ "rewards/margins": 9.975922584533691,
472
+ "rewards/rejected": -11.969876289367676,
473
  "step": 145
474
  },
475
  {
476
  "epoch": 1.417185185185185,
477
+ "grad_norm": 21103.2109375,
478
  "learning_rate": 3.144813424636031e-05,
479
+ "log_odds_chosen": 26.536422729492188,
480
+ "log_odds_ratio": -3.6103949546813965,
481
+ "logps/chosen": -7.1077470779418945,
482
+ "logps/rejected": -33.501373291015625,
483
+ "loss": -191.7946,
484
+ "nll_loss": 10.428869247436523,
485
+ "rewards/accuracies": 0.6937500238418579,
486
+ "rewards/chosen": -3.5538735389709473,
487
+ "rewards/margins": 13.196813583374023,
488
+ "rewards/rejected": -16.750686645507812,
489
  "step": 150
490
  },
491
  {
492
  "epoch": 1.4645925925925927,
493
+ "grad_norm": 20648.404296875,
494
  "learning_rate": 3.0098213696293542e-05,
495
+ "log_odds_chosen": 30.618228912353516,
496
+ "log_odds_ratio": -8.441522598266602,
497
+ "logps/chosen": -22.582386016845703,
498
+ "logps/rejected": -53.13446044921875,
499
+ "loss": 236.5034,
500
+ "nll_loss": 25.505130767822266,
501
+ "rewards/accuracies": 0.643750011920929,
502
+ "rewards/chosen": -11.291193008422852,
503
+ "rewards/margins": 15.276036262512207,
504
+ "rewards/rejected": -26.567230224609375,
505
  "step": 155
506
  },
507
  {
508
  "epoch": 1.512,
509
+ "grad_norm": 6142.369140625,
510
  "learning_rate": 2.8732590479375165e-05,
511
+ "log_odds_chosen": 28.4414005279541,
512
+ "log_odds_ratio": -4.836407661437988,
513
+ "logps/chosen": -10.202522277832031,
514
+ "logps/rejected": -38.521305084228516,
515
+ "loss": -124.1278,
516
+ "nll_loss": 7.826313018798828,
517
+ "rewards/accuracies": 0.625,
518
+ "rewards/chosen": -5.101261138916016,
519
+ "rewards/margins": 14.159391403198242,
520
+ "rewards/rejected": -19.260652542114258,
521
  "step": 160
522
  },
523
  {
524
  "epoch": 1.5594074074074074,
525
+ "grad_norm": 887.5225219726562,
526
  "learning_rate": 2.7355470760292956e-05,
527
+ "log_odds_chosen": 15.544232368469238,
528
+ "log_odds_ratio": -2.6855194568634033,
529
+ "logps/chosen": -3.4005138874053955,
530
+ "logps/rejected": -18.811342239379883,
531
+ "loss": -135.4756,
532
+ "nll_loss": 4.0409064292907715,
533
+ "rewards/accuracies": 0.671875,
534
+ "rewards/chosen": -1.7002569437026978,
535
+ "rewards/margins": 7.705413818359375,
536
+ "rewards/rejected": -9.405671119689941,
537
  "step": 165
538
  },
539
  {
540
  "epoch": 1.6068148148148147,
541
+ "grad_norm": 1129.25048828125,
542
  "learning_rate": 2.597109611334169e-05,
543
+ "log_odds_chosen": 17.921756744384766,
544
+ "log_odds_ratio": -2.1972568035125732,
545
+ "logps/chosen": -3.035590171813965,
546
+ "logps/rejected": -20.839210510253906,
547
+ "loss": -185.0614,
548
+ "nll_loss": 3.198902130126953,
549
+ "rewards/accuracies": 0.6875,
550
+ "rewards/chosen": -1.5177950859069824,
551
+ "rewards/margins": 8.901809692382812,
552
+ "rewards/rejected": -10.419605255126953,
553
  "step": 170
554
  },
555
  {
556
  "epoch": 1.6542222222222223,
557
+ "grad_norm": 6776.86669921875,
558
  "learning_rate": 2.458373045823404e-05,
559
+ "log_odds_chosen": 19.142057418823242,
560
+ "log_odds_ratio": -4.326524257659912,
561
+ "logps/chosen": -8.229945182800293,
562
+ "logps/rejected": -27.229846954345703,
563
+ "loss": -37.5736,
564
+ "nll_loss": 9.928422927856445,
565
+ "rewards/accuracies": 0.6968749761581421,
566
+ "rewards/chosen": -4.1149725914001465,
567
+ "rewards/margins": 9.499950408935547,
568
+ "rewards/rejected": -13.614923477172852,
569
  "step": 175
570
  },
571
  {
572
  "epoch": 1.7016296296296296,
573
+ "grad_norm": 6735.8046875,
574
  "learning_rate": 2.3197646927086697e-05,
575
+ "log_odds_chosen": 21.28363609313965,
576
+ "log_odds_ratio": -3.7137675285339355,
577
+ "logps/chosen": -6.68829345703125,
578
+ "logps/rejected": -27.832469940185547,
579
+ "loss": -121.5285,
580
+ "nll_loss": 10.662646293640137,
581
+ "rewards/accuracies": 0.675000011920929,
582
+ "rewards/chosen": -3.344146728515625,
583
+ "rewards/margins": 10.572088241577148,
584
+ "rewards/rejected": -13.916234970092773,
585
  "step": 180
586
  },
587
  {
588
  "epoch": 1.749037037037037,
589
+ "grad_norm": 980.9172973632812,
590
  "learning_rate": 2.1817114703032176e-05,
591
+ "log_odds_chosen": 11.027368545532227,
592
+ "log_odds_ratio": -1.5065878629684448,
593
+ "logps/chosen": -2.5348973274230957,
594
+ "logps/rejected": -13.437909126281738,
595
+ "loss": -90.6843,
596
+ "nll_loss": 4.682984828948975,
597
+ "rewards/accuracies": 0.65625,
598
+ "rewards/chosen": -1.2674486637115479,
599
+ "rewards/margins": 5.4515061378479,
600
+ "rewards/rejected": -6.718954563140869,
601
  "step": 185
602
  },
603
  {
604
  "epoch": 1.7964444444444445,
605
+ "grad_norm": 2794.48828125,
606
  "learning_rate": 2.0446385870993467e-05,
607
+ "log_odds_chosen": 25.16632080078125,
608
+ "log_odds_ratio": -1.736579179763794,
609
+ "logps/chosen": -2.6034204959869385,
610
+ "logps/rejected": -27.604034423828125,
611
+ "loss": -313.2717,
612
+ "nll_loss": 4.227630138397217,
613
+ "rewards/accuracies": 0.71875,
614
+ "rewards/chosen": -1.3017102479934692,
615
+ "rewards/margins": 12.5003080368042,
616
+ "rewards/rejected": -13.802017211914062,
617
  "step": 190
618
  },
619
  {
620
  "epoch": 1.8438518518518519,
621
+ "grad_norm": 18748.30859375,
622
  "learning_rate": 1.9089682321121834e-05,
623
+ "log_odds_chosen": 25.935617446899414,
624
+ "log_odds_ratio": -5.700267791748047,
625
+ "logps/chosen": -7.655577182769775,
626
+ "logps/rejected": -33.431495666503906,
627
+ "loss": -164.2176,
628
+ "nll_loss": 11.73558235168457,
629
+ "rewards/accuracies": 0.703125,
630
+ "rewards/chosen": -3.8277885913848877,
631
+ "rewards/margins": 12.887959480285645,
632
+ "rewards/rejected": -16.715747833251953,
633
  "step": 195
634
  },
635
  {
636
  "epoch": 1.8912592592592592,
637
+ "grad_norm": 9071.26953125,
638
  "learning_rate": 1.775118274523545e-05,
639
+ "log_odds_chosen": 29.912353515625,
640
+ "log_odds_ratio": -3.8106212615966797,
641
+ "logps/chosen": -7.143023490905762,
642
+ "logps/rejected": -36.88776397705078,
643
+ "loss": -243.6502,
644
+ "nll_loss": 12.543132781982422,
645
+ "rewards/accuracies": 0.715624988079071,
646
+ "rewards/chosen": -3.571511745452881,
647
+ "rewards/margins": 14.872370719909668,
648
+ "rewards/rejected": -18.44388198852539,
649
  "step": 200
650
  },
651
  {
652
  "epoch": 1.9386666666666668,
653
+ "grad_norm": 5131.81005859375,
654
  "learning_rate": 1.643500976631037e-05,
655
+ "log_odds_chosen": 21.7247314453125,
656
+ "log_odds_ratio": -4.9020280838012695,
657
+ "logps/chosen": -8.960885047912598,
658
+ "logps/rejected": -30.520559310913086,
659
+ "loss": -54.6864,
660
+ "nll_loss": 11.589077949523926,
661
  "rewards/accuracies": 0.71875,
662
+ "rewards/chosen": -4.480442523956299,
663
+ "rewards/margins": 10.779837608337402,
664
+ "rewards/rejected": -15.260279655456543,
665
  "step": 205
666
  },
667
  {
668
  "epoch": 1.986074074074074,
669
+ "grad_norm": 6081.4013671875,
670
  "learning_rate": 1.514521724066537e-05,
671
+ "log_odds_chosen": 26.899578094482422,
672
+ "log_odds_ratio": -4.448848724365234,
673
+ "logps/chosen": -6.3961944580078125,
674
+ "logps/rejected": -33.168235778808594,
675
+ "loss": -220.8887,
676
+ "nll_loss": 8.339305877685547,
677
+ "rewards/accuracies": 0.6656249761581421,
678
+ "rewards/chosen": -3.1980972290039062,
679
+ "rewards/margins": 13.386022567749023,
680
+ "rewards/rejected": -16.584117889404297,
681
  "step": 210
682
  },
683
  {
684
  "epoch": 2.0284444444444443,
685
+ "grad_norm": 13102.798828125,
686
  "learning_rate": 1.3885777771950348e-05,
687
+ "log_odds_chosen": 31.693286895751953,
688
+ "log_odds_ratio": -7.093617916107178,
689
+ "logps/chosen": -9.377802848815918,
690
+ "logps/rejected": -40.8663330078125,
691
+ "loss": -178.6939,
692
+ "nll_loss": 11.103485107421875,
693
+ "rewards/accuracies": 0.7062937021255493,
694
+ "rewards/chosen": -4.688901424407959,
695
+ "rewards/margins": 15.744266510009766,
696
+ "rewards/rejected": -20.43316650390625,
697
  "step": 215
698
  },
699
  {
700
  "epoch": 2.075851851851852,
701
+ "grad_norm": 16916.310546875,
702
  "learning_rate": 1.2660570475395683e-05,
703
+ "log_odds_chosen": 29.3436222076416,
704
+ "log_odds_ratio": -5.789451599121094,
705
+ "logps/chosen": -14.939682006835938,
706
+ "logps/rejected": -44.14417266845703,
707
+ "loss": 14.2524,
708
+ "nll_loss": 22.04985809326172,
709
+ "rewards/accuracies": 0.675000011920929,
710
+ "rewards/chosen": -7.469841003417969,
711
+ "rewards/margins": 14.60224437713623,
712
+ "rewards/rejected": -22.072086334228516,
713
  "step": 220
714
  },
715
  {
716
  "epoch": 2.1232592592592594,
717
+ "grad_norm": 17463.740234375,
718
  "learning_rate": 1.1473369030008974e-05,
719
+ "log_odds_chosen": 36.83652877807617,
720
+ "log_odds_ratio": -3.7113094329833984,
721
+ "logps/chosen": -9.069849967956543,
722
+ "logps/rejected": -45.71887969970703,
723
+ "loss": -291.8311,
724
+ "nll_loss": 16.01699447631836,
725
+ "rewards/accuracies": 0.7093750238418579,
726
+ "rewards/chosen": -4.5349249839782715,
727
+ "rewards/margins": 18.324514389038086,
728
+ "rewards/rejected": -22.859439849853516,
729
  "step": 225
730
  },
731
  {
732
  "epoch": 2.1706666666666665,
733
+ "grad_norm": 2066.959228515625,
734
  "learning_rate": 1.0327830055518842e-05,
735
+ "log_odds_chosen": 34.293540954589844,
736
+ "log_odds_ratio": -2.4719889163970947,
737
+ "logps/chosen": -6.306262016296387,
738
+ "logps/rejected": -40.41852951049805,
739
+ "loss": -340.089,
740
+ "nll_loss": 12.120941162109375,
741
+ "rewards/accuracies": 0.703125,
742
+ "rewards/chosen": -3.1531310081481934,
743
+ "rewards/margins": 17.056133270263672,
744
+ "rewards/rejected": -20.209264755249023,
745
  "step": 230
746
  },
747
  {
748
  "epoch": 2.218074074074074,
749
+ "grad_norm": 3184.418212890625,
750
  "learning_rate": 9.227481849865235e-06,
751
+ "log_odds_chosen": 36.27004623413086,
752
+ "log_odds_ratio": -2.707547903060913,
753
+ "logps/chosen": -3.9607880115509033,
754
+ "logps/rejected": -40.02045440673828,
755
+ "loss": -446.1736,
756
+ "nll_loss": 6.836036682128906,
757
+ "rewards/accuracies": 0.734375,
758
+ "rewards/chosen": -1.9803940057754517,
759
+ "rewards/margins": 18.02983283996582,
760
+ "rewards/rejected": -20.01022720336914,
761
  "step": 235
762
  },
763
  {
764
  "epoch": 2.2654814814814817,
765
+ "grad_norm": 12459.7412109375,
766
  "learning_rate": 8.175713521924978e-06,
767
+ "log_odds_chosen": 31.65945816040039,
768
+ "log_odds_ratio": -2.4659223556518555,
769
+ "logps/chosen": -4.006608963012695,
770
+ "logps/rejected": -35.472171783447266,
771
+ "loss": -371.1167,
772
+ "nll_loss": 7.588781833648682,
773
+ "rewards/accuracies": 0.71875,
774
+ "rewards/chosen": -2.0033044815063477,
775
+ "rewards/margins": 15.732783317565918,
776
+ "rewards/rejected": -17.736085891723633,
777
  "step": 240
778
  },
779
  {
780
  "epoch": 2.3128888888888888,
781
+ "grad_norm": 10665.607421875,
782
  "learning_rate": 7.1757645529443665e-06,
783
+ "log_odds_chosen": 41.71519088745117,
784
+ "log_odds_ratio": -2.4143545627593994,
785
+ "logps/chosen": -5.24094295501709,
786
+ "logps/rejected": -46.76899337768555,
787
+ "loss": -492.2525,
788
+ "nll_loss": 14.035673141479492,
789
+ "rewards/accuracies": 0.765625,
790
+ "rewards/chosen": -2.620471477508545,
791
+ "rewards/margins": 20.764026641845703,
792
+ "rewards/rejected": -23.384496688842773,
793
  "step": 245
794
  },
795
  {
796
  "epoch": 2.3602962962962963,
797
+ "grad_norm": 15119.1689453125,
798
  "learning_rate": 6.230714818829733e-06,
799
+ "log_odds_chosen": 35.395755767822266,
800
+ "log_odds_ratio": -1.310185194015503,
801
+ "logps/chosen": -5.026707649230957,
802
+ "logps/rejected": -40.237152099609375,
803
+ "loss": -398.4109,
804
+ "nll_loss": 9.918493270874023,
805
  "rewards/accuracies": 0.7093750238418579,
806
+ "rewards/chosen": -2.5133538246154785,
807
+ "rewards/margins": 17.605222702026367,
808
+ "rewards/rejected": -20.118576049804688,
809
  "step": 250
810
  },
811
  {
812
  "epoch": 2.407703703703704,
813
+ "grad_norm": 5417.455078125,
814
  "learning_rate": 5.343475104027743e-06,
815
+ "log_odds_chosen": 34.123165130615234,
816
+ "log_odds_ratio": -2.2660939693450928,
817
+ "logps/chosen": -6.2752251625061035,
818
+ "logps/rejected": -40.19994354248047,
819
+ "loss": -337.6817,
820
+ "nll_loss": 11.942907333374023,
821
+ "rewards/accuracies": 0.75,
822
+ "rewards/chosen": -3.1376125812530518,
823
+ "rewards/margins": 16.962358474731445,
824
+ "rewards/rejected": -20.099971771240234,
825
  "step": 255
826
  },
827
  {
828
  "epoch": 2.455111111111111,
829
+ "grad_norm": 5251.4404296875,
830
  "learning_rate": 4.516778136213037e-06,
831
+ "log_odds_chosen": 29.794910430908203,
832
+ "log_odds_ratio": -3.3600330352783203,
833
+ "logps/chosen": -5.655725955963135,
834
+ "logps/rejected": -35.23265838623047,
835
+ "loss": -288.0842,
836
+ "nll_loss": 10.556007385253906,
837
+ "rewards/accuracies": 0.75,
838
+ "rewards/chosen": -2.8278629779815674,
839
+ "rewards/margins": 14.78846549987793,
840
+ "rewards/rejected": -17.616329193115234,
841
  "step": 260
842
  },
843
  {
844
  "epoch": 2.5025185185185186,
845
+ "grad_norm": 17512.310546875,
846
  "learning_rate": 3.7531701693965554e-06,
847
+ "log_odds_chosen": 43.79105758666992,
848
+ "log_odds_ratio": -4.43022346496582,
849
+ "logps/chosen": -8.150279998779297,
850
+ "logps/rejected": -51.75994110107422,
851
+ "loss": -432.5288,
852
+ "nll_loss": 18.760528564453125,
853
+ "rewards/accuracies": 0.715624988079071,
854
+ "rewards/chosen": -4.075139999389648,
855
+ "rewards/margins": 21.804828643798828,
856
+ "rewards/rejected": -25.87997055053711,
857
  "step": 265
858
  },
859
  {
860
  "epoch": 2.549925925925926,
861
+ "grad_norm": 16464.80859375,
862
  "learning_rate": 3.055003141378948e-06,
863
+ "log_odds_chosen": 45.24549102783203,
864
+ "log_odds_ratio": -4.265998840332031,
865
+ "logps/chosen": -9.220375061035156,
866
+ "logps/rejected": -54.259765625,
867
+ "loss": -420.995,
868
+ "nll_loss": 17.882661819458008,
869
+ "rewards/accuracies": 0.7281249761581421,
870
+ "rewards/chosen": -4.610187530517578,
871
+ "rewards/margins": 22.51969337463379,
872
+ "rewards/rejected": -27.1298828125,
873
  "step": 270
874
  },
875
  {
876
  "epoch": 2.5973333333333333,
877
+ "grad_norm": 10890.8203125,
878
  "learning_rate": 2.424427429704365e-06,
879
+ "log_odds_chosen": 35.03772735595703,
880
+ "log_odds_ratio": -4.138024806976318,
881
+ "logps/chosen": -8.157249450683594,
882
+ "logps/rejected": -42.98029327392578,
883
+ "loss": -291.3517,
884
+ "nll_loss": 13.331751823425293,
885
+ "rewards/accuracies": 0.765625,
886
+ "rewards/chosen": -4.078624725341797,
887
+ "rewards/margins": 17.411518096923828,
888
+ "rewards/rejected": -21.49014663696289,
889
  "step": 275
890
  },
891
  {
892
  "epoch": 2.644740740740741,
893
+ "grad_norm": 12637.064453125,
894
  "learning_rate": 1.8633852284264508e-06,
895
+ "log_odds_chosen": 41.4226188659668,
896
+ "log_odds_ratio": -3.3395819664001465,
897
+ "logps/chosen": -5.7105512619018555,
898
+ "logps/rejected": -46.93029022216797,
899
+ "loss": -472.0542,
900
+ "nll_loss": 11.638737678527832,
901
+ "rewards/accuracies": 0.7281249761581421,
902
+ "rewards/chosen": -2.8552756309509277,
903
+ "rewards/margins": 20.609867095947266,
904
+ "rewards/rejected": -23.465145111083984,
905
  "step": 280
906
  },
907
  {
908
  "epoch": 2.6921481481481484,
909
+ "grad_norm": 18065.2109375,
910
  "learning_rate": 1.3736045660864034e-06,
911
+ "log_odds_chosen": 33.556556701660156,
912
+ "log_odds_ratio": -4.451475620269775,
913
+ "logps/chosen": -7.034272193908691,
914
+ "logps/rejected": -40.41550064086914,
915
+ "loss": -304.8266,
916
+ "nll_loss": 12.904256820678711,
917
+ "rewards/accuracies": 0.7437499761581421,
918
+ "rewards/chosen": -3.5171360969543457,
919
+ "rewards/margins": 16.690614700317383,
920
+ "rewards/rejected": -20.20775032043457,
921
  "step": 285
922
  },
923
  {
924
  "epoch": 2.7395555555555555,
925
+ "grad_norm": 13026.53515625,
926
  "learning_rate": 9.565939833279192e-07,
927
+ "log_odds_chosen": 35.401798248291016,
928
+ "log_odds_ratio": -3.9767117500305176,
929
+ "logps/chosen": -7.844551086425781,
930
+ "logps/rejected": -43.053428649902344,
931
+ "loss": -308.0105,
932
+ "nll_loss": 11.636465072631836,
933
+ "rewards/accuracies": 0.721875011920929,
934
+ "rewards/chosen": -3.9222755432128906,
935
+ "rewards/margins": 17.604434967041016,
936
+ "rewards/rejected": -21.526714324951172,
937
  "step": 290
938
  },
939
  {
940
  "epoch": 2.786962962962963,
941
+ "grad_norm": 7266.41943359375,
942
  "learning_rate": 6.136378865420872e-07,
943
+ "log_odds_chosen": 32.378849029541016,
944
+ "log_odds_ratio": -2.3196144104003906,
945
+ "logps/chosen": -5.068166255950928,
946
+ "logps/rejected": -37.264400482177734,
947
+ "loss": -348.6297,
948
+ "nll_loss": 9.656694412231445,
949
+ "rewards/accuracies": 0.715624988079071,
950
+ "rewards/chosen": -2.534083127975464,
951
+ "rewards/margins": 16.098114013671875,
952
+ "rewards/rejected": -18.632200241088867,
953
  "step": 295
954
  },
955
  {
956
  "epoch": 2.83437037037037,
957
+ "grad_norm": 6610.1474609375,
958
  "learning_rate": 3.45792591853214e-07,
959
+ "log_odds_chosen": 41.97068786621094,
960
+ "log_odds_ratio": -1.2807286977767944,
961
+ "logps/chosen": -3.7067933082580566,
962
+ "logps/rejected": -45.47202682495117,
963
+ "loss": -545.1357,
964
+ "nll_loss": 8.846769332885742,
965
+ "rewards/accuracies": 0.7406250238418579,
966
+ "rewards/chosen": -1.8533966541290283,
967
+ "rewards/margins": 20.88261604309082,
968
+ "rewards/rejected": -22.736013412475586,
969
  "step": 300
970
  },
971
  {
972
  "epoch": 2.8817777777777778,
973
+ "grad_norm": 8774.75390625,
974
  "learning_rate": 1.538830716302092e-07,
975
+ "log_odds_chosen": 37.96703338623047,
976
+ "log_odds_ratio": -2.458489179611206,
977
+ "logps/chosen": -4.410444736480713,
978
+ "logps/rejected": -42.13166046142578,
979
+ "loss": -457.7542,
980
+ "nll_loss": 8.809645652770996,
981
+ "rewards/accuracies": 0.71875,
982
+ "rewards/chosen": -2.2052223682403564,
983
+ "rewards/margins": 18.860610961914062,
984
+ "rewards/rejected": -21.06583023071289,
985
  "step": 305
986
  },
987
  {
988
  "epoch": 2.9291851851851853,
989
+ "grad_norm": 5141.3994140625,
990
  "learning_rate": 3.8500413544415025e-08,
991
+ "log_odds_chosen": 43.835670471191406,
992
+ "log_odds_ratio": -3.51432466506958,
993
+ "logps/chosen": -6.511708736419678,
994
+ "logps/rejected": -50.12602996826172,
995
+ "loss": -484.2083,
996
+ "nll_loss": 11.03735637664795,
997
+ "rewards/accuracies": 0.793749988079071,
998
+ "rewards/chosen": -3.255854368209839,
999
+ "rewards/margins": 21.807161331176758,
1000
+ "rewards/rejected": -25.06301498413086,
1001
  "step": 310
1002
  },
1003
  {
1004
  "epoch": 2.9765925925925925,
1005
+ "grad_norm": 12163.37890625,
1006
  "learning_rate": 0.0,
1007
+ "log_odds_chosen": 37.13484573364258,
1008
+ "log_odds_ratio": -1.1676459312438965,
1009
+ "logps/chosen": -3.6658377647399902,
1010
+ "logps/rejected": -40.594600677490234,
1011
+ "loss": -469.246,
1012
+ "nll_loss": 8.678117752075195,
1013
  "rewards/accuracies": 0.75,
1014
+ "rewards/chosen": -1.8329188823699951,
1015
+ "rewards/margins": 18.46438217163086,
1016
+ "rewards/rejected": -20.297300338745117,
1017
  "step": 315
1018
  },
1019
  {
1020
  "epoch": 2.9765925925925925,
1021
  "step": 315,
1022
  "total_flos": 0.0,
1023
+ "train_loss": -125.51550612676711,
1024
+ "train_runtime": 9350.753,
1025
+ "train_samples_per_second": 2.166,
1026
  "train_steps_per_second": 0.034
1027
  }
1028
  ],