anoaky commited on
Commit
778136f
·
verified ·
1 Parent(s): 479ea87

Training in progress, epoch 1, checkpoint

Browse files
checkpoint-555/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73390260de79e2c8629bcd13a704a7b0a9571a7b60fb682f4a2233bfce71939b
3
  size 1629432864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8149a3fa6dc63e6cfa5e7d3784b20b3374fe33d5c7e5a330a26eabf4b1653ea7
3
  size 1629432864
checkpoint-555/trainer_state.json CHANGED
@@ -8,411 +8,418 @@
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.018026137899954935,
13
- "grad_norm": 37.54188919067383,
14
- "learning_rate": 4.981949458483755e-05,
15
- "loss": 0.8911,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.03605227579990987,
20
- "grad_norm": 15.440231323242188,
21
- "learning_rate": 4.963898916967509e-05,
22
- "loss": 0.6165,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.054078413699864804,
27
- "grad_norm": 226.8269500732422,
28
- "learning_rate": 4.945848375451264e-05,
29
- "loss": 0.5151,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.07210455159981974,
34
- "grad_norm": 21.70491600036621,
35
- "learning_rate": 4.927797833935018e-05,
36
- "loss": 0.5669,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.09013068949977468,
41
- "grad_norm": 79.27781677246094,
42
- "learning_rate": 4.909747292418773e-05,
43
- "loss": 0.5119,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.10815682739972961,
48
- "grad_norm": 15.314950942993164,
49
- "learning_rate": 4.891696750902527e-05,
50
- "loss": 0.474,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.12618296529968454,
55
- "grad_norm": 25.575763702392578,
56
- "learning_rate": 4.873646209386282e-05,
57
- "loss": 0.478,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.14420910319963948,
62
- "grad_norm": 27.453636169433594,
63
- "learning_rate": 4.855595667870036e-05,
64
- "loss": 0.4285,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.16223524109959442,
69
- "grad_norm": 34.88062286376953,
70
- "learning_rate": 4.837545126353791e-05,
71
- "loss": 0.4317,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.18026137899954936,
76
- "grad_norm": 46.823577880859375,
77
- "learning_rate": 4.819494584837546e-05,
78
- "loss": 0.4899,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.19828751689950427,
83
- "grad_norm": 36.73914337158203,
84
- "learning_rate": 4.8014440433213e-05,
85
- "loss": 0.4839,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.21631365479945922,
90
- "grad_norm": 102.61051940917969,
91
- "learning_rate": 4.783393501805055e-05,
92
- "loss": 0.4795,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.23433979269941416,
97
- "grad_norm": 24.9108829498291,
98
- "learning_rate": 4.765342960288809e-05,
99
- "loss": 0.4332,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.25236593059936907,
104
- "grad_norm": 14.366719245910645,
105
- "learning_rate": 4.747292418772563e-05,
106
- "loss": 0.4811,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.270392068499324,
111
- "grad_norm": 19.95521354675293,
112
- "learning_rate": 4.7292418772563177e-05,
113
- "loss": 0.4544,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.28841820639927895,
118
- "grad_norm": 20.914020538330078,
119
- "learning_rate": 4.711191335740072e-05,
120
- "loss": 0.4646,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.3064443442992339,
125
- "grad_norm": 154.6277313232422,
126
- "learning_rate": 4.693140794223827e-05,
127
- "loss": 0.47,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.32447048219918884,
132
- "grad_norm": 30.73076820373535,
133
- "learning_rate": 4.675090252707581e-05,
134
- "loss": 0.4341,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.3424966200991438,
139
- "grad_norm": 14.621489524841309,
140
- "learning_rate": 4.657039711191336e-05,
141
- "loss": 0.5163,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.3605227579990987,
146
- "grad_norm": 25.546030044555664,
147
- "learning_rate": 4.63898916967509e-05,
148
- "loss": 0.4601,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.3785488958990536,
153
- "grad_norm": 18.222177505493164,
154
- "learning_rate": 4.620938628158845e-05,
155
- "loss": 0.4398,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.39657503379900855,
160
- "grad_norm": 20.1109676361084,
161
- "learning_rate": 4.602888086642599e-05,
162
- "loss": 0.432,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.4146011716989635,
167
- "grad_norm": 13.729535102844238,
168
- "learning_rate": 4.584837545126354e-05,
169
- "loss": 0.4327,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.43262730959891843,
174
- "grad_norm": 32.50856018066406,
175
- "learning_rate": 4.566787003610109e-05,
176
- "loss": 0.4208,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.45065344749887337,
181
- "grad_norm": 15.05933666229248,
182
- "learning_rate": 4.548736462093863e-05,
183
- "loss": 0.4451,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.4686795853988283,
188
- "grad_norm": 31.787078857421875,
189
- "learning_rate": 4.530685920577618e-05,
190
- "loss": 0.4376,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.48670572329878325,
195
- "grad_norm": 25.446210861206055,
196
- "learning_rate": 4.5126353790613716e-05,
197
- "loss": 0.4131,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.5047318611987381,
202
- "grad_norm": 31.328969955444336,
203
- "learning_rate": 4.494584837545127e-05,
204
- "loss": 0.4194,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.5227579990986931,
209
- "grad_norm": 255.53672790527344,
210
- "learning_rate": 4.4765342960288806e-05,
211
- "loss": 0.4984,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.540784136998648,
216
- "grad_norm": 14.31953239440918,
217
- "learning_rate": 4.458483754512636e-05,
218
- "loss": 0.4417,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.558810274898603,
223
- "grad_norm": 26.942129135131836,
224
- "learning_rate": 4.44043321299639e-05,
225
- "loss": 0.4527,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.5768364127985579,
230
- "grad_norm": 36.217586517333984,
231
- "learning_rate": 4.422382671480145e-05,
232
- "loss": 0.4485,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.5948625506985128,
237
- "grad_norm": 38.54646682739258,
238
- "learning_rate": 4.404332129963899e-05,
239
- "loss": 0.4579,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 0.6128886885984678,
244
- "grad_norm": 21.504106521606445,
245
- "learning_rate": 4.386281588447654e-05,
246
- "loss": 0.4117,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.6309148264984227,
251
- "grad_norm": 16.283788681030273,
252
- "learning_rate": 4.368231046931408e-05,
253
- "loss": 0.438,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 0.6489409643983777,
258
- "grad_norm": 33.57164001464844,
259
- "learning_rate": 4.350180505415163e-05,
260
- "loss": 0.4043,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.6669671022983326,
265
- "grad_norm": 12.821700096130371,
266
- "learning_rate": 4.332129963898917e-05,
267
- "loss": 0.4119,
268
  "step": 370
269
  },
270
  {
271
  "epoch": 0.6849932401982876,
272
- "grad_norm": 23.477996826171875,
273
- "learning_rate": 4.314079422382672e-05,
274
- "loss": 0.3783,
275
  "step": 380
276
  },
277
  {
278
  "epoch": 0.7030193780982424,
279
- "grad_norm": 18.2862548828125,
280
- "learning_rate": 4.296028880866426e-05,
281
- "loss": 0.4023,
282
  "step": 390
283
  },
284
  {
285
  "epoch": 0.7210455159981974,
286
- "grad_norm": 19.799530029296875,
287
- "learning_rate": 4.277978339350181e-05,
288
- "loss": 0.419,
289
  "step": 400
290
  },
291
  {
292
  "epoch": 0.7390716538981523,
293
- "grad_norm": 17.081830978393555,
294
- "learning_rate": 4.259927797833935e-05,
295
- "loss": 0.467,
296
  "step": 410
297
  },
298
  {
299
  "epoch": 0.7570977917981072,
300
- "grad_norm": 24.008344650268555,
301
- "learning_rate": 4.24187725631769e-05,
302
- "loss": 0.4302,
303
  "step": 420
304
  },
305
  {
306
  "epoch": 0.7751239296980622,
307
- "grad_norm": 40.07936477661133,
308
- "learning_rate": 4.223826714801444e-05,
309
- "loss": 0.3622,
310
  "step": 430
311
  },
312
  {
313
  "epoch": 0.7931500675980171,
314
- "grad_norm": 19.614171981811523,
315
- "learning_rate": 4.205776173285199e-05,
316
- "loss": 0.4645,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 0.8111762054979721,
321
- "grad_norm": 32.0594596862793,
322
- "learning_rate": 4.187725631768953e-05,
323
- "loss": 0.3755,
324
  "step": 450
325
  },
326
  {
327
  "epoch": 0.829202343397927,
328
- "grad_norm": 15.177023887634277,
329
- "learning_rate": 4.169675090252708e-05,
330
- "loss": 0.4275,
331
  "step": 460
332
  },
333
  {
334
  "epoch": 0.847228481297882,
335
- "grad_norm": 11.258400917053223,
336
- "learning_rate": 4.151624548736462e-05,
337
- "loss": 0.4137,
338
  "step": 470
339
  },
340
  {
341
  "epoch": 0.8652546191978369,
342
- "grad_norm": 13.211421012878418,
343
- "learning_rate": 4.1335740072202167e-05,
344
- "loss": 0.4337,
345
  "step": 480
346
  },
347
  {
348
  "epoch": 0.8832807570977917,
349
- "grad_norm": 14.50296401977539,
350
- "learning_rate": 4.115523465703972e-05,
351
- "loss": 0.4318,
352
  "step": 490
353
  },
354
  {
355
  "epoch": 0.9013068949977467,
356
- "grad_norm": 21.239362716674805,
357
- "learning_rate": 4.0974729241877256e-05,
358
- "loss": 0.3939,
359
  "step": 500
360
  },
361
  {
362
  "epoch": 0.9193330328977016,
363
- "grad_norm": 18.6204891204834,
364
- "learning_rate": 4.079422382671481e-05,
365
- "loss": 0.3838,
366
  "step": 510
367
  },
368
  {
369
  "epoch": 0.9373591707976566,
370
- "grad_norm": 21.722551345825195,
371
- "learning_rate": 4.0613718411552346e-05,
372
- "loss": 0.4236,
373
  "step": 520
374
  },
375
  {
376
  "epoch": 0.9553853086976115,
377
- "grad_norm": 23.824357986450195,
378
- "learning_rate": 4.043321299638989e-05,
379
- "loss": 0.4062,
380
  "step": 530
381
  },
382
  {
383
  "epoch": 0.9734114465975665,
384
- "grad_norm": 18.93727684020996,
385
- "learning_rate": 4.0252707581227436e-05,
386
- "loss": 0.4412,
387
  "step": 540
388
  },
389
  {
390
  "epoch": 0.9914375844975214,
391
- "grad_norm": 16.050817489624023,
392
- "learning_rate": 4.007220216606498e-05,
393
- "loss": 0.4142,
394
  "step": 550
395
  },
396
  {
397
  "epoch": 1.0,
398
- "eval_f1": 0.8390086641144469,
399
- "eval_fn": 205,
400
- "eval_fp": 594,
401
- "eval_loss": 0.3744712173938751,
402
- "eval_precision": 0.7780269058295964,
403
- "eval_recall": 0.910362920857018,
404
- "eval_runtime": 73.1391,
405
- "eval_samples_per_second": 63.892,
406
- "eval_steps_per_second": 7.998,
407
- "eval_tn": 1792,
408
- "eval_tp": 2082,
409
  "step": 555
410
  }
411
  ],
412
  "logging_steps": 10,
413
- "max_steps": 2770,
414
  "num_input_tokens_seen": 0,
415
- "num_train_epochs": 5,
416
  "save_steps": 500,
417
  "stateful_callbacks": {
418
  "TrainerControl": {
 
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 0,
13
+ "eval_f1": 0.45686228664952067,
14
+ "eval_loss": 0.701814591884613,
15
+ "eval_precision": 0.4909547738693467,
16
+ "eval_recall": 0.42719720157411456,
17
+ "eval_runtime": 72.3647,
18
+ "eval_samples_per_second": 64.576,
19
+ "eval_steps_per_second": 8.084,
20
+ "step": 0
21
+ },
22
  {
23
  "epoch": 0.018026137899954935,
24
+ "grad_norm": 37.129154205322266,
25
+ "learning_rate": 4.9548736462093865e-05,
26
+ "loss": 0.8401,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.03605227579990987,
31
+ "grad_norm": 32.83527374267578,
32
+ "learning_rate": 4.909747292418773e-05,
33
+ "loss": 0.6093,
34
  "step": 20
35
  },
36
  {
37
  "epoch": 0.054078413699864804,
38
+ "grad_norm": 48.55173873901367,
39
+ "learning_rate": 4.864620938628159e-05,
40
+ "loss": 0.5201,
41
  "step": 30
42
  },
43
  {
44
  "epoch": 0.07210455159981974,
45
+ "grad_norm": 24.419078826904297,
46
+ "learning_rate": 4.819494584837546e-05,
47
+ "loss": 0.5847,
48
  "step": 40
49
  },
50
  {
51
  "epoch": 0.09013068949977468,
52
+ "grad_norm": 27.96658706665039,
53
+ "learning_rate": 4.7743682310469314e-05,
54
+ "loss": 0.4958,
55
  "step": 50
56
  },
57
  {
58
  "epoch": 0.10815682739972961,
59
+ "grad_norm": 28.15681266784668,
60
+ "learning_rate": 4.7292418772563177e-05,
61
+ "loss": 0.4754,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 0.12618296529968454,
66
+ "grad_norm": 27.729917526245117,
67
+ "learning_rate": 4.684115523465704e-05,
68
+ "loss": 0.4621,
69
  "step": 70
70
  },
71
  {
72
  "epoch": 0.14420910319963948,
73
+ "grad_norm": 24.70597267150879,
74
+ "learning_rate": 4.63898916967509e-05,
75
+ "loss": 0.4333,
76
  "step": 80
77
  },
78
  {
79
  "epoch": 0.16223524109959442,
80
+ "grad_norm": 30.800254821777344,
81
+ "learning_rate": 4.5938628158844764e-05,
82
+ "loss": 0.4425,
83
  "step": 90
84
  },
85
  {
86
  "epoch": 0.18026137899954936,
87
+ "grad_norm": 37.59977340698242,
88
+ "learning_rate": 4.548736462093863e-05,
89
+ "loss": 0.4774,
90
  "step": 100
91
  },
92
  {
93
  "epoch": 0.19828751689950427,
94
+ "grad_norm": 36.29924392700195,
95
+ "learning_rate": 4.5036101083032495e-05,
96
+ "loss": 0.4853,
97
  "step": 110
98
  },
99
  {
100
  "epoch": 0.21631365479945922,
101
+ "grad_norm": 22.9832763671875,
102
+ "learning_rate": 4.458483754512636e-05,
103
+ "loss": 0.4419,
104
  "step": 120
105
  },
106
  {
107
  "epoch": 0.23433979269941416,
108
+ "grad_norm": 35.384368896484375,
109
+ "learning_rate": 4.413357400722022e-05,
110
+ "loss": 0.4376,
111
  "step": 130
112
  },
113
  {
114
  "epoch": 0.25236593059936907,
115
+ "grad_norm": 16.165664672851562,
116
+ "learning_rate": 4.368231046931408e-05,
117
+ "loss": 0.4557,
118
  "step": 140
119
  },
120
  {
121
  "epoch": 0.270392068499324,
122
+ "grad_norm": 13.965127944946289,
123
+ "learning_rate": 4.3231046931407945e-05,
124
+ "loss": 0.4723,
125
  "step": 150
126
  },
127
  {
128
  "epoch": 0.28841820639927895,
129
+ "grad_norm": 30.59395980834961,
130
+ "learning_rate": 4.277978339350181e-05,
131
+ "loss": 0.4656,
132
  "step": 160
133
  },
134
  {
135
  "epoch": 0.3064443442992339,
136
+ "grad_norm": 27.32846450805664,
137
+ "learning_rate": 4.232851985559567e-05,
138
+ "loss": 0.4858,
139
  "step": 170
140
  },
141
  {
142
  "epoch": 0.32447048219918884,
143
+ "grad_norm": 14.389259338378906,
144
+ "learning_rate": 4.187725631768953e-05,
145
+ "loss": 0.4604,
146
  "step": 180
147
  },
148
  {
149
  "epoch": 0.3424966200991438,
150
+ "grad_norm": 17.084495544433594,
151
+ "learning_rate": 4.1425992779783394e-05,
152
+ "loss": 0.504,
153
  "step": 190
154
  },
155
  {
156
  "epoch": 0.3605227579990987,
157
+ "grad_norm": 27.83977508544922,
158
+ "learning_rate": 4.0974729241877256e-05,
159
+ "loss": 0.4581,
160
  "step": 200
161
  },
162
  {
163
  "epoch": 0.3785488958990536,
164
+ "grad_norm": 27.089218139648438,
165
+ "learning_rate": 4.052346570397112e-05,
166
+ "loss": 0.4239,
167
  "step": 210
168
  },
169
  {
170
  "epoch": 0.39657503379900855,
171
+ "grad_norm": 22.199338912963867,
172
+ "learning_rate": 4.007220216606498e-05,
173
+ "loss": 0.4356,
174
  "step": 220
175
  },
176
  {
177
  "epoch": 0.4146011716989635,
178
+ "grad_norm": 13.628811836242676,
179
+ "learning_rate": 3.962093862815885e-05,
180
+ "loss": 0.4363,
181
  "step": 230
182
  },
183
  {
184
  "epoch": 0.43262730959891843,
185
+ "grad_norm": 27.136962890625,
186
+ "learning_rate": 3.916967509025271e-05,
187
+ "loss": 0.4419,
188
  "step": 240
189
  },
190
  {
191
  "epoch": 0.45065344749887337,
192
+ "grad_norm": 21.228151321411133,
193
+ "learning_rate": 3.8718411552346575e-05,
194
+ "loss": 0.4258,
195
  "step": 250
196
  },
197
  {
198
  "epoch": 0.4686795853988283,
199
+ "grad_norm": 21.63435935974121,
200
+ "learning_rate": 3.826714801444044e-05,
201
+ "loss": 0.4232,
202
  "step": 260
203
  },
204
  {
205
  "epoch": 0.48670572329878325,
206
+ "grad_norm": 23.011816024780273,
207
+ "learning_rate": 3.78158844765343e-05,
208
+ "loss": 0.417,
209
  "step": 270
210
  },
211
  {
212
  "epoch": 0.5047318611987381,
213
+ "grad_norm": 22.028409957885742,
214
+ "learning_rate": 3.7364620938628155e-05,
215
+ "loss": 0.3913,
216
  "step": 280
217
  },
218
  {
219
  "epoch": 0.5227579990986931,
220
+ "grad_norm": 13.293946266174316,
221
+ "learning_rate": 3.6913357400722025e-05,
222
+ "loss": 0.4443,
223
  "step": 290
224
  },
225
  {
226
  "epoch": 0.540784136998648,
227
+ "grad_norm": 16.026897430419922,
228
+ "learning_rate": 3.646209386281589e-05,
229
+ "loss": 0.4243,
230
  "step": 300
231
  },
232
  {
233
  "epoch": 0.558810274898603,
234
+ "grad_norm": 22.489286422729492,
235
+ "learning_rate": 3.601083032490975e-05,
236
+ "loss": 0.4157,
237
  "step": 310
238
  },
239
  {
240
  "epoch": 0.5768364127985579,
241
+ "grad_norm": 34.415470123291016,
242
+ "learning_rate": 3.555956678700361e-05,
243
+ "loss": 0.4159,
244
  "step": 320
245
  },
246
  {
247
  "epoch": 0.5948625506985128,
248
+ "grad_norm": 28.348594665527344,
249
+ "learning_rate": 3.5108303249097474e-05,
250
+ "loss": 0.4409,
251
  "step": 330
252
  },
253
  {
254
  "epoch": 0.6128886885984678,
255
+ "grad_norm": 16.92120933532715,
256
+ "learning_rate": 3.4657039711191336e-05,
257
+ "loss": 0.4034,
258
  "step": 340
259
  },
260
  {
261
  "epoch": 0.6309148264984227,
262
+ "grad_norm": 15.073827743530273,
263
+ "learning_rate": 3.42057761732852e-05,
264
+ "loss": 0.4164,
265
  "step": 350
266
  },
267
  {
268
  "epoch": 0.6489409643983777,
269
+ "grad_norm": 23.035175323486328,
270
+ "learning_rate": 3.375451263537907e-05,
271
+ "loss": 0.3952,
272
  "step": 360
273
  },
274
  {
275
  "epoch": 0.6669671022983326,
276
+ "grad_norm": 19.5601806640625,
277
+ "learning_rate": 3.330324909747293e-05,
278
+ "loss": 0.3883,
279
  "step": 370
280
  },
281
  {
282
  "epoch": 0.6849932401982876,
283
+ "grad_norm": 20.22748565673828,
284
+ "learning_rate": 3.2851985559566786e-05,
285
+ "loss": 0.3869,
286
  "step": 380
287
  },
288
  {
289
  "epoch": 0.7030193780982424,
290
+ "grad_norm": 19.998939514160156,
291
+ "learning_rate": 3.240072202166065e-05,
292
+ "loss": 0.3889,
293
  "step": 390
294
  },
295
  {
296
  "epoch": 0.7210455159981974,
297
+ "grad_norm": 27.682971954345703,
298
+ "learning_rate": 3.194945848375451e-05,
299
+ "loss": 0.4554,
300
  "step": 400
301
  },
302
  {
303
  "epoch": 0.7390716538981523,
304
+ "grad_norm": 16.14117431640625,
305
+ "learning_rate": 3.149819494584837e-05,
306
+ "loss": 0.4202,
307
  "step": 410
308
  },
309
  {
310
  "epoch": 0.7570977917981072,
311
+ "grad_norm": 30.020023345947266,
312
+ "learning_rate": 3.104693140794224e-05,
313
+ "loss": 0.4067,
314
  "step": 420
315
  },
316
  {
317
  "epoch": 0.7751239296980622,
318
+ "grad_norm": 33.89706039428711,
319
+ "learning_rate": 3.0595667870036104e-05,
320
+ "loss": 0.3647,
321
  "step": 430
322
  },
323
  {
324
  "epoch": 0.7931500675980171,
325
+ "grad_norm": 15.263274192810059,
326
+ "learning_rate": 3.0144404332129967e-05,
327
+ "loss": 0.4292,
328
  "step": 440
329
  },
330
  {
331
  "epoch": 0.8111762054979721,
332
+ "grad_norm": 20.31402587890625,
333
+ "learning_rate": 2.969314079422383e-05,
334
+ "loss": 0.3802,
335
  "step": 450
336
  },
337
  {
338
  "epoch": 0.829202343397927,
339
+ "grad_norm": 18.45032501220703,
340
+ "learning_rate": 2.924187725631769e-05,
341
+ "loss": 0.39,
342
  "step": 460
343
  },
344
  {
345
  "epoch": 0.847228481297882,
346
+ "grad_norm": 13.733787536621094,
347
+ "learning_rate": 2.879061371841155e-05,
348
+ "loss": 0.3903,
349
  "step": 470
350
  },
351
  {
352
  "epoch": 0.8652546191978369,
353
+ "grad_norm": 18.276058197021484,
354
+ "learning_rate": 2.8339350180505413e-05,
355
+ "loss": 0.4174,
356
  "step": 480
357
  },
358
  {
359
  "epoch": 0.8832807570977917,
360
+ "grad_norm": 14.345264434814453,
361
+ "learning_rate": 2.7888086642599282e-05,
362
+ "loss": 0.4075,
363
  "step": 490
364
  },
365
  {
366
  "epoch": 0.9013068949977467,
367
+ "grad_norm": 18.126863479614258,
368
+ "learning_rate": 2.7436823104693144e-05,
369
+ "loss": 0.3761,
370
  "step": 500
371
  },
372
  {
373
  "epoch": 0.9193330328977016,
374
+ "grad_norm": 27.871076583862305,
375
+ "learning_rate": 2.6985559566787007e-05,
376
+ "loss": 0.3941,
377
  "step": 510
378
  },
379
  {
380
  "epoch": 0.9373591707976566,
381
+ "grad_norm": 17.600557327270508,
382
+ "learning_rate": 2.6534296028880866e-05,
383
+ "loss": 0.3836,
384
  "step": 520
385
  },
386
  {
387
  "epoch": 0.9553853086976115,
388
+ "grad_norm": 23.826416015625,
389
+ "learning_rate": 2.6083032490974728e-05,
390
+ "loss": 0.3882,
391
  "step": 530
392
  },
393
  {
394
  "epoch": 0.9734114465975665,
395
+ "grad_norm": 17.19468116760254,
396
+ "learning_rate": 2.563176895306859e-05,
397
+ "loss": 0.4026,
398
  "step": 540
399
  },
400
  {
401
  "epoch": 0.9914375844975214,
402
+ "grad_norm": 14.197047233581543,
403
+ "learning_rate": 2.518050541516246e-05,
404
+ "loss": 0.3874,
405
  "step": 550
406
  },
407
  {
408
  "epoch": 1.0,
409
+ "eval_f1": 0.8447773616312942,
410
+ "eval_loss": 0.35632607340812683,
411
+ "eval_precision": 0.8058753473600635,
412
+ "eval_recall": 0.8876257105378225,
413
+ "eval_runtime": 83.9871,
414
+ "eval_samples_per_second": 55.639,
415
+ "eval_steps_per_second": 6.965,
 
 
 
 
416
  "step": 555
417
  }
418
  ],
419
  "logging_steps": 10,
420
+ "max_steps": 1108,
421
  "num_input_tokens_seen": 0,
422
+ "num_train_epochs": 2,
423
  "save_steps": 500,
424
  "stateful_callbacks": {
425
  "TrainerControl": {
checkpoint-555/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:810629f6569bf22bd4122bdaa88d9cc0d6cea1b331667027a3cd4891b919dd14
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:863b542ed1120790af07a52bd3cabdf15b7f39057e82c90e137fcc5e7061f2a0
3
  size 5432