ccoreilly committed on
Commit
8176c36
·
1 Parent(s): 3d94e8c

Remove unnecessary files added as reference

Browse files
Files changed (4) hide show
  1. optimizer.pt +0 -3
  2. scheduler.pt +0 -3
  3. trainer_state.json +0 -510
  4. training_args.bin +0 -3
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:39823d9bd704e711675ae48109d4cc2d266d58e12f8227be95133228c697e396
3
- size 2490422279
 
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ecc9e980790d5f77be705a4f5cde16f657651bb91364b496dd824703d969308
3
- size 623
 
 
 
 
trainer_state.json DELETED
@@ -1,510 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 8.104816696762272,
5
- "global_step": 193000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.21,
12
- "learning_rate": 0.0002955626928115623,
13
- "loss": 1.1705,
14
- "step": 5000
15
- },
16
- {
17
- "epoch": 0.21,
18
- "eval_loss": 0.22145646810531616,
19
- "eval_runtime": 5082.7114,
20
- "eval_samples_per_second": 4.02,
21
- "step": 5000
22
- },
23
- {
24
- "epoch": 0.42,
25
- "learning_rate": 0.0002892236825423657,
26
- "loss": 0.2914,
27
- "step": 10000
28
- },
29
- {
30
- "epoch": 0.42,
31
- "eval_loss": 0.19062571227550507,
32
- "eval_runtime": 5086.3406,
33
- "eval_samples_per_second": 4.018,
34
- "step": 10000
35
- },
36
- {
37
- "epoch": 0.63,
38
- "learning_rate": 0.00028288467227316906,
39
- "loss": 0.2617,
40
- "step": 15000
41
- },
42
- {
43
- "epoch": 0.63,
44
- "eval_loss": 0.1784171611070633,
45
- "eval_runtime": 4895.8348,
46
- "eval_samples_per_second": 4.174,
47
- "step": 15000
48
- },
49
- {
50
- "epoch": 0.84,
51
- "learning_rate": 0.0002765456620039724,
52
- "loss": 0.2449,
53
- "step": 20000
54
- },
55
- {
56
- "epoch": 0.84,
57
- "eval_loss": 0.16641439497470856,
58
- "eval_runtime": 4945.3495,
59
- "eval_samples_per_second": 4.132,
60
- "step": 20000
61
- },
62
- {
63
- "epoch": 1.05,
64
- "learning_rate": 0.0002702066517347758,
65
- "loss": 0.2264,
66
- "step": 25000
67
- },
68
- {
69
- "epoch": 1.05,
70
- "eval_loss": 0.15650227665901184,
71
- "eval_runtime": 4999.732,
72
- "eval_samples_per_second": 4.087,
73
- "step": 25000
74
- },
75
- {
76
- "epoch": 1.26,
77
- "learning_rate": 0.00026386764146557915,
78
- "loss": 0.2093,
79
- "step": 30000
80
- },
81
- {
82
- "epoch": 1.26,
83
- "eval_loss": 0.15184776484966278,
84
- "eval_runtime": 4877.0685,
85
- "eval_samples_per_second": 4.19,
86
- "step": 30000
87
- },
88
- {
89
- "epoch": 1.47,
90
- "learning_rate": 0.0002575286311963825,
91
- "loss": 0.2003,
92
- "step": 35000
93
- },
94
- {
95
- "epoch": 1.47,
96
- "eval_loss": 0.14190027117729187,
97
- "eval_runtime": 4879.5538,
98
- "eval_samples_per_second": 4.188,
99
- "step": 35000
100
- },
101
- {
102
- "epoch": 1.68,
103
- "learning_rate": 0.0002511896209271859,
104
- "loss": 0.1962,
105
- "step": 40000
106
- },
107
- {
108
- "epoch": 1.68,
109
- "eval_loss": 0.14596430957317352,
110
- "eval_runtime": 4860.8654,
111
- "eval_samples_per_second": 4.204,
112
- "step": 40000
113
- },
114
- {
115
- "epoch": 1.89,
116
- "learning_rate": 0.00024485061065798925,
117
- "loss": 0.19,
118
- "step": 45000
119
- },
120
- {
121
- "epoch": 1.89,
122
- "eval_loss": 0.13479308784008026,
123
- "eval_runtime": 4872.4496,
124
- "eval_samples_per_second": 4.194,
125
- "step": 45000
126
- },
127
- {
128
- "epoch": 2.1,
129
- "learning_rate": 0.00023851160038879262,
130
- "loss": 0.1769,
131
- "step": 50000
132
- },
133
- {
134
- "epoch": 2.1,
135
- "eval_loss": 0.13018357753753662,
136
- "eval_runtime": 4872.3296,
137
- "eval_samples_per_second": 4.194,
138
- "step": 50000
139
- },
140
- {
141
- "epoch": 2.31,
142
- "learning_rate": 0.00023217259011959596,
143
- "loss": 0.1674,
144
- "step": 55000
145
- },
146
- {
147
- "epoch": 2.31,
148
- "eval_loss": 0.1303720772266388,
149
- "eval_runtime": 4874.4649,
150
- "eval_samples_per_second": 4.192,
151
- "step": 55000
152
- },
153
- {
154
- "epoch": 2.52,
155
- "learning_rate": 0.00022583357985039935,
156
- "loss": 0.1655,
157
- "step": 60000
158
- },
159
- {
160
- "epoch": 2.52,
161
- "eval_loss": 0.12317115068435669,
162
- "eval_runtime": 4882.2049,
163
- "eval_samples_per_second": 4.186,
164
- "step": 60000
165
- },
166
- {
167
- "epoch": 2.73,
168
- "learning_rate": 0.00021949456958120271,
169
- "loss": 0.1608,
170
- "step": 65000
171
- },
172
- {
173
- "epoch": 2.73,
174
- "eval_loss": 0.12056649476289749,
175
- "eval_runtime": 4877.3831,
176
- "eval_samples_per_second": 4.19,
177
- "step": 65000
178
- },
179
- {
180
- "epoch": 2.94,
181
- "learning_rate": 0.00021315555931200605,
182
- "loss": 0.1565,
183
- "step": 70000
184
- },
185
- {
186
- "epoch": 2.94,
187
- "eval_loss": 0.11486475169658661,
188
- "eval_runtime": 4876.7317,
189
- "eval_samples_per_second": 4.19,
190
- "step": 70000
191
- },
192
- {
193
- "epoch": 3.15,
194
- "learning_rate": 0.00020681654904280945,
195
- "loss": 0.146,
196
- "step": 75000
197
- },
198
- {
199
- "epoch": 3.15,
200
- "eval_loss": 0.11829441785812378,
201
- "eval_runtime": 4833.5153,
202
- "eval_samples_per_second": 4.228,
203
- "step": 75000
204
- },
205
- {
206
- "epoch": 3.36,
207
- "learning_rate": 0.00020047753877361279,
208
- "loss": 0.1403,
209
- "step": 80000
210
- },
211
- {
212
- "epoch": 3.36,
213
- "eval_loss": 0.11143175512552261,
214
- "eval_runtime": 4853.9814,
215
- "eval_samples_per_second": 4.21,
216
- "step": 80000
217
- },
218
- {
219
- "epoch": 3.57,
220
- "learning_rate": 0.00019413852850441618,
221
- "loss": 0.1376,
222
- "step": 85000
223
- },
224
- {
225
- "epoch": 3.57,
226
- "eval_loss": 0.11027190089225769,
227
- "eval_runtime": 4858.6043,
228
- "eval_samples_per_second": 4.206,
229
- "step": 85000
230
- },
231
- {
232
- "epoch": 3.78,
233
- "learning_rate": 0.00018779951823521952,
234
- "loss": 0.1337,
235
- "step": 90000
236
- },
237
- {
238
- "epoch": 3.78,
239
- "eval_loss": 0.10872453451156616,
240
- "eval_runtime": 4864.6388,
241
- "eval_samples_per_second": 4.201,
242
- "step": 90000
243
- },
244
- {
245
- "epoch": 3.99,
246
- "learning_rate": 0.00018146050796602288,
247
- "loss": 0.1325,
248
- "step": 95000
249
- },
250
- {
251
- "epoch": 3.99,
252
- "eval_loss": 0.10718829929828644,
253
- "eval_runtime": 4921.9134,
254
- "eval_samples_per_second": 4.152,
255
- "step": 95000
256
- },
257
- {
258
- "epoch": 4.2,
259
- "learning_rate": 0.00017512149769682625,
260
- "loss": 0.1195,
261
- "step": 100000
262
- },
263
- {
264
- "epoch": 4.2,
265
- "eval_loss": 0.10430513322353363,
266
- "eval_runtime": 4864.6263,
267
- "eval_samples_per_second": 4.201,
268
- "step": 100000
269
- },
270
- {
271
- "epoch": 4.41,
272
- "learning_rate": 0.00016878248742762961,
273
- "loss": 0.118,
274
- "step": 105000
275
- },
276
- {
277
- "epoch": 4.41,
278
- "eval_loss": 0.1070966124534607,
279
- "eval_runtime": 4879.2783,
280
- "eval_samples_per_second": 4.188,
281
- "step": 105000
282
- },
283
- {
284
- "epoch": 4.62,
285
- "learning_rate": 0.00016244347715843295,
286
- "loss": 0.1173,
287
- "step": 110000
288
- },
289
- {
290
- "epoch": 4.62,
291
- "eval_loss": 0.10433077067136765,
292
- "eval_runtime": 4876.4984,
293
- "eval_samples_per_second": 4.191,
294
- "step": 110000
295
- },
296
- {
297
- "epoch": 4.83,
298
- "learning_rate": 0.00015610446688923635,
299
- "loss": 0.115,
300
- "step": 115000
301
- },
302
- {
303
- "epoch": 4.83,
304
- "eval_loss": 0.09682977199554443,
305
- "eval_runtime": 4893.4652,
306
- "eval_samples_per_second": 4.176,
307
- "step": 115000
308
- },
309
- {
310
- "epoch": 5.04,
311
- "learning_rate": 0.0001497654566200397,
312
- "loss": 0.1102,
313
- "step": 120000
314
- },
315
- {
316
- "epoch": 5.04,
317
- "eval_loss": 0.09630288183689117,
318
- "eval_runtime": 4914.2049,
319
- "eval_samples_per_second": 4.158,
320
- "step": 120000
321
- },
322
- {
323
- "epoch": 5.25,
324
- "learning_rate": 0.00014342644635084308,
325
- "loss": 0.1019,
326
- "step": 125000
327
- },
328
- {
329
- "epoch": 5.25,
330
- "eval_loss": 0.0918075293302536,
331
- "eval_runtime": 4893.9499,
332
- "eval_samples_per_second": 4.176,
333
- "step": 125000
334
- },
335
- {
336
- "epoch": 5.46,
337
- "learning_rate": 0.00013708743608164644,
338
- "loss": 0.1014,
339
- "step": 130000
340
- },
341
- {
342
- "epoch": 5.46,
343
- "eval_loss": 0.09067174792289734,
344
- "eval_runtime": 4891.4795,
345
- "eval_samples_per_second": 4.178,
346
- "step": 130000
347
- },
348
- {
349
- "epoch": 5.67,
350
- "learning_rate": 0.0001307484258124498,
351
- "loss": 0.1,
352
- "step": 135000
353
- },
354
- {
355
- "epoch": 5.67,
356
- "eval_loss": 0.08851899951696396,
357
- "eval_runtime": 4884.1688,
358
- "eval_samples_per_second": 4.184,
359
- "step": 135000
360
- },
361
- {
362
- "epoch": 5.88,
363
- "learning_rate": 0.00012440941554325318,
364
- "loss": 0.0971,
365
- "step": 140000
366
- },
367
- {
368
- "epoch": 5.88,
369
- "eval_loss": 0.08720648288726807,
370
- "eval_runtime": 4896.4597,
371
- "eval_samples_per_second": 4.173,
372
- "step": 140000
373
- },
374
- {
375
- "epoch": 6.09,
376
- "learning_rate": 0.00011807040527405654,
377
- "loss": 0.0921,
378
- "step": 145000
379
- },
380
- {
381
- "epoch": 6.09,
382
- "eval_loss": 0.08666499704122543,
383
- "eval_runtime": 5117.0953,
384
- "eval_samples_per_second": 3.993,
385
- "step": 145000
386
- },
387
- {
388
- "epoch": 6.3,
389
- "learning_rate": 0.00011173139500485991,
390
- "loss": 0.0884,
391
- "step": 150000
392
- },
393
- {
394
- "epoch": 6.3,
395
- "eval_loss": 0.0831904485821724,
396
- "eval_runtime": 4879.0536,
397
- "eval_samples_per_second": 4.188,
398
- "step": 150000
399
- },
400
- {
401
- "epoch": 6.51,
402
- "learning_rate": 0.00010539238473566326,
403
- "loss": 0.0864,
404
- "step": 155000
405
- },
406
- {
407
- "epoch": 6.51,
408
- "eval_loss": 0.08337873965501785,
409
- "eval_runtime": 4900.7475,
410
- "eval_samples_per_second": 4.17,
411
- "step": 155000
412
- },
413
- {
414
- "epoch": 6.72,
415
- "learning_rate": 9.905337446646663e-05,
416
- "loss": 0.0861,
417
- "step": 160000
418
- },
419
- {
420
- "epoch": 6.72,
421
- "eval_loss": 0.08155979961156845,
422
- "eval_runtime": 4919.8246,
423
- "eval_samples_per_second": 4.154,
424
- "step": 160000
425
- },
426
- {
427
- "epoch": 6.93,
428
- "learning_rate": 9.271436419726999e-05,
429
- "loss": 0.083,
430
- "step": 165000
431
- },
432
- {
433
- "epoch": 6.93,
434
- "eval_loss": 0.08167865127325058,
435
- "eval_runtime": 4921.4971,
436
- "eval_samples_per_second": 4.152,
437
- "step": 165000
438
- },
439
- {
440
- "epoch": 7.14,
441
- "learning_rate": 8.637535392807336e-05,
442
- "loss": 0.0769,
443
- "step": 170000
444
- },
445
- {
446
- "epoch": 7.14,
447
- "eval_loss": 0.0775604099035263,
448
- "eval_runtime": 4893.4221,
449
- "eval_samples_per_second": 4.176,
450
- "step": 170000
451
- },
452
- {
453
- "epoch": 7.35,
454
- "learning_rate": 8.003634365887672e-05,
455
- "loss": 0.0749,
456
- "step": 175000
457
- },
458
- {
459
- "epoch": 7.35,
460
- "eval_loss": 0.07773936539888382,
461
- "eval_runtime": 4914.0163,
462
- "eval_samples_per_second": 4.159,
463
- "step": 175000
464
- },
465
- {
466
- "epoch": 7.56,
467
- "learning_rate": 7.369733338968009e-05,
468
- "loss": 0.0735,
469
- "step": 180000
470
- },
471
- {
472
- "epoch": 7.56,
473
- "eval_loss": 0.07420430332422256,
474
- "eval_runtime": 4934.0827,
475
- "eval_samples_per_second": 4.142,
476
- "step": 180000
477
- },
478
- {
479
- "epoch": 7.77,
480
- "learning_rate": 6.735832312048346e-05,
481
- "loss": 0.0715,
482
- "step": 185000
483
- },
484
- {
485
- "epoch": 7.77,
486
- "eval_loss": 0.07269106060266495,
487
- "eval_runtime": 4925.1046,
488
- "eval_samples_per_second": 4.149,
489
- "step": 185000
490
- },
491
- {
492
- "epoch": 7.98,
493
- "learning_rate": 6.1019312851286814e-05,
494
- "loss": 0.0702,
495
- "step": 190000
496
- },
497
- {
498
- "epoch": 7.98,
499
- "eval_loss": 0.07183075696229935,
500
- "eval_runtime": 4948.9933,
501
- "eval_samples_per_second": 4.129,
502
- "step": 190000
503
- }
504
- ],
505
- "max_steps": 238130,
506
- "num_train_epochs": 10,
507
- "total_flos": 5.387421756388246e+20,
508
- "trial_name": null,
509
- "trial_params": null
510
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebe791c5dd04e87cd6dce636d0245fc7269b2260047be0dafb37e8e4f5eb0b6a
3
- size 2287