tonyzhao6 commited on
Commit
2f66d9a
·
verified ·
1 Parent(s): f56539f

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -579
trainer_state.json DELETED
@@ -1,579 +0,0 @@
1
- {
2
- "best_metric": 0.11245531588792801,
3
- "best_model_checkpoint": "/home/tonyzhao6/Projects/urgency-detection-finetuning/results/model_training/gemma-2-2b-it-8bit-64-32-v4/checkpoint-700",
4
- "epoch": 0.970873786407767,
5
- "eval_steps": 100,
6
- "global_step": 700,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.013869625520110958,
13
- "grad_norm": 0.9987179040908813,
14
- "learning_rate": 1.834862385321101e-05,
15
- "loss": 2.1962,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.027739251040221916,
20
- "grad_norm": 0.7583550810813904,
21
- "learning_rate": 3.669724770642202e-05,
22
- "loss": 1.6408,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.04160887656033287,
27
- "grad_norm": 0.6101565361022949,
28
- "learning_rate": 5.504587155963303e-05,
29
- "loss": 0.8461,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.05547850208044383,
34
- "grad_norm": 0.2617435157299042,
35
- "learning_rate": 7.339449541284404e-05,
36
- "loss": 0.3555,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.06934812760055478,
41
- "grad_norm": 0.17355866730213165,
42
- "learning_rate": 9.174311926605506e-05,
43
- "loss": 0.2506,
44
- "step": 50
45
- },
46
- {
47
- "epoch": 0.08321775312066575,
48
- "grad_norm": 0.1640639752149582,
49
- "learning_rate": 0.00011009174311926606,
50
- "loss": 0.23,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.0970873786407767,
55
- "grad_norm": 0.1592264175415039,
56
- "learning_rate": 0.00012844036697247707,
57
- "loss": 0.2178,
58
- "step": 70
59
- },
60
- {
61
- "epoch": 0.11095700416088766,
62
- "grad_norm": 0.13895617425441742,
63
- "learning_rate": 0.0001467889908256881,
64
- "loss": 0.1953,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.12482662968099861,
69
- "grad_norm": 0.13263335824012756,
70
- "learning_rate": 0.0001651376146788991,
71
- "loss": 0.1851,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.13869625520110956,
76
- "grad_norm": 0.14112189412117004,
77
- "learning_rate": 0.00018348623853211012,
78
- "loss": 0.1782,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.13869625520110956,
83
- "eval_loss": 0.1666356474161148,
84
- "eval_runtime": 87.2237,
85
- "eval_samples_per_second": 14.48,
86
- "eval_steps_per_second": 0.906,
87
- "step": 100
88
- },
89
- {
90
- "epoch": 0.15256588072122051,
91
- "grad_norm": 0.1304333657026291,
92
- "learning_rate": 0.00019994279176201374,
93
- "loss": 0.1644,
94
- "step": 110
95
- },
96
- {
97
- "epoch": 0.1664355062413315,
98
- "grad_norm": 0.13556469976902008,
99
- "learning_rate": 0.00019937070938215104,
100
- "loss": 0.1614,
101
- "step": 120
102
- },
103
- {
104
- "epoch": 0.18030513176144244,
105
- "grad_norm": 0.12493357062339783,
106
- "learning_rate": 0.00019879862700228834,
107
- "loss": 0.148,
108
- "step": 130
109
- },
110
- {
111
- "epoch": 0.1941747572815534,
112
- "grad_norm": 0.12785165011882782,
113
- "learning_rate": 0.00019822654462242566,
114
- "loss": 0.1526,
115
- "step": 140
116
- },
117
- {
118
- "epoch": 0.20804438280166435,
119
- "grad_norm": 0.1414169818162918,
120
- "learning_rate": 0.00019765446224256295,
121
- "loss": 0.1498,
122
- "step": 150
123
- },
124
- {
125
- "epoch": 0.22191400832177532,
126
- "grad_norm": 0.11336012184619904,
127
- "learning_rate": 0.00019708237986270025,
128
- "loss": 0.1506,
129
- "step": 160
130
- },
131
- {
132
- "epoch": 0.23578363384188628,
133
- "grad_norm": 0.11893126368522644,
134
- "learning_rate": 0.00019651029748283754,
135
- "loss": 0.1343,
136
- "step": 170
137
- },
138
- {
139
- "epoch": 0.24965325936199723,
140
- "grad_norm": 0.12188615649938583,
141
- "learning_rate": 0.00019593821510297484,
142
- "loss": 0.1379,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 0.2635228848821082,
147
- "grad_norm": 0.11430846899747849,
148
- "learning_rate": 0.00019536613272311214,
149
- "loss": 0.1344,
150
- "step": 190
151
- },
152
- {
153
- "epoch": 0.27739251040221913,
154
- "grad_norm": 0.11359121650457382,
155
- "learning_rate": 0.00019479405034324946,
156
- "loss": 0.139,
157
- "step": 200
158
- },
159
- {
160
- "epoch": 0.27739251040221913,
161
- "eval_loss": 0.13432957231998444,
162
- "eval_runtime": 86.9127,
163
- "eval_samples_per_second": 14.532,
164
- "eval_steps_per_second": 0.909,
165
- "step": 200
166
- },
167
- {
168
- "epoch": 0.2912621359223301,
169
- "grad_norm": 0.10335998982191086,
170
- "learning_rate": 0.00019422196796338675,
171
- "loss": 0.1374,
172
- "step": 210
173
- },
174
- {
175
- "epoch": 0.30513176144244103,
176
- "grad_norm": 0.09991727769374847,
177
- "learning_rate": 0.00019364988558352405,
178
- "loss": 0.1344,
179
- "step": 220
180
- },
181
- {
182
- "epoch": 0.31900138696255204,
183
- "grad_norm": 0.10995834320783615,
184
- "learning_rate": 0.00019307780320366135,
185
- "loss": 0.1394,
186
- "step": 230
187
- },
188
- {
189
- "epoch": 0.332871012482663,
190
- "grad_norm": 0.10396566987037659,
191
- "learning_rate": 0.00019250572082379864,
192
- "loss": 0.1223,
193
- "step": 240
194
- },
195
- {
196
- "epoch": 0.34674063800277394,
197
- "grad_norm": 0.10032226890325546,
198
- "learning_rate": 0.00019193363844393594,
199
- "loss": 0.1285,
200
- "step": 250
201
- },
202
- {
203
- "epoch": 0.3606102635228849,
204
- "grad_norm": 0.10445073246955872,
205
- "learning_rate": 0.00019136155606407323,
206
- "loss": 0.1261,
207
- "step": 260
208
- },
209
- {
210
- "epoch": 0.37447988904299584,
211
- "grad_norm": 0.11336586624383926,
212
- "learning_rate": 0.00019078947368421053,
213
- "loss": 0.1286,
214
- "step": 270
215
- },
216
- {
217
- "epoch": 0.3883495145631068,
218
- "grad_norm": 0.10205301642417908,
219
- "learning_rate": 0.00019021739130434782,
220
- "loss": 0.1208,
221
- "step": 280
222
- },
223
- {
224
- "epoch": 0.40221914008321774,
225
- "grad_norm": 0.09567493945360184,
226
- "learning_rate": 0.00018964530892448515,
227
- "loss": 0.1271,
228
- "step": 290
229
- },
230
- {
231
- "epoch": 0.4160887656033287,
232
- "grad_norm": 0.10747899860143661,
233
- "learning_rate": 0.00018907322654462244,
234
- "loss": 0.1233,
235
- "step": 300
236
- },
237
- {
238
- "epoch": 0.4160887656033287,
239
- "eval_loss": 0.1257271021604538,
240
- "eval_runtime": 86.6524,
241
- "eval_samples_per_second": 14.575,
242
- "eval_steps_per_second": 0.912,
243
- "step": 300
244
- },
245
- {
246
- "epoch": 0.42995839112343964,
247
- "grad_norm": 0.10108979046344757,
248
- "learning_rate": 0.00018850114416475974,
249
- "loss": 0.124,
250
- "step": 310
251
- },
252
- {
253
- "epoch": 0.44382801664355065,
254
- "grad_norm": 0.09316466003656387,
255
- "learning_rate": 0.00018792906178489703,
256
- "loss": 0.1212,
257
- "step": 320
258
- },
259
- {
260
- "epoch": 0.4576976421636616,
261
- "grad_norm": 0.10638488829135895,
262
- "learning_rate": 0.00018735697940503433,
263
- "loss": 0.1288,
264
- "step": 330
265
- },
266
- {
267
- "epoch": 0.47156726768377255,
268
- "grad_norm": 0.09914766252040863,
269
- "learning_rate": 0.00018678489702517162,
270
- "loss": 0.1259,
271
- "step": 340
272
- },
273
- {
274
- "epoch": 0.4854368932038835,
275
- "grad_norm": 0.09707864373922348,
276
- "learning_rate": 0.00018621281464530892,
277
- "loss": 0.124,
278
- "step": 350
279
- },
280
- {
281
- "epoch": 0.49930651872399445,
282
- "grad_norm": 0.09507231414318085,
283
- "learning_rate": 0.00018564073226544621,
284
- "loss": 0.1262,
285
- "step": 360
286
- },
287
- {
288
- "epoch": 0.5131761442441054,
289
- "grad_norm": 0.09129882603883743,
290
- "learning_rate": 0.0001850686498855835,
291
- "loss": 0.1211,
292
- "step": 370
293
- },
294
- {
295
- "epoch": 0.5270457697642164,
296
- "grad_norm": 0.09889239072799683,
297
- "learning_rate": 0.00018449656750572083,
298
- "loss": 0.1218,
299
- "step": 380
300
- },
301
- {
302
- "epoch": 0.5409153952843273,
303
- "grad_norm": 0.09886115044355392,
304
- "learning_rate": 0.00018392448512585813,
305
- "loss": 0.1214,
306
- "step": 390
307
- },
308
- {
309
- "epoch": 0.5547850208044383,
310
- "grad_norm": 0.09064166992902756,
311
- "learning_rate": 0.00018335240274599542,
312
- "loss": 0.126,
313
- "step": 400
314
- },
315
- {
316
- "epoch": 0.5547850208044383,
317
- "eval_loss": 0.12142250686883926,
318
- "eval_runtime": 86.64,
319
- "eval_samples_per_second": 14.578,
320
- "eval_steps_per_second": 0.912,
321
- "step": 400
322
- },
323
- {
324
- "epoch": 0.5686546463245492,
325
- "grad_norm": 0.10354544222354889,
326
- "learning_rate": 0.00018278032036613272,
327
- "loss": 0.1253,
328
- "step": 410
329
- },
330
- {
331
- "epoch": 0.5825242718446602,
332
- "grad_norm": 0.09165250509977341,
333
- "learning_rate": 0.00018220823798627001,
334
- "loss": 0.1224,
335
- "step": 420
336
- },
337
- {
338
- "epoch": 0.5963938973647711,
339
- "grad_norm": 0.09138130396604538,
340
- "learning_rate": 0.0001816361556064073,
341
- "loss": 0.1289,
342
- "step": 430
343
- },
344
- {
345
- "epoch": 0.6102635228848821,
346
- "grad_norm": 0.09735599905252457,
347
- "learning_rate": 0.00018106407322654463,
348
- "loss": 0.1181,
349
- "step": 440
350
- },
351
- {
352
- "epoch": 0.624133148404993,
353
- "grad_norm": 0.09955897927284241,
354
- "learning_rate": 0.00018049199084668193,
355
- "loss": 0.1207,
356
- "step": 450
357
- },
358
- {
359
- "epoch": 0.6380027739251041,
360
- "grad_norm": 0.09378518909215927,
361
- "learning_rate": 0.00017991990846681922,
362
- "loss": 0.1189,
363
- "step": 460
364
- },
365
- {
366
- "epoch": 0.651872399445215,
367
- "grad_norm": 0.09985518455505371,
368
- "learning_rate": 0.00017934782608695652,
369
- "loss": 0.1196,
370
- "step": 470
371
- },
372
- {
373
- "epoch": 0.665742024965326,
374
- "grad_norm": 0.09567826986312866,
375
- "learning_rate": 0.00017877574370709382,
376
- "loss": 0.1189,
377
- "step": 480
378
- },
379
- {
380
- "epoch": 0.6796116504854369,
381
- "grad_norm": 0.09133660793304443,
382
- "learning_rate": 0.0001782036613272311,
383
- "loss": 0.1199,
384
- "step": 490
385
- },
386
- {
387
- "epoch": 0.6934812760055479,
388
- "grad_norm": 0.07571779191493988,
389
- "learning_rate": 0.00017763157894736843,
390
- "loss": 0.1199,
391
- "step": 500
392
- },
393
- {
394
- "epoch": 0.6934812760055479,
395
- "eval_loss": 0.11764033138751984,
396
- "eval_runtime": 86.7125,
397
- "eval_samples_per_second": 14.565,
398
- "eval_steps_per_second": 0.911,
399
- "step": 500
400
- },
401
- {
402
- "epoch": 0.7073509015256588,
403
- "grad_norm": 0.07904700189828873,
404
- "learning_rate": 0.00017705949656750573,
405
- "loss": 0.1174,
406
- "step": 510
407
- },
408
- {
409
- "epoch": 0.7212205270457698,
410
- "grad_norm": 0.0874553844332695,
411
- "learning_rate": 0.00017648741418764302,
412
- "loss": 0.1191,
413
- "step": 520
414
- },
415
- {
416
- "epoch": 0.7350901525658807,
417
- "grad_norm": 0.09417985379695892,
418
- "learning_rate": 0.00017591533180778032,
419
- "loss": 0.1158,
420
- "step": 530
421
- },
422
- {
423
- "epoch": 0.7489597780859917,
424
- "grad_norm": 0.0866062194108963,
425
- "learning_rate": 0.00017534324942791762,
426
- "loss": 0.1106,
427
- "step": 540
428
- },
429
- {
430
- "epoch": 0.7628294036061026,
431
- "grad_norm": 0.08498796820640564,
432
- "learning_rate": 0.0001747711670480549,
433
- "loss": 0.1124,
434
- "step": 550
435
- },
436
- {
437
- "epoch": 0.7766990291262136,
438
- "grad_norm": 0.08251694589853287,
439
- "learning_rate": 0.00017419908466819223,
440
- "loss": 0.1136,
441
- "step": 560
442
- },
443
- {
444
- "epoch": 0.7905686546463245,
445
- "grad_norm": 0.08275240659713745,
446
- "learning_rate": 0.00017362700228832953,
447
- "loss": 0.1107,
448
- "step": 570
449
- },
450
- {
451
- "epoch": 0.8044382801664355,
452
- "grad_norm": 0.08751562237739563,
453
- "learning_rate": 0.00017305491990846682,
454
- "loss": 0.1169,
455
- "step": 580
456
- },
457
- {
458
- "epoch": 0.8183079056865464,
459
- "grad_norm": 0.09078636020421982,
460
- "learning_rate": 0.00017248283752860412,
461
- "loss": 0.1143,
462
- "step": 590
463
- },
464
- {
465
- "epoch": 0.8321775312066574,
466
- "grad_norm": 0.08412676304578781,
467
- "learning_rate": 0.00017191075514874142,
468
- "loss": 0.1197,
469
- "step": 600
470
- },
471
- {
472
- "epoch": 0.8321775312066574,
473
- "eval_loss": 0.11502571403980255,
474
- "eval_runtime": 86.625,
475
- "eval_samples_per_second": 14.58,
476
- "eval_steps_per_second": 0.912,
477
- "step": 600
478
- },
479
- {
480
- "epoch": 0.8460471567267683,
481
- "grad_norm": 0.08373397588729858,
482
- "learning_rate": 0.0001713386727688787,
483
- "loss": 0.1205,
484
- "step": 610
485
- },
486
- {
487
- "epoch": 0.8599167822468793,
488
- "grad_norm": 0.08933025598526001,
489
- "learning_rate": 0.00017076659038901603,
490
- "loss": 0.1147,
491
- "step": 620
492
- },
493
- {
494
- "epoch": 0.8737864077669902,
495
- "grad_norm": 0.08800772577524185,
496
- "learning_rate": 0.00017019450800915333,
497
- "loss": 0.1201,
498
- "step": 630
499
- },
500
- {
501
- "epoch": 0.8876560332871013,
502
- "grad_norm": 0.08623263984918594,
503
- "learning_rate": 0.00016962242562929063,
504
- "loss": 0.1144,
505
- "step": 640
506
- },
507
- {
508
- "epoch": 0.9015256588072122,
509
- "grad_norm": 0.0788191556930542,
510
- "learning_rate": 0.00016905034324942792,
511
- "loss": 0.1188,
512
- "step": 650
513
- },
514
- {
515
- "epoch": 0.9153952843273232,
516
- "grad_norm": 0.0787658542394638,
517
- "learning_rate": 0.00016847826086956522,
518
- "loss": 0.1077,
519
- "step": 660
520
- },
521
- {
522
- "epoch": 0.9292649098474342,
523
- "grad_norm": 0.08364666253328323,
524
- "learning_rate": 0.0001679061784897025,
525
- "loss": 0.1072,
526
- "step": 670
527
- },
528
- {
529
- "epoch": 0.9431345353675451,
530
- "grad_norm": 0.08853990584611893,
531
- "learning_rate": 0.00016733409610983983,
532
- "loss": 0.1097,
533
- "step": 680
534
- },
535
- {
536
- "epoch": 0.957004160887656,
537
- "grad_norm": 0.08456674963235855,
538
- "learning_rate": 0.00016676201372997713,
539
- "loss": 0.1167,
540
- "step": 690
541
- },
542
- {
543
- "epoch": 0.970873786407767,
544
- "grad_norm": 0.0840703621506691,
545
- "learning_rate": 0.00016618993135011443,
546
- "loss": 0.1231,
547
- "step": 700
548
- },
549
- {
550
- "epoch": 0.970873786407767,
551
- "eval_loss": 0.11245531588792801,
552
- "eval_runtime": 86.613,
553
- "eval_samples_per_second": 14.582,
554
- "eval_steps_per_second": 0.912,
555
- "step": 700
556
- }
557
- ],
558
- "logging_steps": 10,
559
- "max_steps": 3605,
560
- "num_input_tokens_seen": 0,
561
- "num_train_epochs": 5,
562
- "save_steps": 100,
563
- "stateful_callbacks": {
564
- "TrainerControl": {
565
- "args": {
566
- "should_epoch_stop": false,
567
- "should_evaluate": false,
568
- "should_log": false,
569
- "should_save": true,
570
- "should_training_stop": false
571
- },
572
- "attributes": {}
573
- }
574
- },
575
- "total_flos": 1.0638251619228058e+17,
576
- "train_batch_size": 16,
577
- "trial_name": null,
578
- "trial_params": null
579
- }