ChiefTheLord commited on
Commit
a1cdbd2
verified
1 Parent(s): 839592d

Upload folder using huggingface_hub

Browse files
flickr8k_checkpoints/checkpoint-1208/adapter.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e99df632ba281149a567ab0f850dc3be5c51d9b63810eb68577d29603d14c0e
3
+ size 17064856
flickr8k_checkpoints/checkpoint-1208/eval_state.json ADDED
The diff for this file is too large to render. See raw diff
 
flickr8k_checkpoints/checkpoint-1208/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:067b5fde8b60af2370dac43b51a0eaaed1d55487d4847e177307de9f998befed
3
+ size 8714492
flickr8k_checkpoints/checkpoint-1208/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435
3
+ size 14244
flickr8k_checkpoints/checkpoint-1208/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db6a4c086774bca871a88695029095eba8e64c70a4cdf4e980cb249f462eb44
3
+ size 1064
flickr8k_checkpoints/checkpoint-1208/trainer_state.json ADDED
@@ -0,0 +1,597 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1208,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.026490066225165563,
13
+ "grad_norm": 0.23711644113063812,
14
+ "learning_rate": 0.0004324324324324325,
15
+ "loss": 4.5092,
16
+ "step": 16
17
+ },
18
+ {
19
+ "epoch": 0.052980132450331126,
20
+ "grad_norm": 0.285342812538147,
21
+ "learning_rate": 0.0007297297297297297,
22
+ "loss": 4.9762,
23
+ "step": 32
24
+ },
25
+ {
26
+ "epoch": 0.07947019867549669,
27
+ "grad_norm": 0.09107156842947006,
28
+ "learning_rate": 0.0009999352232600816,
29
+ "loss": 4.3898,
30
+ "step": 48
31
+ },
32
+ {
33
+ "epoch": 0.10596026490066225,
34
+ "grad_norm": 0.11629017442464828,
35
+ "learning_rate": 0.0009991293467102582,
36
+ "loss": 4.3831,
37
+ "step": 64
38
+ },
39
+ {
40
+ "epoch": 0.13245033112582782,
41
+ "grad_norm": 0.1688951849937439,
42
+ "learning_rate": 0.000997403926531526,
43
+ "loss": 4.3703,
44
+ "step": 80
45
+ },
46
+ {
47
+ "epoch": 0.15894039735099338,
48
+ "grad_norm": 0.11590718477964401,
49
+ "learning_rate": 0.0009947621414572996,
50
+ "loss": 4.4239,
51
+ "step": 96
52
+ },
53
+ {
54
+ "epoch": 0.18543046357615894,
55
+ "grad_norm": 0.1131146252155304,
56
+ "learning_rate": 0.0009912088584356955,
57
+ "loss": 4.3535,
58
+ "step": 112
59
+ },
60
+ {
61
+ "epoch": 0.2119205298013245,
62
+ "grad_norm": 0.136209174990654,
63
+ "learning_rate": 0.000986750623663177,
64
+ "loss": 4.3339,
65
+ "step": 128
66
+ },
67
+ {
68
+ "epoch": 0.23841059602649006,
69
+ "grad_norm": 0.18205471336841583,
70
+ "learning_rate": 0.000981395650524528,
71
+ "loss": 4.3282,
72
+ "step": 144
73
+ },
74
+ {
75
+ "epoch": 0.26490066225165565,
76
+ "grad_norm": 0.10476769506931305,
77
+ "learning_rate": 0.000975153804461372,
78
+ "loss": 4.3313,
79
+ "step": 160
80
+ },
81
+ {
82
+ "epoch": 0.2913907284768212,
83
+ "grad_norm": 0.0960649624466896,
84
+ "learning_rate": 0.0009680365847971162,
85
+ "loss": 4.3201,
86
+ "step": 176
87
+ },
88
+ {
89
+ "epoch": 0.31788079470198677,
90
+ "grad_norm": 0.13584400713443756,
91
+ "learning_rate": 0.0009600571035518034,
92
+ "loss": 4.3756,
93
+ "step": 192
94
+ },
95
+ {
96
+ "epoch": 0.3443708609271523,
97
+ "grad_norm": 0.10583332926034927,
98
+ "learning_rate": 0.000951230061285898,
99
+ "loss": 4.3302,
100
+ "step": 208
101
+ },
102
+ {
103
+ "epoch": 0.3708609271523179,
104
+ "grad_norm": 0.13830633461475372,
105
+ "learning_rate": 0.0009415717200175151,
106
+ "loss": 4.3383,
107
+ "step": 224
108
+ },
109
+ {
110
+ "epoch": 0.3973509933774834,
111
+ "grad_norm": 0.12482637912034988,
112
+ "learning_rate": 0.0009310998732629798,
113
+ "loss": 4.3083,
114
+ "step": 240
115
+ },
116
+ {
117
+ "epoch": 0.423841059602649,
118
+ "grad_norm": 0.11580520868301392,
119
+ "learning_rate": 0.0009198338132559177,
120
+ "loss": 4.3313,
121
+ "step": 256
122
+ },
123
+ {
124
+ "epoch": 0.4503311258278146,
125
+ "grad_norm": 0.15633529424667358,
126
+ "learning_rate": 0.0009077942954052636,
127
+ "loss": 4.3099,
128
+ "step": 272
129
+ },
130
+ {
131
+ "epoch": 0.4768211920529801,
132
+ "grad_norm": 0.12987282872200012,
133
+ "learning_rate": 0.0008950035000576705,
134
+ "loss": 4.2932,
135
+ "step": 288
136
+ },
137
+ {
138
+ "epoch": 0.5033112582781457,
139
+ "grad_norm": 0.1630668193101883,
140
+ "learning_rate": 0.000881484991634762,
141
+ "loss": 4.3073,
142
+ "step": 304
143
+ },
144
+ {
145
+ "epoch": 0.5298013245033113,
146
+ "grad_norm": 0.14686144888401031,
147
+ "learning_rate": 0.0008672636752205099,
148
+ "loss": 4.2848,
149
+ "step": 320
150
+ },
151
+ {
152
+ "epoch": 0.5562913907284768,
153
+ "grad_norm": 0.1868494600057602,
154
+ "learning_rate": 0.0008523657506787162,
155
+ "loss": 4.3405,
156
+ "step": 336
157
+ },
158
+ {
159
+ "epoch": 0.5827814569536424,
160
+ "grad_norm": 0.11744031310081482,
161
+ "learning_rate": 0.0008368186643851284,
162
+ "loss": 4.3128,
163
+ "step": 352
164
+ },
165
+ {
166
+ "epoch": 0.609271523178808,
167
+ "grad_norm": 0.14506162703037262,
168
+ "learning_rate": 0.0008206510586631114,
169
+ "loss": 4.3208,
170
+ "step": 368
171
+ },
172
+ {
173
+ "epoch": 0.6357615894039735,
174
+ "grad_norm": 0.12040343135595322,
175
+ "learning_rate": 0.0008038927190160309,
176
+ "loss": 4.2675,
177
+ "step": 384
178
+ },
179
+ {
180
+ "epoch": 0.6622516556291391,
181
+ "grad_norm": 0.11899886280298233,
182
+ "learning_rate": 0.000786574519253562,
183
+ "loss": 4.2686,
184
+ "step": 400
185
+ },
186
+ {
187
+ "epoch": 0.6887417218543046,
188
+ "grad_norm": 0.12363821268081665,
189
+ "learning_rate": 0.0007687283646130157,
190
+ "loss": 4.2989,
191
+ "step": 416
192
+ },
193
+ {
194
+ "epoch": 0.7152317880794702,
195
+ "grad_norm": 0.13222382962703705,
196
+ "learning_rate": 0.0007503871329804718,
197
+ "loss": 4.2919,
198
+ "step": 432
199
+ },
200
+ {
201
+ "epoch": 0.7417218543046358,
202
+ "grad_norm": 0.11946064233779907,
203
+ "learning_rate": 0.0007315846143200053,
204
+ "loss": 4.2956,
205
+ "step": 448
206
+ },
207
+ {
208
+ "epoch": 0.7682119205298014,
209
+ "grad_norm": 0.1136200875043869,
210
+ "learning_rate": 0.0007123554484225956,
211
+ "loss": 4.251,
212
+ "step": 464
213
+ },
214
+ {
215
+ "epoch": 0.7947019867549668,
216
+ "grad_norm": 0.13116462528705597,
217
+ "learning_rate": 0.0006927350610894034,
218
+ "loss": 4.3105,
219
+ "step": 480
220
+ },
221
+ {
222
+ "epoch": 0.8211920529801324,
223
+ "grad_norm": 0.10559297353029251,
224
+ "learning_rate": 0.0006727595988669864,
225
+ "loss": 4.262,
226
+ "step": 496
227
+ },
228
+ {
229
+ "epoch": 0.847682119205298,
230
+ "grad_norm": 0.11165904253721237,
231
+ "learning_rate": 0.0006524658624546864,
232
+ "loss": 4.3069,
233
+ "step": 512
234
+ },
235
+ {
236
+ "epoch": 0.8741721854304636,
237
+ "grad_norm": 0.10997848957777023,
238
+ "learning_rate": 0.0006318912389068766,
239
+ "loss": 4.2901,
240
+ "step": 528
241
+ },
242
+ {
243
+ "epoch": 0.9006622516556292,
244
+ "grad_norm": 0.11981035768985748,
245
+ "learning_rate": 0.0006110736327549683,
246
+ "loss": 4.2744,
247
+ "step": 544
248
+ },
249
+ {
250
+ "epoch": 0.9271523178807947,
251
+ "grad_norm": 0.11829685419797897,
252
+ "learning_rate": 0.0005900513961760718,
253
+ "loss": 4.3009,
254
+ "step": 560
255
+ },
256
+ {
257
+ "epoch": 0.9536423841059603,
258
+ "grad_norm": 0.09722407907247543,
259
+ "learning_rate": 0.0005688632583369634,
260
+ "loss": 4.2738,
261
+ "step": 576
262
+ },
263
+ {
264
+ "epoch": 0.9801324503311258,
265
+ "grad_norm": 0.12697675824165344,
266
+ "learning_rate": 0.0005475482540435239,
267
+ "loss": 4.3521,
268
+ "step": 592
269
+ },
270
+ {
271
+ "epoch": 1.0,
272
+ "eval_bleu": 0.10730040886167266,
273
+ "eval_cap_loss": 1.229803388481898,
274
+ "eval_con_loss": 1.8703024170256608,
275
+ "eval_loss": 3.100105801560231,
276
+ "step": 604
277
+ },
278
+ {
279
+ "epoch": 1.0,
280
+ "eval_bleu": 0.10730040886167266,
281
+ "eval_cap_loss": 1.229803388481898,
282
+ "eval_con_loss": 1.8703024170256608,
283
+ "eval_loss": 3.100105801560231,
284
+ "eval_runtime": 247.7711,
285
+ "eval_samples_per_second": 19.49,
286
+ "eval_steps_per_second": 2.438,
287
+ "step": 604
288
+ },
289
+ {
290
+ "epoch": 1.0066225165562914,
291
+ "grad_norm": 0.13247686624526978,
292
+ "learning_rate": 0.000526145651827102,
293
+ "loss": 4.2509,
294
+ "step": 608
295
+ },
296
+ {
297
+ "epoch": 1.033112582781457,
298
+ "grad_norm": 0.1049540564417839,
299
+ "learning_rate": 0.0005046948816002839,
300
+ "loss": 4.2821,
301
+ "step": 624
302
+ },
303
+ {
304
+ "epoch": 1.0596026490066226,
305
+ "grad_norm": 0.1295829862356186,
306
+ "learning_rate": 0.00048323546201535375,
307
+ "loss": 4.2916,
308
+ "step": 640
309
+ },
310
+ {
311
+ "epoch": 1.086092715231788,
312
+ "grad_norm": 0.14188766479492188,
313
+ "learning_rate": 0.0004618069276592665,
314
+ "loss": 4.2996,
315
+ "step": 656
316
+ },
317
+ {
318
+ "epoch": 1.1125827814569536,
319
+ "grad_norm": 0.12427990883588791,
320
+ "learning_rate": 0.0004404487562192665,
321
+ "loss": 4.3079,
322
+ "step": 672
323
+ },
324
+ {
325
+ "epoch": 1.1390728476821192,
326
+ "grad_norm": 0.10745778679847717,
327
+ "learning_rate": 0.0004192002957533321,
328
+ "loss": 4.284,
329
+ "step": 688
330
+ },
331
+ {
332
+ "epoch": 1.1655629139072847,
333
+ "grad_norm": 0.1536702811717987,
334
+ "learning_rate": 0.00039810069219943343,
335
+ "loss": 4.2841,
336
+ "step": 704
337
+ },
338
+ {
339
+ "epoch": 1.1920529801324504,
340
+ "grad_norm": 0.1520632952451706,
341
+ "learning_rate": 0.0003771888172571579,
342
+ "loss": 4.3065,
343
+ "step": 720
344
+ },
345
+ {
346
+ "epoch": 1.218543046357616,
347
+ "grad_norm": 0.1317160278558731,
348
+ "learning_rate": 0.0003565031967745614,
349
+ "loss": 4.2853,
350
+ "step": 736
351
+ },
352
+ {
353
+ "epoch": 1.2450331125827814,
354
+ "grad_norm": 0.1610012948513031,
355
+ "learning_rate": 0.00033608193977218185,
356
+ "loss": 4.3149,
357
+ "step": 752
358
+ },
359
+ {
360
+ "epoch": 1.271523178807947,
361
+ "grad_norm": 0.13628298044204712,
362
+ "learning_rate": 0.0003159626682349709,
363
+ "loss": 4.3005,
364
+ "step": 768
365
+ },
366
+ {
367
+ "epoch": 1.2980132450331126,
368
+ "grad_norm": 0.1552676558494568,
369
+ "learning_rate": 0.00029618244780148955,
370
+ "loss": 4.2843,
371
+ "step": 784
372
+ },
373
+ {
374
+ "epoch": 1.3245033112582782,
375
+ "grad_norm": 0.1701873242855072,
376
+ "learning_rate": 0.0002767777194780578,
377
+ "loss": 4.3283,
378
+ "step": 800
379
+ },
380
+ {
381
+ "epoch": 1.3509933774834437,
382
+ "grad_norm": 0.10417599231004715,
383
+ "learning_rate": 0.00025778423250366167,
384
+ "loss": 4.2768,
385
+ "step": 816
386
+ },
387
+ {
388
+ "epoch": 1.3774834437086092,
389
+ "grad_norm": 0.14864106476306915,
390
+ "learning_rate": 0.0002392369784893001,
391
+ "loss": 4.2471,
392
+ "step": 832
393
+ },
394
+ {
395
+ "epoch": 1.403973509933775,
396
+ "grad_norm": 0.14134187996387482,
397
+ "learning_rate": 0.00022117012695310468,
398
+ "loss": 4.2717,
399
+ "step": 848
400
+ },
401
+ {
402
+ "epoch": 1.4304635761589404,
403
+ "grad_norm": 0.12026234716176987,
404
+ "learning_rate": 0.0002036169623700001,
405
+ "loss": 4.282,
406
+ "step": 864
407
+ },
408
+ {
409
+ "epoch": 1.4569536423841059,
410
+ "grad_norm": 0.15808941423892975,
411
+ "learning_rate": 0.000186609822851872,
412
+ "loss": 4.2811,
413
+ "step": 880
414
+ },
415
+ {
416
+ "epoch": 1.4834437086092715,
417
+ "grad_norm": 0.11768582463264465,
418
+ "learning_rate": 0.00017018004057121894,
419
+ "loss": 4.2527,
420
+ "step": 896
421
+ },
422
+ {
423
+ "epoch": 1.5099337748344372,
424
+ "grad_norm": 0.10410495847463608,
425
+ "learning_rate": 0.00015435788403803702,
426
+ "loss": 4.3271,
427
+ "step": 912
428
+ },
429
+ {
430
+ "epoch": 1.5364238410596025,
431
+ "grad_norm": 0.1393006443977356,
432
+ "learning_rate": 0.00013917250233628969,
433
+ "loss": 4.2384,
434
+ "step": 928
435
+ },
436
+ {
437
+ "epoch": 1.5629139072847682,
438
+ "grad_norm": 0.11578142642974854,
439
+ "learning_rate": 0.00012465187142268687,
440
+ "loss": 4.2827,
441
+ "step": 944
442
+ },
443
+ {
444
+ "epoch": 1.589403973509934,
445
+ "grad_norm": 0.12900201976299286,
446
+ "learning_rate": 0.00011082274258671376,
447
+ "loss": 4.2757,
448
+ "step": 960
449
+ },
450
+ {
451
+ "epoch": 1.6158940397350994,
452
+ "grad_norm": 0.10685920715332031,
453
+ "learning_rate": 9.771059316685665e-05,
454
+ "loss": 4.2779,
455
+ "step": 976
456
+ },
457
+ {
458
+ "epoch": 1.6423841059602649,
459
+ "grad_norm": 0.13312238454818726,
460
+ "learning_rate": 8.533957961382238e-05,
461
+ "loss": 4.2988,
462
+ "step": 992
463
+ },
464
+ {
465
+ "epoch": 1.6688741721854305,
466
+ "grad_norm": 0.1473645716905594,
467
+ "learning_rate": 7.373249298722506e-05,
468
+ "loss": 4.2973,
469
+ "step": 1008
470
+ },
471
+ {
472
+ "epoch": 1.695364238410596,
473
+ "grad_norm": 0.10669790208339691,
474
+ "learning_rate": 6.29107169677236e-05,
475
+ "loss": 4.2533,
476
+ "step": 1024
477
+ },
478
+ {
479
+ "epoch": 1.7218543046357615,
480
+ "grad_norm": 0.13500191271305084,
481
+ "learning_rate": 5.28941884619693e-05,
482
+ "loss": 4.3073,
483
+ "step": 1040
484
+ },
485
+ {
486
+ "epoch": 1.7483443708609272,
487
+ "grad_norm": 0.1250208169221878,
488
+ "learning_rate": 4.370136087293658e-05,
489
+ "loss": 4.2093,
490
+ "step": 1056
491
+ },
492
+ {
493
+ "epoch": 1.7748344370860927,
494
+ "grad_norm": 0.1221160963177681,
495
+ "learning_rate": 3.534917010330652e-05,
496
+ "loss": 4.2625,
497
+ "step": 1072
498
+ },
499
+ {
500
+ "epoch": 1.8013245033112582,
501
+ "grad_norm": 0.11934095621109009,
502
+ "learning_rate": 2.7853003354533555e-05,
503
+ "loss": 4.3391,
504
+ "step": 1088
505
+ },
506
+ {
507
+ "epoch": 1.8278145695364238,
508
+ "grad_norm": 0.13353998959064484,
509
+ "learning_rate": 2.1226670779077306e-05,
510
+ "loss": 4.2848,
511
+ "step": 1104
512
+ },
513
+ {
514
+ "epoch": 1.8543046357615895,
515
+ "grad_norm": 0.10006093233823776,
516
+ "learning_rate": 1.5482380038023768e-05,
517
+ "loss": 4.2608,
518
+ "step": 1120
519
+ },
520
+ {
521
+ "epoch": 1.8807947019867548,
522
+ "grad_norm": 0.10387697070837021,
523
+ "learning_rate": 1.0630713810969639e-05,
524
+ "loss": 4.2986,
525
+ "step": 1136
526
+ },
527
+ {
528
+ "epoch": 1.9072847682119205,
529
+ "grad_norm": 0.13752028346061707,
530
+ "learning_rate": 6.680610299601708e-06,
531
+ "loss": 4.2593,
532
+ "step": 1152
533
+ },
534
+ {
535
+ "epoch": 1.9337748344370862,
536
+ "grad_norm": 0.1313001811504364,
537
+ "learning_rate": 3.639346760890283e-06,
538
+ "loss": 4.2522,
539
+ "step": 1168
540
+ },
541
+ {
542
+ "epoch": 1.9602649006622517,
543
+ "grad_norm": 0.11591313779354095,
544
+ "learning_rate": 1.5125261002330026e-06,
545
+ "loss": 4.2476,
546
+ "step": 1184
547
+ },
548
+ {
549
+ "epoch": 1.9867549668874172,
550
+ "grad_norm": 0.11690162122249603,
551
+ "learning_rate": 3.040665492491379e-07,
552
+ "loss": 4.3406,
553
+ "step": 1200
554
+ },
555
+ {
556
+ "epoch": 2.0,
557
+ "eval_bleu": 0.11121865675263325,
558
+ "eval_cap_loss": 1.2156698384032345,
559
+ "eval_con_loss": 1.8595664712372204,
560
+ "eval_loss": 3.0752363122062176,
561
+ "step": 1208
562
+ },
563
+ {
564
+ "epoch": 2.0,
565
+ "eval_bleu": 0.11121865675263325,
566
+ "eval_cap_loss": 1.2156698384032345,
567
+ "eval_con_loss": 1.8595664712372204,
568
+ "eval_loss": 3.0752363122062176,
569
+ "eval_runtime": 250.9492,
570
+ "eval_samples_per_second": 19.243,
571
+ "eval_steps_per_second": 2.407,
572
+ "step": 1208
573
+ }
574
+ ],
575
+ "logging_steps": 16,
576
+ "max_steps": 1208,
577
+ "num_input_tokens_seen": 0,
578
+ "num_train_epochs": 2,
579
+ "save_steps": 500,
580
+ "stateful_callbacks": {
581
+ "TrainerControl": {
582
+ "args": {
583
+ "should_epoch_stop": false,
584
+ "should_evaluate": false,
585
+ "should_log": false,
586
+ "should_save": true,
587
+ "should_training_stop": true
588
+ },
589
+ "attributes": {}
590
+ }
591
+ },
592
+ "total_flos": 0.0,
593
+ "train_batch_size": 32,
594
+ "trial_name": null,
595
+ "trial_params": null,
596
+ "tau_value": 3.5545
597
+ }