bobox committed on
Commit
7c24867
·
verified ·
1 Parent(s): 4deeff8

Training in progress, step 1742, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -722,6 +722,16 @@ You can finetune this model on your own dataset.
722
  | 0.1770 | 616 | 2.0925 | 1.6521 | 0.9031 |
723
  | 0.2022 | 704 | 2.0016 | 1.4990 | 0.8708 |
724
  | 0.2275 | 792 | 1.7607 | 1.4104 | 0.8444 |
 
 
 
 
 
 
 
 
 
 
725
 
726
 
727
  ### Framework Versions
 
722
  | 0.1770 | 616 | 2.0925 | 1.6521 | 0.9031 |
723
  | 0.2022 | 704 | 2.0016 | 1.4990 | 0.8708 |
724
  | 0.2275 | 792 | 1.7607 | 1.4104 | 0.8444 |
725
+ | 0.2528 | 880 | 1.7801 | 1.3015 | 0.8060 |
726
+ | 0.2781 | 968 | 1.5522 | 1.2201 | 0.7629 |
727
+ | 0.3034 | 1056 | 1.4041 | 1.1747 | 0.6738 |
728
+ | 0.3286 | 1144 | 1.3716 | 1.1800 | 0.6005 |
729
+ | 0.3539 | 1232 | 1.3107 | 1.0875 | 0.6327 |
730
+ | 0.3792 | 1320 | 1.3468 | 1.0540 | 0.5583 |
731
+ | 0.4045 | 1408 | 1.2303 | 1.0083 | 0.5666 |
732
+ | 0.4298 | 1496 | 1.1907 | 0.9647 | 0.5922 |
733
+ | 0.4550 | 1584 | 1.1587 | 0.9537 | 0.5585 |
734
+ | 0.4803 | 1672 | 0.9554 | 0.9304 | 0.5592 |
735
 
736
 
737
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:839951a3b458c681d39df9cfbf4fb7e62a90ce63309b1ee342bf3eb505abfa77
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ae3aa245b90ed7e7fade5e22036fd4db3e79d0e60c917c98d7bb52eae949df
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80497c471d66cac6dd87976e0401d86234ba52917f323c73fec20815ae2f5803
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a292c615b4c7184111afd8393bce447f10def285a7f193dced1d793b3f35c446
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2d8681747f3a889afbeb55310a23e4914bfed074d6b3f1c0962b05b54813ee1
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c28b09b15f7da5be865dfaddc8487a10def7b4fd10988e31421f04272831662
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6ae73bfc00f33c333c3cce2cc75ab33f53fec40d952a5013f435bd0955b417a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b19b307d6dfdc51420b9b8a249097f0d2f86a9860bd1617056feaf2607d184f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.25021545532892847,
5
  "eval_steps": 88,
6
- "global_step": 871,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -214,6 +214,236 @@
214
  "eval_scitail-pairs-pos_samples_per_second": 162.017,
215
  "eval_scitail-pairs-pos_steps_per_second": 10.188,
216
  "step": 792
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  }
218
  ],
219
  "logging_steps": 88,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5004309106578569,
5
  "eval_steps": 88,
6
+ "global_step": 1742,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
214
  "eval_scitail-pairs-pos_samples_per_second": 162.017,
215
  "eval_scitail-pairs-pos_steps_per_second": 10.188,
216
  "step": 792
217
+ },
218
+ {
219
+ "epoch": 0.25280091927607007,
220
+ "grad_norm": 9.332258224487305,
221
+ "learning_rate": 1.5282854656222804e-05,
222
+ "loss": 1.7801,
223
+ "step": 880
224
+ },
225
+ {
226
+ "epoch": 0.25280091927607007,
227
+ "eval_nli-pairs_loss": 1.3014748096466064,
228
+ "eval_nli-pairs_runtime": 38.916,
229
+ "eval_nli-pairs_samples_per_second": 174.941,
230
+ "eval_nli-pairs_steps_per_second": 10.947,
231
+ "step": 880
232
+ },
233
+ {
234
+ "epoch": 0.25280091927607007,
235
+ "eval_scitail-pairs-pos_loss": 0.8059829473495483,
236
+ "eval_scitail-pairs-pos_runtime": 8.1209,
237
+ "eval_scitail-pairs-pos_samples_per_second": 160.573,
238
+ "eval_scitail-pairs-pos_steps_per_second": 10.097,
239
+ "step": 880
240
+ },
241
+ {
242
+ "epoch": 0.2780810112036771,
243
+ "grad_norm": 10.008861541748047,
244
+ "learning_rate": 1.6814621409921673e-05,
245
+ "loss": 1.5522,
246
+ "step": 968
247
+ },
248
+ {
249
+ "epoch": 0.2780810112036771,
250
+ "eval_nli-pairs_loss": 1.2200833559036255,
251
+ "eval_nli-pairs_runtime": 38.7857,
252
+ "eval_nli-pairs_samples_per_second": 175.529,
253
+ "eval_nli-pairs_steps_per_second": 10.983,
254
+ "step": 968
255
+ },
256
+ {
257
+ "epoch": 0.2780810112036771,
258
+ "eval_scitail-pairs-pos_loss": 0.7629444003105164,
259
+ "eval_scitail-pairs-pos_runtime": 8.1057,
260
+ "eval_scitail-pairs-pos_samples_per_second": 160.874,
261
+ "eval_scitail-pairs-pos_steps_per_second": 10.116,
262
+ "step": 968
263
+ },
264
+ {
265
+ "epoch": 0.3033611031312841,
266
+ "grad_norm": 4.277113437652588,
267
+ "learning_rate": 1.834638816362054e-05,
268
+ "loss": 1.4041,
269
+ "step": 1056
270
+ },
271
+ {
272
+ "epoch": 0.3033611031312841,
273
+ "eval_nli-pairs_loss": 1.1746500730514526,
274
+ "eval_nli-pairs_runtime": 38.7125,
275
+ "eval_nli-pairs_samples_per_second": 175.86,
276
+ "eval_nli-pairs_steps_per_second": 11.004,
277
+ "step": 1056
278
+ },
279
+ {
280
+ "epoch": 0.3033611031312841,
281
+ "eval_scitail-pairs-pos_loss": 0.6737743020057678,
282
+ "eval_scitail-pairs-pos_runtime": 8.0882,
283
+ "eval_scitail-pairs-pos_samples_per_second": 161.222,
284
+ "eval_scitail-pairs-pos_steps_per_second": 10.138,
285
+ "step": 1056
286
+ },
287
+ {
288
+ "epoch": 0.3286411950588911,
289
+ "grad_norm": 12.264771461486816,
290
+ "learning_rate": 1.987815491731941e-05,
291
+ "loss": 1.3716,
292
+ "step": 1144
293
+ },
294
+ {
295
+ "epoch": 0.3286411950588911,
296
+ "eval_nli-pairs_loss": 1.1800155639648438,
297
+ "eval_nli-pairs_runtime": 39.3141,
298
+ "eval_nli-pairs_samples_per_second": 173.169,
299
+ "eval_nli-pairs_steps_per_second": 10.836,
300
+ "step": 1144
301
+ },
302
+ {
303
+ "epoch": 0.3286411950588911,
304
+ "eval_scitail-pairs-pos_loss": 0.6005298495292664,
305
+ "eval_scitail-pairs-pos_runtime": 8.1438,
306
+ "eval_scitail-pairs-pos_samples_per_second": 160.123,
307
+ "eval_scitail-pairs-pos_steps_per_second": 10.069,
308
+ "step": 1144
309
+ },
310
+ {
311
+ "epoch": 0.3539212869864981,
312
+ "grad_norm": 8.721504211425781,
313
+ "learning_rate": 1.994052263586742e-05,
314
+ "loss": 1.3107,
315
+ "step": 1232
316
+ },
317
+ {
318
+ "epoch": 0.3539212869864981,
319
+ "eval_nli-pairs_loss": 1.0875309705734253,
320
+ "eval_nli-pairs_runtime": 38.8492,
321
+ "eval_nli-pairs_samples_per_second": 175.242,
322
+ "eval_nli-pairs_steps_per_second": 10.965,
323
+ "step": 1232
324
+ },
325
+ {
326
+ "epoch": 0.3539212869864981,
327
+ "eval_scitail-pairs-pos_loss": 0.6326610445976257,
328
+ "eval_scitail-pairs-pos_runtime": 8.1233,
329
+ "eval_scitail-pairs-pos_samples_per_second": 160.525,
330
+ "eval_scitail-pairs-pos_steps_per_second": 10.094,
331
+ "step": 1232
332
+ },
333
+ {
334
+ "epoch": 0.37920137891410516,
335
+ "grad_norm": 7.441675186157227,
336
+ "learning_rate": 1.974194676073964e-05,
337
+ "loss": 1.3468,
338
+ "step": 1320
339
+ },
340
+ {
341
+ "epoch": 0.37920137891410516,
342
+ "eval_nli-pairs_loss": 1.0540093183517456,
343
+ "eval_nli-pairs_runtime": 38.2019,
344
+ "eval_nli-pairs_samples_per_second": 178.211,
345
+ "eval_nli-pairs_steps_per_second": 11.151,
346
+ "step": 1320
347
+ },
348
+ {
349
+ "epoch": 0.37920137891410516,
350
+ "eval_scitail-pairs-pos_loss": 0.5582771301269531,
351
+ "eval_scitail-pairs-pos_runtime": 7.795,
352
+ "eval_scitail-pairs-pos_samples_per_second": 167.286,
353
+ "eval_scitail-pairs-pos_steps_per_second": 10.52,
354
+ "step": 1320
355
+ },
356
+ {
357
+ "epoch": 0.40448147084171215,
358
+ "grad_norm": 9.178886413574219,
359
+ "learning_rate": 1.9406615307701736e-05,
360
+ "loss": 1.2303,
361
+ "step": 1408
362
+ },
363
+ {
364
+ "epoch": 0.40448147084171215,
365
+ "eval_nli-pairs_loss": 1.0082660913467407,
366
+ "eval_nli-pairs_runtime": 38.0965,
367
+ "eval_nli-pairs_samples_per_second": 178.704,
368
+ "eval_nli-pairs_steps_per_second": 11.182,
369
+ "step": 1408
370
+ },
371
+ {
372
+ "epoch": 0.40448147084171215,
373
+ "eval_scitail-pairs-pos_loss": 0.5665512084960938,
374
+ "eval_scitail-pairs-pos_runtime": 7.756,
375
+ "eval_scitail-pairs-pos_samples_per_second": 168.128,
376
+ "eval_scitail-pairs-pos_steps_per_second": 10.572,
377
+ "step": 1408
378
+ },
379
+ {
380
+ "epoch": 0.42976156276931915,
381
+ "grad_norm": 7.995066165924072,
382
+ "learning_rate": 1.8939235595298756e-05,
383
+ "loss": 1.1907,
384
+ "step": 1496
385
+ },
386
+ {
387
+ "epoch": 0.42976156276931915,
388
+ "eval_nli-pairs_loss": 0.9647029042243958,
389
+ "eval_nli-pairs_runtime": 38.1584,
390
+ "eval_nli-pairs_samples_per_second": 178.414,
391
+ "eval_nli-pairs_steps_per_second": 11.164,
392
+ "step": 1496
393
+ },
394
+ {
395
+ "epoch": 0.42976156276931915,
396
+ "eval_scitail-pairs-pos_loss": 0.5922390818595886,
397
+ "eval_scitail-pairs-pos_runtime": 7.8063,
398
+ "eval_scitail-pairs-pos_samples_per_second": 167.044,
399
+ "eval_scitail-pairs-pos_steps_per_second": 10.504,
400
+ "step": 1496
401
+ },
402
+ {
403
+ "epoch": 0.45504165469692615,
404
+ "grad_norm": 6.889362335205078,
405
+ "learning_rate": 1.8346368610183863e-05,
406
+ "loss": 1.1587,
407
+ "step": 1584
408
+ },
409
+ {
410
+ "epoch": 0.45504165469692615,
411
+ "eval_nli-pairs_loss": 0.9536527991294861,
412
+ "eval_nli-pairs_runtime": 38.1977,
413
+ "eval_nli-pairs_samples_per_second": 178.231,
414
+ "eval_nli-pairs_steps_per_second": 11.152,
415
+ "step": 1584
416
+ },
417
+ {
418
+ "epoch": 0.45504165469692615,
419
+ "eval_scitail-pairs-pos_loss": 0.5585244297981262,
420
+ "eval_scitail-pairs-pos_runtime": 7.8456,
421
+ "eval_scitail-pairs-pos_samples_per_second": 166.207,
422
+ "eval_scitail-pairs-pos_steps_per_second": 10.452,
423
+ "step": 1584
424
+ },
425
+ {
426
+ "epoch": 0.4803217466245332,
427
+ "grad_norm": 9.089669227600098,
428
+ "learning_rate": 1.7636336905247625e-05,
429
+ "loss": 0.9554,
430
+ "step": 1672
431
+ },
432
+ {
433
+ "epoch": 0.4803217466245332,
434
+ "eval_nli-pairs_loss": 0.9304406046867371,
435
+ "eval_nli-pairs_runtime": 38.1126,
436
+ "eval_nli-pairs_samples_per_second": 178.629,
437
+ "eval_nli-pairs_steps_per_second": 11.177,
438
+ "step": 1672
439
+ },
440
+ {
441
+ "epoch": 0.4803217466245332,
442
+ "eval_scitail-pairs-pos_loss": 0.5591565370559692,
443
+ "eval_scitail-pairs-pos_runtime": 7.8171,
444
+ "eval_scitail-pairs-pos_samples_per_second": 166.813,
445
+ "eval_scitail-pairs-pos_steps_per_second": 10.49,
446
+ "step": 1672
447
  }
448
  ],
449
  "logging_steps": 88,