bobox committed on
Commit
e3a27c6
·
verified ·
1 Parent(s): ce22346

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -559,6 +559,10 @@ You can finetune this model on your own dataset.
559
  | 2.4914 | 1160 | 2.1771 | 0.4300 | 1.7922 |
560
  | 2.7414 | 1276 | 2.2549 | 0.3610 | 1.6473 |
561
  | 2.9914 | 1392 | 2.2168 | 0.2929 | 1.5590 |
 
 
 
 
562
 
563
 
564
  ### Framework Versions
 
559
  | 2.4914 | 1160 | 2.1771 | 0.4300 | 1.7922 |
560
  | 2.7414 | 1276 | 2.2549 | 0.3610 | 1.6473 |
561
  | 2.9914 | 1392 | 2.2168 | 0.2929 | 1.5590 |
562
+ | 3.2371 | 1508 | 2.0581 | 0.2678 | 1.5177 |
563
+ | 3.4871 | 1624 | 1.9654 | 0.2392 | 1.5037 |
564
+ | 3.7371 | 1740 | 2.1107 | 0.2234 | 1.4557 |
565
+ | 3.9871 | 1856 | 2.0709 | 0.2094 | 1.4287 |
566
 
567
 
568
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecb04608c64b22d6262681835d6c550177e812632d19e8bed563d46fd84e0e69
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45eeac36057b0a84a3d410d1974ea4ef60e1f6732308a7c19c7fd4aa6ff3adf0
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3ad09b012913d64a2d27765f2b0342e4a0f1edd69a6c8ffd7a83dcee57b952e
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5df76c36ed895286d93c7e35de2022f1a4142ca300d68137813a64ef7c2467b2
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89d0bf755340af85865c74eb993cae626d4eb319120da8fd389f3930be54bbea
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23ac986d8316aed04b3cf90a12a44166385897956e05972f0500d5a12c28b4e2
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e270a9661cb837eaec0b7e50a864e82bdf74fee38281b90e4447c6c11c7af62d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4113c75337369f487518d15e4b953b9a64a66968b355fbea0722908652f445
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0043103448275863,
5
  "eval_steps": 116,
6
- "global_step": 1398,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -283,6 +283,98 @@
283
  "eval_qnli-contrastive_samples_per_second": 1380.312,
284
  "eval_qnli-contrastive_steps_per_second": 86.269,
285
  "step": 1392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  }
287
  ],
288
  "logging_steps": 116,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.004310344827586,
5
  "eval_steps": 116,
6
+ "global_step": 1864,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
283
  "eval_qnli-contrastive_samples_per_second": 1380.312,
284
  "eval_qnli-contrastive_steps_per_second": 86.269,
285
  "step": 1392
286
+ },
287
+ {
288
+ "epoch": 3.2370689655172415,
289
+ "grad_norm": 14.837372779846191,
290
+ "learning_rate": 1.6241871278299807e-06,
291
+ "loss": 2.0581,
292
+ "step": 1508
293
+ },
294
+ {
295
+ "epoch": 3.2370689655172415,
296
+ "eval_nli-pairs_loss": 1.5176913738250732,
297
+ "eval_nli-pairs_runtime": 1.3641,
298
+ "eval_nli-pairs_samples_per_second": 1466.194,
299
+ "eval_nli-pairs_steps_per_second": 91.637,
300
+ "step": 1508
301
+ },
302
+ {
303
+ "epoch": 3.2370689655172415,
304
+ "eval_qnli-contrastive_loss": 0.2678474187850952,
305
+ "eval_qnli-contrastive_runtime": 1.5105,
306
+ "eval_qnli-contrastive_samples_per_second": 1324.09,
307
+ "eval_qnli-contrastive_steps_per_second": 82.756,
308
+ "step": 1508
309
+ },
310
+ {
311
+ "epoch": 3.4870689655172415,
312
+ "grad_norm": 145.98458862304688,
313
+ "learning_rate": 1.2734385039668851e-06,
314
+ "loss": 1.9654,
315
+ "step": 1624
316
+ },
317
+ {
318
+ "epoch": 3.4870689655172415,
319
+ "eval_nli-pairs_loss": 1.5036982297897339,
320
+ "eval_nli-pairs_runtime": 1.3348,
321
+ "eval_nli-pairs_samples_per_second": 1498.309,
322
+ "eval_nli-pairs_steps_per_second": 93.644,
323
+ "step": 1624
324
+ },
325
+ {
326
+ "epoch": 3.4870689655172415,
327
+ "eval_qnli-contrastive_loss": 0.23919104039669037,
328
+ "eval_qnli-contrastive_runtime": 1.5129,
329
+ "eval_qnli-contrastive_samples_per_second": 1321.928,
330
+ "eval_qnli-contrastive_steps_per_second": 82.621,
331
+ "step": 1624
332
+ },
333
+ {
334
+ "epoch": 3.737068965517241,
335
+ "grad_norm": 10.36633586883545,
336
+ "learning_rate": 9.350923617759733e-07,
337
+ "loss": 2.1107,
338
+ "step": 1740
339
+ },
340
+ {
341
+ "epoch": 3.737068965517241,
342
+ "eval_nli-pairs_loss": 1.4556528329849243,
343
+ "eval_nli-pairs_runtime": 1.4177,
344
+ "eval_nli-pairs_samples_per_second": 1410.69,
345
+ "eval_nli-pairs_steps_per_second": 88.168,
346
+ "step": 1740
347
+ },
348
+ {
349
+ "epoch": 3.737068965517241,
350
+ "eval_qnli-contrastive_loss": 0.22335131466388702,
351
+ "eval_qnli-contrastive_runtime": 1.5405,
352
+ "eval_qnli-contrastive_samples_per_second": 1298.243,
353
+ "eval_qnli-contrastive_steps_per_second": 81.14,
354
+ "step": 1740
355
+ },
356
+ {
357
+ "epoch": 3.987068965517241,
358
+ "grad_norm": 178.8499755859375,
359
+ "learning_rate": 6.276705238124942e-07,
360
+ "loss": 2.0709,
361
+ "step": 1856
362
+ },
363
+ {
364
+ "epoch": 3.987068965517241,
365
+ "eval_nli-pairs_loss": 1.4286649227142334,
366
+ "eval_nli-pairs_runtime": 1.2929,
367
+ "eval_nli-pairs_samples_per_second": 1546.95,
368
+ "eval_nli-pairs_steps_per_second": 96.684,
369
+ "step": 1856
370
+ },
371
+ {
372
+ "epoch": 3.987068965517241,
373
+ "eval_qnli-contrastive_loss": 0.2093583047389984,
374
+ "eval_qnli-contrastive_runtime": 1.4454,
375
+ "eval_qnli-contrastive_samples_per_second": 1383.695,
376
+ "eval_qnli-contrastive_steps_per_second": 86.481,
377
+ "step": 1856
378
  }
379
  ],
380
  "logging_steps": 116,