Training in progress, epoch 4, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -559,6 +559,10 @@ You can finetune this model on your own dataset.
|
|
559 |
| 2.4914 | 1160 | 2.1771 | 0.4300 | 1.7922 |
|
560 |
| 2.7414 | 1276 | 2.2549 | 0.3610 | 1.6473 |
|
561 |
| 2.9914 | 1392 | 2.2168 | 0.2929 | 1.5590 |
|
|
|
|
|
|
|
|
|
562 |
|
563 |
|
564 |
### Framework Versions
|
|
|
559 |
| 2.4914 | 1160 | 2.1771 | 0.4300 | 1.7922 |
|
560 |
| 2.7414 | 1276 | 2.2549 | 0.3610 | 1.6473 |
|
561 |
| 2.9914 | 1392 | 2.2168 | 0.2929 | 1.5590 |
|
562 |
+
| 3.2371 | 1508 | 2.0581 | 0.2678 | 1.5177 |
|
563 |
+
| 3.4871 | 1624 | 1.9654 | 0.2392 | 1.5037 |
|
564 |
+
| 3.7371 | 1740 | 2.1107 | 0.2234 | 1.4557 |
|
565 |
+
| 3.9871 | 1856 | 2.0709 | 0.2094 | 1.4287 |
|
566 |
|
567 |
|
568 |
### Framework Versions
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1130520122
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45eeac36057b0a84a3d410d1974ea4ef60e1f6732308a7c19c7fd4aa6ff3adf0
|
3 |
size 1130520122
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 565251810
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5df76c36ed895286d93c7e35de2022f1a4142ca300d68137813a64ef7c2467b2
|
3 |
size 565251810
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14180
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23ac986d8316aed04b3cf90a12a44166385897956e05972f0500d5a12c28b4e2
|
3 |
size 14180
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c4113c75337369f487518d15e4b953b9a64a66968b355fbea0722908652f445
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 116,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -283,6 +283,98 @@
|
|
283 |
"eval_qnli-contrastive_samples_per_second": 1380.312,
|
284 |
"eval_qnli-contrastive_steps_per_second": 86.269,
|
285 |
"step": 1392
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
}
|
287 |
],
|
288 |
"logging_steps": 116,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.004310344827586,
|
5 |
"eval_steps": 116,
|
6 |
+
"global_step": 1864,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
283 |
"eval_qnli-contrastive_samples_per_second": 1380.312,
|
284 |
"eval_qnli-contrastive_steps_per_second": 86.269,
|
285 |
"step": 1392
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"epoch": 3.2370689655172415,
|
289 |
+
"grad_norm": 14.837372779846191,
|
290 |
+
"learning_rate": 1.6241871278299807e-06,
|
291 |
+
"loss": 2.0581,
|
292 |
+
"step": 1508
|
293 |
+
},
|
294 |
+
{
|
295 |
+
"epoch": 3.2370689655172415,
|
296 |
+
"eval_nli-pairs_loss": 1.5176913738250732,
|
297 |
+
"eval_nli-pairs_runtime": 1.3641,
|
298 |
+
"eval_nli-pairs_samples_per_second": 1466.194,
|
299 |
+
"eval_nli-pairs_steps_per_second": 91.637,
|
300 |
+
"step": 1508
|
301 |
+
},
|
302 |
+
{
|
303 |
+
"epoch": 3.2370689655172415,
|
304 |
+
"eval_qnli-contrastive_loss": 0.2678474187850952,
|
305 |
+
"eval_qnli-contrastive_runtime": 1.5105,
|
306 |
+
"eval_qnli-contrastive_samples_per_second": 1324.09,
|
307 |
+
"eval_qnli-contrastive_steps_per_second": 82.756,
|
308 |
+
"step": 1508
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"epoch": 3.4870689655172415,
|
312 |
+
"grad_norm": 145.98458862304688,
|
313 |
+
"learning_rate": 1.2734385039668851e-06,
|
314 |
+
"loss": 1.9654,
|
315 |
+
"step": 1624
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"epoch": 3.4870689655172415,
|
319 |
+
"eval_nli-pairs_loss": 1.5036982297897339,
|
320 |
+
"eval_nli-pairs_runtime": 1.3348,
|
321 |
+
"eval_nli-pairs_samples_per_second": 1498.309,
|
322 |
+
"eval_nli-pairs_steps_per_second": 93.644,
|
323 |
+
"step": 1624
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"epoch": 3.4870689655172415,
|
327 |
+
"eval_qnli-contrastive_loss": 0.23919104039669037,
|
328 |
+
"eval_qnli-contrastive_runtime": 1.5129,
|
329 |
+
"eval_qnli-contrastive_samples_per_second": 1321.928,
|
330 |
+
"eval_qnli-contrastive_steps_per_second": 82.621,
|
331 |
+
"step": 1624
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 3.737068965517241,
|
335 |
+
"grad_norm": 10.36633586883545,
|
336 |
+
"learning_rate": 9.350923617759733e-07,
|
337 |
+
"loss": 2.1107,
|
338 |
+
"step": 1740
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 3.737068965517241,
|
342 |
+
"eval_nli-pairs_loss": 1.4556528329849243,
|
343 |
+
"eval_nli-pairs_runtime": 1.4177,
|
344 |
+
"eval_nli-pairs_samples_per_second": 1410.69,
|
345 |
+
"eval_nli-pairs_steps_per_second": 88.168,
|
346 |
+
"step": 1740
|
347 |
+
},
|
348 |
+
{
|
349 |
+
"epoch": 3.737068965517241,
|
350 |
+
"eval_qnli-contrastive_loss": 0.22335131466388702,
|
351 |
+
"eval_qnli-contrastive_runtime": 1.5405,
|
352 |
+
"eval_qnli-contrastive_samples_per_second": 1298.243,
|
353 |
+
"eval_qnli-contrastive_steps_per_second": 81.14,
|
354 |
+
"step": 1740
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 3.987068965517241,
|
358 |
+
"grad_norm": 178.8499755859375,
|
359 |
+
"learning_rate": 6.276705238124942e-07,
|
360 |
+
"loss": 2.0709,
|
361 |
+
"step": 1856
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"epoch": 3.987068965517241,
|
365 |
+
"eval_nli-pairs_loss": 1.4286649227142334,
|
366 |
+
"eval_nli-pairs_runtime": 1.2929,
|
367 |
+
"eval_nli-pairs_samples_per_second": 1546.95,
|
368 |
+
"eval_nli-pairs_steps_per_second": 96.684,
|
369 |
+
"step": 1856
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"epoch": 3.987068965517241,
|
373 |
+
"eval_qnli-contrastive_loss": 0.2093583047389984,
|
374 |
+
"eval_qnli-contrastive_runtime": 1.4454,
|
375 |
+
"eval_qnli-contrastive_samples_per_second": 1383.695,
|
376 |
+
"eval_qnli-contrastive_steps_per_second": 86.481,
|
377 |
+
"step": 1856
|
378 |
}
|
379 |
],
|
380 |
"logging_steps": 116,
|