Training in progress, step 5392, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 903834408
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fb9521546742e5ab06169707c06cc043bec2d4f930941361e3febca593be593
|
3 |
size 903834408
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1807824186
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc632dd60615524ed9d3b3017351f4b7d961867652709cf0787424ce518c75df
|
3 |
size 1807824186
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ffe3c35f79e8a24334ee3298aa52cdf049e1eff33cb89177eb46490ca5a3c18
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:031a255d4ec60870f505044effca17dbde0758a421a8fda981c50d676e69969a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 16,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -40663,6 +40663,127 @@
|
|
40663 |
"eval_samples_per_second": 11.336,
|
40664 |
"eval_steps_per_second": 1.417,
|
40665 |
"step": 5376
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40666 |
}
|
40667 |
],
|
40668 |
"logging_steps": 1,
|
@@ -40682,7 +40803,7 @@
|
|
40682 |
"attributes": {}
|
40683 |
}
|
40684 |
},
|
40685 |
-
"total_flos": 1.
|
40686 |
"train_batch_size": 8,
|
40687 |
"trial_name": null,
|
40688 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.34857373737373737,
|
5 |
"eval_steps": 16,
|
6 |
+
"global_step": 5392,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
40663 |
"eval_samples_per_second": 11.336,
|
40664 |
"eval_steps_per_second": 1.417,
|
40665 |
"step": 5376
|
40666 |
+
},
|
40667 |
+
{
|
40668 |
+
"epoch": 0.3476040404040404,
|
40669 |
+
"grad_norm": 0.06481784582138062,
|
40670 |
+
"learning_rate": 0.00019442224357848402,
|
40671 |
+
"loss": 0.0972,
|
40672 |
+
"step": 5377
|
40673 |
+
},
|
40674 |
+
{
|
40675 |
+
"epoch": 0.34766868686868685,
|
40676 |
+
"grad_norm": 0.06606926023960114,
|
40677 |
+
"learning_rate": 0.00019441999134371015,
|
40678 |
+
"loss": 0.0868,
|
40679 |
+
"step": 5378
|
40680 |
+
},
|
40681 |
+
{
|
40682 |
+
"epoch": 0.34773333333333334,
|
40683 |
+
"grad_norm": 0.054367631673812866,
|
40684 |
+
"learning_rate": 0.0001944177386673652,
|
40685 |
+
"loss": 0.0822,
|
40686 |
+
"step": 5379
|
40687 |
+
},
|
40688 |
+
{
|
40689 |
+
"epoch": 0.3477979797979798,
|
40690 |
+
"grad_norm": 0.0662309005856514,
|
40691 |
+
"learning_rate": 0.00019441548554945972,
|
40692 |
+
"loss": 0.0874,
|
40693 |
+
"step": 5380
|
40694 |
+
},
|
40695 |
+
{
|
40696 |
+
"epoch": 0.34786262626262626,
|
40697 |
+
"grad_norm": 0.06305788457393646,
|
40698 |
+
"learning_rate": 0.0001944132319900042,
|
40699 |
+
"loss": 0.0818,
|
40700 |
+
"step": 5381
|
40701 |
+
},
|
40702 |
+
{
|
40703 |
+
"epoch": 0.34792727272727275,
|
40704 |
+
"grad_norm": 0.06198299676179886,
|
40705 |
+
"learning_rate": 0.00019441097798900922,
|
40706 |
+
"loss": 0.0885,
|
40707 |
+
"step": 5382
|
40708 |
+
},
|
40709 |
+
{
|
40710 |
+
"epoch": 0.3479919191919192,
|
40711 |
+
"grad_norm": 0.05868459865450859,
|
40712 |
+
"learning_rate": 0.00019440872354648529,
|
40713 |
+
"loss": 0.0783,
|
40714 |
+
"step": 5383
|
40715 |
+
},
|
40716 |
+
{
|
40717 |
+
"epoch": 0.3480565656565657,
|
40718 |
+
"grad_norm": 0.0710318386554718,
|
40719 |
+
"learning_rate": 0.000194406468662443,
|
40720 |
+
"loss": 0.095,
|
40721 |
+
"step": 5384
|
40722 |
+
},
|
40723 |
+
{
|
40724 |
+
"epoch": 0.3481212121212121,
|
40725 |
+
"grad_norm": 0.07270139455795288,
|
40726 |
+
"learning_rate": 0.00019440421333689285,
|
40727 |
+
"loss": 0.1007,
|
40728 |
+
"step": 5385
|
40729 |
+
},
|
40730 |
+
{
|
40731 |
+
"epoch": 0.3481858585858586,
|
40732 |
+
"grad_norm": 0.06582971662282944,
|
40733 |
+
"learning_rate": 0.00019440195756984538,
|
40734 |
+
"loss": 0.0902,
|
40735 |
+
"step": 5386
|
40736 |
+
},
|
40737 |
+
{
|
40738 |
+
"epoch": 0.34825050505050503,
|
40739 |
+
"grad_norm": 0.06729482114315033,
|
40740 |
+
"learning_rate": 0.0001943997013613112,
|
40741 |
+
"loss": 0.0854,
|
40742 |
+
"step": 5387
|
40743 |
+
},
|
40744 |
+
{
|
40745 |
+
"epoch": 0.3483151515151515,
|
40746 |
+
"grad_norm": 0.06232403591275215,
|
40747 |
+
"learning_rate": 0.0001943974447113008,
|
40748 |
+
"loss": 0.0749,
|
40749 |
+
"step": 5388
|
40750 |
+
},
|
40751 |
+
{
|
40752 |
+
"epoch": 0.34837979797979796,
|
40753 |
+
"grad_norm": 0.08777357637882233,
|
40754 |
+
"learning_rate": 0.00019439518761982477,
|
40755 |
+
"loss": 0.0951,
|
40756 |
+
"step": 5389
|
40757 |
+
},
|
40758 |
+
{
|
40759 |
+
"epoch": 0.34844444444444445,
|
40760 |
+
"grad_norm": 0.057237379252910614,
|
40761 |
+
"learning_rate": 0.00019439293008689364,
|
40762 |
+
"loss": 0.0753,
|
40763 |
+
"step": 5390
|
40764 |
+
},
|
40765 |
+
{
|
40766 |
+
"epoch": 0.34850909090909094,
|
40767 |
+
"grad_norm": 0.06308239698410034,
|
40768 |
+
"learning_rate": 0.00019439067211251803,
|
40769 |
+
"loss": 0.0908,
|
40770 |
+
"step": 5391
|
40771 |
+
},
|
40772 |
+
{
|
40773 |
+
"epoch": 0.34857373737373737,
|
40774 |
+
"grad_norm": 0.052329737693071365,
|
40775 |
+
"learning_rate": 0.00019438841369670838,
|
40776 |
+
"loss": 0.0662,
|
40777 |
+
"step": 5392
|
40778 |
+
},
|
40779 |
+
{
|
40780 |
+
"epoch": 0.34857373737373737,
|
40781 |
+
"eval_bleu": 13.954759479769704,
|
40782 |
+
"eval_loss": 0.09024699032306671,
|
40783 |
+
"eval_runtime": 2.8264,
|
40784 |
+
"eval_samples_per_second": 11.322,
|
40785 |
+
"eval_steps_per_second": 1.415,
|
40786 |
+
"step": 5392
|
40787 |
}
|
40788 |
],
|
40789 |
"logging_steps": 1,
|
|
|
40803 |
"attributes": {}
|
40804 |
}
|
40805 |
},
|
40806 |
+
"total_flos": 1.0507203026878464e+17,
|
40807 |
"train_batch_size": 8,
|
40808 |
"trial_name": null,
|
40809 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a05b39c84dbf67327b54c00b0712aecde2c94f9948f9a446688d5d5d918449e
|
3 |
size 5240
|