Training in progress, step 1900, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48f0b842ee73338c5196631f87772bba6f5edf4b3ae89cae7bbfc7f309e0857a
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3172121c5181db7f36c5f20a872297dfe5b4f0ae30a7959ec1c6216d04d0d1cc
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88a60cedca48e5fc8740b4f1f705f978c0560a4e6385b3969f4dac4afed261e8
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb17705c60e3748d16ff6a5ed77b771e13f629bc8439632ab64d0f641cc2332a
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22c799f3fc1e686a2648fd9a88df8f0e9f27001631c96224ad9df9e896a5d223
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:900d0bd1d3c3bcd0dad9c4909629cf63a5d624cabc1257f001e2d9077a9e9e53
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e86c07f1298f4667edef5c54e67b1e608e33a7d17ed5a2972f6c419f38e6ca94
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85791137bbe5abdeb01422c95c0695f38d7b465390cfce57a8908907a93aa9c3
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8eeab6b7e925d9ac0af1499c6158c2bd3d2fa709063a35e8908c75fc9a3bf66e
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d56b19c89e54575da49ba3691c2d1cd4239936a6e7cdd184f280c64e52c90fc2
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:660fb9882f614217e98ebdc720c67d1f69f90546870acb0d060c2c463fa269c7
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc5612b1d5a8804a93743e626503af0c5c4b4134be7747f86c470f7d404097de
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fbd4dd1b12705ad122adddd6e7db3dc1baec5f8063c359269d322c1f0027ee1
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -13699,6 +13699,766 @@
|
|
13699 |
"eval_samples_per_second": 5.301,
|
13700 |
"eval_steps_per_second": 0.176,
|
13701 |
"step": 1800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13702 |
}
|
13703 |
],
|
13704 |
"logging_steps": 1,
|
@@ -13718,7 +14478,7 @@
|
|
13718 |
"attributes": {}
|
13719 |
}
|
13720 |
},
|
13721 |
-
"total_flos":
|
13722 |
"train_batch_size": 8,
|
13723 |
"trial_name": null,
|
13724 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9006873666745674,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 1900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
13699 |
"eval_samples_per_second": 5.301,
|
13700 |
"eval_steps_per_second": 0.176,
|
13701 |
"step": 1800
|
13702 |
+
},
|
13703 |
+
{
|
13704 |
+
"epoch": 0.8537568144109978,
|
13705 |
+
"grad_norm": 3.1635870933532715,
|
13706 |
+
"learning_rate": 1.2716048081775823e-06,
|
13707 |
+
"loss": 0.1749,
|
13708 |
+
"step": 1801
|
13709 |
+
},
|
13710 |
+
{
|
13711 |
+
"epoch": 0.8542308603934582,
|
13712 |
+
"grad_norm": 8.236771583557129,
|
13713 |
+
"learning_rate": 1.2635392116408095e-06,
|
13714 |
+
"loss": 0.1951,
|
13715 |
+
"step": 1802
|
13716 |
+
},
|
13717 |
+
{
|
13718 |
+
"epoch": 0.8547049063759184,
|
13719 |
+
"grad_norm": 7.184986114501953,
|
13720 |
+
"learning_rate": 1.2554975506182533e-06,
|
13721 |
+
"loss": 0.157,
|
13722 |
+
"step": 1803
|
13723 |
+
},
|
13724 |
+
{
|
13725 |
+
"epoch": 0.8551789523583787,
|
13726 |
+
"grad_norm": 2.5440175533294678,
|
13727 |
+
"learning_rate": 1.247479847141867e-06,
|
13728 |
+
"loss": 0.0708,
|
13729 |
+
"step": 1804
|
13730 |
+
},
|
13731 |
+
{
|
13732 |
+
"epoch": 0.8556529983408391,
|
13733 |
+
"grad_norm": 3.4107348918914795,
|
13734 |
+
"learning_rate": 1.2394861231779677e-06,
|
13735 |
+
"loss": 0.0968,
|
13736 |
+
"step": 1805
|
13737 |
+
},
|
13738 |
+
{
|
13739 |
+
"epoch": 0.8561270443232993,
|
13740 |
+
"grad_norm": 8.80566692352295,
|
13741 |
+
"learning_rate": 1.2315164006271718e-06,
|
13742 |
+
"loss": 0.1692,
|
13743 |
+
"step": 1806
|
13744 |
+
},
|
13745 |
+
{
|
13746 |
+
"epoch": 0.8566010903057597,
|
13747 |
+
"grad_norm": 5.312666416168213,
|
13748 |
+
"learning_rate": 1.2235707013243426e-06,
|
13749 |
+
"loss": 0.119,
|
13750 |
+
"step": 1807
|
13751 |
+
},
|
13752 |
+
{
|
13753 |
+
"epoch": 0.85707513628822,
|
13754 |
+
"grad_norm": 7.94031286239624,
|
13755 |
+
"learning_rate": 1.2156490470385207e-06,
|
13756 |
+
"loss": 0.1993,
|
13757 |
+
"step": 1808
|
13758 |
+
},
|
13759 |
+
{
|
13760 |
+
"epoch": 0.8575491822706802,
|
13761 |
+
"grad_norm": 3.85893177986145,
|
13762 |
+
"learning_rate": 1.2077514594728778e-06,
|
13763 |
+
"loss": 0.1085,
|
13764 |
+
"step": 1809
|
13765 |
+
},
|
13766 |
+
{
|
13767 |
+
"epoch": 0.8580232282531406,
|
13768 |
+
"grad_norm": 2.8813283443450928,
|
13769 |
+
"learning_rate": 1.1998779602646438e-06,
|
13770 |
+
"loss": 0.1099,
|
13771 |
+
"step": 1810
|
13772 |
+
},
|
13773 |
+
{
|
13774 |
+
"epoch": 0.8584972742356008,
|
13775 |
+
"grad_norm": 4.950772762298584,
|
13776 |
+
"learning_rate": 1.1920285709850509e-06,
|
13777 |
+
"loss": 0.1064,
|
13778 |
+
"step": 1811
|
13779 |
+
},
|
13780 |
+
{
|
13781 |
+
"epoch": 0.8589713202180611,
|
13782 |
+
"grad_norm": 3.9935288429260254,
|
13783 |
+
"learning_rate": 1.184203313139286e-06,
|
13784 |
+
"loss": 0.1145,
|
13785 |
+
"step": 1812
|
13786 |
+
},
|
13787 |
+
{
|
13788 |
+
"epoch": 0.8594453662005215,
|
13789 |
+
"grad_norm": 5.1902360916137695,
|
13790 |
+
"learning_rate": 1.1764022081664094e-06,
|
13791 |
+
"loss": 0.164,
|
13792 |
+
"step": 1813
|
13793 |
+
},
|
13794 |
+
{
|
13795 |
+
"epoch": 0.8599194121829817,
|
13796 |
+
"grad_norm": 4.6810150146484375,
|
13797 |
+
"learning_rate": 1.1686252774393181e-06,
|
13798 |
+
"loss": 0.1272,
|
13799 |
+
"step": 1814
|
13800 |
+
},
|
13801 |
+
{
|
13802 |
+
"epoch": 0.860393458165442,
|
13803 |
+
"grad_norm": 3.890429735183716,
|
13804 |
+
"learning_rate": 1.1608725422646782e-06,
|
13805 |
+
"loss": 0.1128,
|
13806 |
+
"step": 1815
|
13807 |
+
},
|
13808 |
+
{
|
13809 |
+
"epoch": 0.8608675041479024,
|
13810 |
+
"grad_norm": 9.929910659790039,
|
13811 |
+
"learning_rate": 1.1531440238828639e-06,
|
13812 |
+
"loss": 0.169,
|
13813 |
+
"step": 1816
|
13814 |
+
},
|
13815 |
+
{
|
13816 |
+
"epoch": 0.8613415501303626,
|
13817 |
+
"grad_norm": 3.39127516746521,
|
13818 |
+
"learning_rate": 1.1454397434679022e-06,
|
13819 |
+
"loss": 0.0916,
|
13820 |
+
"step": 1817
|
13821 |
+
},
|
13822 |
+
{
|
13823 |
+
"epoch": 0.861815596112823,
|
13824 |
+
"grad_norm": 3.8935232162475586,
|
13825 |
+
"learning_rate": 1.137759722127415e-06,
|
13826 |
+
"loss": 0.1236,
|
13827 |
+
"step": 1818
|
13828 |
+
},
|
13829 |
+
{
|
13830 |
+
"epoch": 0.8622896420952832,
|
13831 |
+
"grad_norm": 4.592057704925537,
|
13832 |
+
"learning_rate": 1.1301039809025628e-06,
|
13833 |
+
"loss": 0.1573,
|
13834 |
+
"step": 1819
|
13835 |
+
},
|
13836 |
+
{
|
13837 |
+
"epoch": 0.8627636880777435,
|
13838 |
+
"grad_norm": 3.4906246662139893,
|
13839 |
+
"learning_rate": 1.1224725407679814e-06,
|
13840 |
+
"loss": 0.0799,
|
13841 |
+
"step": 1820
|
13842 |
+
},
|
13843 |
+
{
|
13844 |
+
"epoch": 0.8627636880777435,
|
13845 |
+
"eval_accuracy": 0.9943639291465378,
|
13846 |
+
"eval_f1": 0.9369369369369369,
|
13847 |
+
"eval_loss": 0.014933480881154537,
|
13848 |
+
"eval_precision": 0.8813559322033898,
|
13849 |
+
"eval_recall": 1.0,
|
13850 |
+
"eval_runtime": 49.8899,
|
13851 |
+
"eval_samples_per_second": 5.432,
|
13852 |
+
"eval_steps_per_second": 0.18,
|
13853 |
+
"step": 1820
|
13854 |
+
},
|
13855 |
+
{
|
13856 |
+
"epoch": 0.8632377340602039,
|
13857 |
+
"grad_norm": 7.27462911605835,
|
13858 |
+
"learning_rate": 1.1148654226317325e-06,
|
13859 |
+
"loss": 0.1538,
|
13860 |
+
"step": 1821
|
13861 |
+
},
|
13862 |
+
{
|
13863 |
+
"epoch": 0.8637117800426641,
|
13864 |
+
"grad_norm": 3.6112170219421387,
|
13865 |
+
"learning_rate": 1.1072826473352394e-06,
|
13866 |
+
"loss": 0.1337,
|
13867 |
+
"step": 1822
|
13868 |
+
},
|
13869 |
+
{
|
13870 |
+
"epoch": 0.8641858260251244,
|
13871 |
+
"grad_norm": 4.936607360839844,
|
13872 |
+
"learning_rate": 1.0997242356532335e-06,
|
13873 |
+
"loss": 0.152,
|
13874 |
+
"step": 1823
|
13875 |
+
},
|
13876 |
+
{
|
13877 |
+
"epoch": 0.8646598720075848,
|
13878 |
+
"grad_norm": 7.105523109436035,
|
13879 |
+
"learning_rate": 1.0921902082936987e-06,
|
13880 |
+
"loss": 0.1772,
|
13881 |
+
"step": 1824
|
13882 |
+
},
|
13883 |
+
{
|
13884 |
+
"epoch": 0.865133917990045,
|
13885 |
+
"grad_norm": 7.956032752990723,
|
13886 |
+
"learning_rate": 1.0846805858978038e-06,
|
13887 |
+
"loss": 0.1794,
|
13888 |
+
"step": 1825
|
13889 |
+
},
|
13890 |
+
{
|
13891 |
+
"epoch": 0.8656079639725053,
|
13892 |
+
"grad_norm": 2.654674530029297,
|
13893 |
+
"learning_rate": 1.0771953890398679e-06,
|
13894 |
+
"loss": 0.1223,
|
13895 |
+
"step": 1826
|
13896 |
+
},
|
13897 |
+
{
|
13898 |
+
"epoch": 0.8660820099549656,
|
13899 |
+
"grad_norm": 5.592787265777588,
|
13900 |
+
"learning_rate": 1.0697346382272822e-06,
|
13901 |
+
"loss": 0.1459,
|
13902 |
+
"step": 1827
|
13903 |
+
},
|
13904 |
+
{
|
13905 |
+
"epoch": 0.8665560559374259,
|
13906 |
+
"grad_norm": 9.735966682434082,
|
13907 |
+
"learning_rate": 1.0622983539004628e-06,
|
13908 |
+
"loss": 0.176,
|
13909 |
+
"step": 1828
|
13910 |
+
},
|
13911 |
+
{
|
13912 |
+
"epoch": 0.8670301019198863,
|
13913 |
+
"grad_norm": 7.254702568054199,
|
13914 |
+
"learning_rate": 1.054886556432798e-06,
|
13915 |
+
"loss": 0.1962,
|
13916 |
+
"step": 1829
|
13917 |
+
},
|
13918 |
+
{
|
13919 |
+
"epoch": 0.8675041479023465,
|
13920 |
+
"grad_norm": 3.388284206390381,
|
13921 |
+
"learning_rate": 1.047499266130585e-06,
|
13922 |
+
"loss": 0.1744,
|
13923 |
+
"step": 1830
|
13924 |
+
},
|
13925 |
+
{
|
13926 |
+
"epoch": 0.8679781938848068,
|
13927 |
+
"grad_norm": 5.419455051422119,
|
13928 |
+
"learning_rate": 1.0401365032329812e-06,
|
13929 |
+
"loss": 0.2004,
|
13930 |
+
"step": 1831
|
13931 |
+
},
|
13932 |
+
{
|
13933 |
+
"epoch": 0.8684522398672672,
|
13934 |
+
"grad_norm": 5.385417938232422,
|
13935 |
+
"learning_rate": 1.0327982879119425e-06,
|
13936 |
+
"loss": 0.2027,
|
13937 |
+
"step": 1832
|
13938 |
+
},
|
13939 |
+
{
|
13940 |
+
"epoch": 0.8689262858497274,
|
13941 |
+
"grad_norm": 4.17827033996582,
|
13942 |
+
"learning_rate": 1.0254846402721764e-06,
|
13943 |
+
"loss": 0.1501,
|
13944 |
+
"step": 1833
|
13945 |
+
},
|
13946 |
+
{
|
13947 |
+
"epoch": 0.8694003318321877,
|
13948 |
+
"grad_norm": 4.2940354347229,
|
13949 |
+
"learning_rate": 1.0181955803510724e-06,
|
13950 |
+
"loss": 0.1162,
|
13951 |
+
"step": 1834
|
13952 |
+
},
|
13953 |
+
{
|
13954 |
+
"epoch": 0.869874377814648,
|
13955 |
+
"grad_norm": 8.596222877502441,
|
13956 |
+
"learning_rate": 1.010931128118665e-06,
|
13957 |
+
"loss": 0.1216,
|
13958 |
+
"step": 1835
|
13959 |
+
},
|
13960 |
+
{
|
13961 |
+
"epoch": 0.8703484237971083,
|
13962 |
+
"grad_norm": 4.9963884353637695,
|
13963 |
+
"learning_rate": 1.0036913034775675e-06,
|
13964 |
+
"loss": 0.1779,
|
13965 |
+
"step": 1836
|
13966 |
+
},
|
13967 |
+
{
|
13968 |
+
"epoch": 0.8708224697795686,
|
13969 |
+
"grad_norm": 4.238993167877197,
|
13970 |
+
"learning_rate": 9.964761262629196e-07,
|
13971 |
+
"loss": 0.1237,
|
13972 |
+
"step": 1837
|
13973 |
+
},
|
13974 |
+
{
|
13975 |
+
"epoch": 0.8712965157620289,
|
13976 |
+
"grad_norm": 8.45755672454834,
|
13977 |
+
"learning_rate": 9.892856162423348e-07,
|
13978 |
+
"loss": 0.1578,
|
13979 |
+
"step": 1838
|
13980 |
+
},
|
13981 |
+
{
|
13982 |
+
"epoch": 0.8717705617444892,
|
13983 |
+
"grad_norm": 7.35408353805542,
|
13984 |
+
"learning_rate": 9.821197931158455e-07,
|
13985 |
+
"loss": 0.2077,
|
13986 |
+
"step": 1839
|
13987 |
+
},
|
13988 |
+
{
|
13989 |
+
"epoch": 0.8722446077269496,
|
13990 |
+
"grad_norm": 4.194153785705566,
|
13991 |
+
"learning_rate": 9.749786765158464e-07,
|
13992 |
+
"loss": 0.1294,
|
13993 |
+
"step": 1840
|
13994 |
+
},
|
13995 |
+
{
|
13996 |
+
"epoch": 0.8722446077269496,
|
13997 |
+
"eval_accuracy": 0.9943639291465378,
|
13998 |
+
"eval_f1": 0.9369369369369369,
|
13999 |
+
"eval_loss": 0.012980014085769653,
|
14000 |
+
"eval_precision": 0.8813559322033898,
|
14001 |
+
"eval_recall": 1.0,
|
14002 |
+
"eval_runtime": 49.6245,
|
14003 |
+
"eval_samples_per_second": 5.461,
|
14004 |
+
"eval_steps_per_second": 0.181,
|
14005 |
+
"step": 1840
|
14006 |
+
},
|
14007 |
+
{
|
14008 |
+
"epoch": 0.8727186537094098,
|
14009 |
+
"grad_norm": 8.562105178833008,
|
14010 |
+
"learning_rate": 9.678622860070474e-07,
|
14011 |
+
"loss": 0.2731,
|
14012 |
+
"step": 1841
|
14013 |
+
},
|
14014 |
+
{
|
14015 |
+
"epoch": 0.8731926996918701,
|
14016 |
+
"grad_norm": 7.327461242675781,
|
14017 |
+
"learning_rate": 9.607706410864083e-07,
|
14018 |
+
"loss": 0.1846,
|
14019 |
+
"step": 1842
|
14020 |
+
},
|
14021 |
+
{
|
14022 |
+
"epoch": 0.8736667456743304,
|
14023 |
+
"grad_norm": 5.737156867980957,
|
14024 |
+
"learning_rate": 9.537037611831047e-07,
|
14025 |
+
"loss": 0.2219,
|
14026 |
+
"step": 1843
|
14027 |
+
},
|
14028 |
+
{
|
14029 |
+
"epoch": 0.8741407916567907,
|
14030 |
+
"grad_norm": 3.665459156036377,
|
14031 |
+
"learning_rate": 9.466616656584493e-07,
|
14032 |
+
"loss": 0.1163,
|
14033 |
+
"step": 1844
|
14034 |
+
},
|
14035 |
+
{
|
14036 |
+
"epoch": 0.874614837639251,
|
14037 |
+
"grad_norm": 5.575207710266113,
|
14038 |
+
"learning_rate": 9.396443738058614e-07,
|
14039 |
+
"loss": 0.1411,
|
14040 |
+
"step": 1845
|
14041 |
+
},
|
14042 |
+
{
|
14043 |
+
"epoch": 0.8750888836217113,
|
14044 |
+
"grad_norm": 2.1095454692840576,
|
14045 |
+
"learning_rate": 9.32651904850801e-07,
|
14046 |
+
"loss": 0.0826,
|
14047 |
+
"step": 1846
|
14048 |
+
},
|
14049 |
+
{
|
14050 |
+
"epoch": 0.8755629296041716,
|
14051 |
+
"grad_norm": 8.68192195892334,
|
14052 |
+
"learning_rate": 9.256842779507236e-07,
|
14053 |
+
"loss": 0.1324,
|
14054 |
+
"step": 1847
|
14055 |
+
},
|
14056 |
+
{
|
14057 |
+
"epoch": 0.8760369755866318,
|
14058 |
+
"grad_norm": 7.812302112579346,
|
14059 |
+
"learning_rate": 9.187415121950194e-07,
|
14060 |
+
"loss": 0.2442,
|
14061 |
+
"step": 1848
|
14062 |
+
},
|
14063 |
+
{
|
14064 |
+
"epoch": 0.8765110215690922,
|
14065 |
+
"grad_norm": 3.16363787651062,
|
14066 |
+
"learning_rate": 9.118236266049707e-07,
|
14067 |
+
"loss": 0.1255,
|
14068 |
+
"step": 1849
|
14069 |
+
},
|
14070 |
+
{
|
14071 |
+
"epoch": 0.8769850675515525,
|
14072 |
+
"grad_norm": 5.470139503479004,
|
14073 |
+
"learning_rate": 9.049306401336922e-07,
|
14074 |
+
"loss": 0.1974,
|
14075 |
+
"step": 1850
|
14076 |
+
},
|
14077 |
+
{
|
14078 |
+
"epoch": 0.8774591135340128,
|
14079 |
+
"grad_norm": 3.268472194671631,
|
14080 |
+
"learning_rate": 8.980625716660829e-07,
|
14081 |
+
"loss": 0.0863,
|
14082 |
+
"step": 1851
|
14083 |
+
},
|
14084 |
+
{
|
14085 |
+
"epoch": 0.8779331595164731,
|
14086 |
+
"grad_norm": 2.927609920501709,
|
14087 |
+
"learning_rate": 8.912194400187712e-07,
|
14088 |
+
"loss": 0.0827,
|
14089 |
+
"step": 1852
|
14090 |
+
},
|
14091 |
+
{
|
14092 |
+
"epoch": 0.8784072054989334,
|
14093 |
+
"grad_norm": 6.857902526855469,
|
14094 |
+
"learning_rate": 8.84401263940069e-07,
|
14095 |
+
"loss": 0.1555,
|
14096 |
+
"step": 1853
|
14097 |
+
},
|
14098 |
+
{
|
14099 |
+
"epoch": 0.8788812514813937,
|
14100 |
+
"grad_norm": 4.798774719238281,
|
14101 |
+
"learning_rate": 8.776080621099159e-07,
|
14102 |
+
"loss": 0.1973,
|
14103 |
+
"step": 1854
|
14104 |
+
},
|
14105 |
+
{
|
14106 |
+
"epoch": 0.879355297463854,
|
14107 |
+
"grad_norm": 4.6252946853637695,
|
14108 |
+
"learning_rate": 8.708398531398233e-07,
|
14109 |
+
"loss": 0.1612,
|
14110 |
+
"step": 1855
|
14111 |
+
},
|
14112 |
+
{
|
14113 |
+
"epoch": 0.8798293434463142,
|
14114 |
+
"grad_norm": 4.394217491149902,
|
14115 |
+
"learning_rate": 8.640966555728369e-07,
|
14116 |
+
"loss": 0.1261,
|
14117 |
+
"step": 1856
|
14118 |
+
},
|
14119 |
+
{
|
14120 |
+
"epoch": 0.8803033894287746,
|
14121 |
+
"grad_norm": 6.826826095581055,
|
14122 |
+
"learning_rate": 8.573784878834734e-07,
|
14123 |
+
"loss": 0.1587,
|
14124 |
+
"step": 1857
|
14125 |
+
},
|
14126 |
+
{
|
14127 |
+
"epoch": 0.8807774354112349,
|
14128 |
+
"grad_norm": 10.54698657989502,
|
14129 |
+
"learning_rate": 8.506853684776773e-07,
|
14130 |
+
"loss": 0.154,
|
14131 |
+
"step": 1858
|
14132 |
+
},
|
14133 |
+
{
|
14134 |
+
"epoch": 0.8812514813936951,
|
14135 |
+
"grad_norm": 4.272285461425781,
|
14136 |
+
"learning_rate": 8.440173156927612e-07,
|
14137 |
+
"loss": 0.1157,
|
14138 |
+
"step": 1859
|
14139 |
+
},
|
14140 |
+
{
|
14141 |
+
"epoch": 0.8817255273761555,
|
14142 |
+
"grad_norm": 5.016007900238037,
|
14143 |
+
"learning_rate": 8.373743477973739e-07,
|
14144 |
+
"loss": 0.2076,
|
14145 |
+
"step": 1860
|
14146 |
+
},
|
14147 |
+
{
|
14148 |
+
"epoch": 0.8817255273761555,
|
14149 |
+
"eval_accuracy": 0.9935587761674718,
|
14150 |
+
"eval_f1": 0.9272727272727272,
|
14151 |
+
"eval_loss": 0.012083540670573711,
|
14152 |
+
"eval_precision": 0.8793103448275862,
|
14153 |
+
"eval_recall": 0.9807692307692307,
|
14154 |
+
"eval_runtime": 49.9345,
|
14155 |
+
"eval_samples_per_second": 5.427,
|
14156 |
+
"eval_steps_per_second": 0.18,
|
14157 |
+
"step": 1860
|
14158 |
+
},
|
14159 |
+
{
|
14160 |
+
"epoch": 0.8821995733586158,
|
14161 |
+
"grad_norm": 5.868921756744385,
|
14162 |
+
"learning_rate": 8.307564829914272e-07,
|
14163 |
+
"loss": 0.168,
|
14164 |
+
"step": 1861
|
14165 |
+
},
|
14166 |
+
{
|
14167 |
+
"epoch": 0.8826736193410761,
|
14168 |
+
"grad_norm": 8.008037567138672,
|
14169 |
+
"learning_rate": 8.241637394060619e-07,
|
14170 |
+
"loss": 0.0711,
|
14171 |
+
"step": 1862
|
14172 |
+
},
|
14173 |
+
{
|
14174 |
+
"epoch": 0.8831476653235364,
|
14175 |
+
"grad_norm": 4.42324686050415,
|
14176 |
+
"learning_rate": 8.175961351035943e-07,
|
14177 |
+
"loss": 0.0957,
|
14178 |
+
"step": 1863
|
14179 |
+
},
|
14180 |
+
{
|
14181 |
+
"epoch": 0.8836217113059966,
|
14182 |
+
"grad_norm": 5.00337553024292,
|
14183 |
+
"learning_rate": 8.110536880774655e-07,
|
14184 |
+
"loss": 0.1814,
|
14185 |
+
"step": 1864
|
14186 |
+
},
|
14187 |
+
{
|
14188 |
+
"epoch": 0.884095757288457,
|
14189 |
+
"grad_norm": 4.169017791748047,
|
14190 |
+
"learning_rate": 8.045364162521884e-07,
|
14191 |
+
"loss": 0.112,
|
14192 |
+
"step": 1865
|
14193 |
+
},
|
14194 |
+
{
|
14195 |
+
"epoch": 0.8845698032709173,
|
14196 |
+
"grad_norm": 8.79692554473877,
|
14197 |
+
"learning_rate": 7.98044337483308e-07,
|
14198 |
+
"loss": 0.2539,
|
14199 |
+
"step": 1866
|
14200 |
+
},
|
14201 |
+
{
|
14202 |
+
"epoch": 0.8850438492533775,
|
14203 |
+
"grad_norm": 6.905977725982666,
|
14204 |
+
"learning_rate": 7.915774695573452e-07,
|
14205 |
+
"loss": 0.1628,
|
14206 |
+
"step": 1867
|
14207 |
+
},
|
14208 |
+
{
|
14209 |
+
"epoch": 0.8855178952358379,
|
14210 |
+
"grad_norm": 3.759481430053711,
|
14211 |
+
"learning_rate": 7.851358301917511e-07,
|
14212 |
+
"loss": 0.17,
|
14213 |
+
"step": 1868
|
14214 |
+
},
|
14215 |
+
{
|
14216 |
+
"epoch": 0.8859919412182982,
|
14217 |
+
"grad_norm": 4.754873275756836,
|
14218 |
+
"learning_rate": 7.787194370348549e-07,
|
14219 |
+
"loss": 0.2469,
|
14220 |
+
"step": 1869
|
14221 |
+
},
|
14222 |
+
{
|
14223 |
+
"epoch": 0.8864659872007584,
|
14224 |
+
"grad_norm": 5.5656280517578125,
|
14225 |
+
"learning_rate": 7.723283076658217e-07,
|
14226 |
+
"loss": 0.1551,
|
14227 |
+
"step": 1870
|
14228 |
+
},
|
14229 |
+
{
|
14230 |
+
"epoch": 0.8869400331832188,
|
14231 |
+
"grad_norm": 2.3625526428222656,
|
14232 |
+
"learning_rate": 7.659624595945969e-07,
|
14233 |
+
"loss": 0.0846,
|
14234 |
+
"step": 1871
|
14235 |
+
},
|
14236 |
+
{
|
14237 |
+
"epoch": 0.887414079165679,
|
14238 |
+
"grad_norm": 10.592917442321777,
|
14239 |
+
"learning_rate": 7.596219102618652e-07,
|
14240 |
+
"loss": 0.2762,
|
14241 |
+
"step": 1872
|
14242 |
+
},
|
14243 |
+
{
|
14244 |
+
"epoch": 0.8878881251481394,
|
14245 |
+
"grad_norm": 5.2067952156066895,
|
14246 |
+
"learning_rate": 7.533066770389985e-07,
|
14247 |
+
"loss": 0.1768,
|
14248 |
+
"step": 1873
|
14249 |
+
},
|
14250 |
+
{
|
14251 |
+
"epoch": 0.8883621711305997,
|
14252 |
+
"grad_norm": 4.715292930603027,
|
14253 |
+
"learning_rate": 7.470167772280091e-07,
|
14254 |
+
"loss": 0.1107,
|
14255 |
+
"step": 1874
|
14256 |
+
},
|
14257 |
+
{
|
14258 |
+
"epoch": 0.8888362171130599,
|
14259 |
+
"grad_norm": 3.512718439102173,
|
14260 |
+
"learning_rate": 7.40752228061502e-07,
|
14261 |
+
"loss": 0.1145,
|
14262 |
+
"step": 1875
|
14263 |
+
},
|
14264 |
+
{
|
14265 |
+
"epoch": 0.8893102630955203,
|
14266 |
+
"grad_norm": 3.8536527156829834,
|
14267 |
+
"learning_rate": 7.345130467026318e-07,
|
14268 |
+
"loss": 0.1473,
|
14269 |
+
"step": 1876
|
14270 |
+
},
|
14271 |
+
{
|
14272 |
+
"epoch": 0.8897843090779806,
|
14273 |
+
"grad_norm": 3.4637436866760254,
|
14274 |
+
"learning_rate": 7.282992502450447e-07,
|
14275 |
+
"loss": 0.1661,
|
14276 |
+
"step": 1877
|
14277 |
+
},
|
14278 |
+
{
|
14279 |
+
"epoch": 0.8902583550604408,
|
14280 |
+
"grad_norm": 2.695815324783325,
|
14281 |
+
"learning_rate": 7.221108557128509e-07,
|
14282 |
+
"loss": 0.139,
|
14283 |
+
"step": 1878
|
14284 |
+
},
|
14285 |
+
{
|
14286 |
+
"epoch": 0.8907324010429012,
|
14287 |
+
"grad_norm": 4.534758567810059,
|
14288 |
+
"learning_rate": 7.159478800605546e-07,
|
14289 |
+
"loss": 0.1425,
|
14290 |
+
"step": 1879
|
14291 |
+
},
|
14292 |
+
{
|
14293 |
+
"epoch": 0.8912064470253614,
|
14294 |
+
"grad_norm": 7.158409595489502,
|
14295 |
+
"learning_rate": 7.098103401730272e-07,
|
14296 |
+
"loss": 0.1628,
|
14297 |
+
"step": 1880
|
14298 |
+
},
|
14299 |
+
{
|
14300 |
+
"epoch": 0.8912064470253614,
|
14301 |
+
"eval_accuracy": 0.9935587761674718,
|
14302 |
+
"eval_f1": 0.9272727272727272,
|
14303 |
+
"eval_loss": 0.012087295763194561,
|
14304 |
+
"eval_precision": 0.8793103448275862,
|
14305 |
+
"eval_recall": 0.9807692307692307,
|
14306 |
+
"eval_runtime": 49.8216,
|
14307 |
+
"eval_samples_per_second": 5.439,
|
14308 |
+
"eval_steps_per_second": 0.181,
|
14309 |
+
"step": 1880
|
14310 |
+
},
|
14311 |
+
{
|
14312 |
+
"epoch": 0.8916804930078217,
|
14313 |
+
"grad_norm": 4.419368267059326,
|
14314 |
+
"learning_rate": 7.03698252865449e-07,
|
14315 |
+
"loss": 0.0986,
|
14316 |
+
"step": 1881
|
14317 |
+
},
|
14318 |
+
{
|
14319 |
+
"epoch": 0.8921545389902821,
|
14320 |
+
"grad_norm": 5.9724931716918945,
|
14321 |
+
"learning_rate": 6.976116348832684e-07,
|
14322 |
+
"loss": 0.2064,
|
14323 |
+
"step": 1882
|
14324 |
+
},
|
14325 |
+
{
|
14326 |
+
"epoch": 0.8926285849727423,
|
14327 |
+
"grad_norm": 4.130607604980469,
|
14328 |
+
"learning_rate": 6.915505029021552e-07,
|
14329 |
+
"loss": 0.1445,
|
14330 |
+
"step": 1883
|
14331 |
+
},
|
14332 |
+
{
|
14333 |
+
"epoch": 0.8931026309552027,
|
14334 |
+
"grad_norm": 4.273713111877441,
|
14335 |
+
"learning_rate": 6.855148735279527e-07,
|
14336 |
+
"loss": 0.1389,
|
14337 |
+
"step": 1884
|
14338 |
+
},
|
14339 |
+
{
|
14340 |
+
"epoch": 0.893576676937663,
|
14341 |
+
"grad_norm": 5.399996280670166,
|
14342 |
+
"learning_rate": 6.795047632966379e-07,
|
14343 |
+
"loss": 0.1461,
|
14344 |
+
"step": 1885
|
14345 |
+
},
|
14346 |
+
{
|
14347 |
+
"epoch": 0.8940507229201232,
|
14348 |
+
"grad_norm": 6.056548118591309,
|
14349 |
+
"learning_rate": 6.735201886742671e-07,
|
14350 |
+
"loss": 0.1935,
|
14351 |
+
"step": 1886
|
14352 |
+
},
|
14353 |
+
{
|
14354 |
+
"epoch": 0.8945247689025836,
|
14355 |
+
"grad_norm": 5.537142276763916,
|
14356 |
+
"learning_rate": 6.675611660569403e-07,
|
14357 |
+
"loss": 0.1816,
|
14358 |
+
"step": 1887
|
14359 |
+
},
|
14360 |
+
{
|
14361 |
+
"epoch": 0.8949988148850438,
|
14362 |
+
"grad_norm": 6.469786167144775,
|
14363 |
+
"learning_rate": 6.616277117707493e-07,
|
14364 |
+
"loss": 0.1772,
|
14365 |
+
"step": 1888
|
14366 |
+
},
|
14367 |
+
{
|
14368 |
+
"epoch": 0.8954728608675041,
|
14369 |
+
"grad_norm": 4.300382137298584,
|
14370 |
+
"learning_rate": 6.55719842071737e-07,
|
14371 |
+
"loss": 0.0932,
|
14372 |
+
"step": 1889
|
14373 |
+
},
|
14374 |
+
{
|
14375 |
+
"epoch": 0.8959469068499645,
|
14376 |
+
"grad_norm": 6.920015335083008,
|
14377 |
+
"learning_rate": 6.498375731458529e-07,
|
14378 |
+
"loss": 0.208,
|
14379 |
+
"step": 1890
|
14380 |
+
},
|
14381 |
+
{
|
14382 |
+
"epoch": 0.8964209528324247,
|
14383 |
+
"grad_norm": 5.358169078826904,
|
14384 |
+
"learning_rate": 6.439809211089043e-07,
|
14385 |
+
"loss": 0.1518,
|
14386 |
+
"step": 1891
|
14387 |
+
},
|
14388 |
+
{
|
14389 |
+
"epoch": 0.896894998814885,
|
14390 |
+
"grad_norm": 9.420503616333008,
|
14391 |
+
"learning_rate": 6.381499020065163e-07,
|
14392 |
+
"loss": 0.1817,
|
14393 |
+
"step": 1892
|
14394 |
+
},
|
14395 |
+
{
|
14396 |
+
"epoch": 0.8973690447973454,
|
14397 |
+
"grad_norm": 5.0321855545043945,
|
14398 |
+
"learning_rate": 6.323445318140886e-07,
|
14399 |
+
"loss": 0.1786,
|
14400 |
+
"step": 1893
|
14401 |
+
},
|
14402 |
+
{
|
14403 |
+
"epoch": 0.8978430907798056,
|
14404 |
+
"grad_norm": 4.13561487197876,
|
14405 |
+
"learning_rate": 6.265648264367452e-07,
|
14406 |
+
"loss": 0.1003,
|
14407 |
+
"step": 1894
|
14408 |
+
},
|
14409 |
+
{
|
14410 |
+
"epoch": 0.898317136762266,
|
14411 |
+
"grad_norm": 7.733060359954834,
|
14412 |
+
"learning_rate": 6.20810801709305e-07,
|
14413 |
+
"loss": 0.216,
|
14414 |
+
"step": 1895
|
14415 |
+
},
|
14416 |
+
{
|
14417 |
+
"epoch": 0.8987911827447262,
|
14418 |
+
"grad_norm": 2.7273457050323486,
|
14419 |
+
"learning_rate": 6.15082473396218e-07,
|
14420 |
+
"loss": 0.1149,
|
14421 |
+
"step": 1896
|
14422 |
+
},
|
14423 |
+
{
|
14424 |
+
"epoch": 0.8992652287271865,
|
14425 |
+
"grad_norm": 2.0938057899475098,
|
14426 |
+
"learning_rate": 6.093798571915389e-07,
|
14427 |
+
"loss": 0.0787,
|
14428 |
+
"step": 1897
|
14429 |
+
},
|
14430 |
+
{
|
14431 |
+
"epoch": 0.8997392747096469,
|
14432 |
+
"grad_norm": 6.044375896453857,
|
14433 |
+
"learning_rate": 6.037029687188767e-07,
|
14434 |
+
"loss": 0.1878,
|
14435 |
+
"step": 1898
|
14436 |
+
},
|
14437 |
+
{
|
14438 |
+
"epoch": 0.9002133206921071,
|
14439 |
+
"grad_norm": 2.365513563156128,
|
14440 |
+
"learning_rate": 5.980518235313549e-07,
|
14441 |
+
"loss": 0.1065,
|
14442 |
+
"step": 1899
|
14443 |
+
},
|
14444 |
+
{
|
14445 |
+
"epoch": 0.9006873666745674,
|
14446 |
+
"grad_norm": 4.049135684967041,
|
14447 |
+
"learning_rate": 5.924264371115652e-07,
|
14448 |
+
"loss": 0.156,
|
14449 |
+
"step": 1900
|
14450 |
+
},
|
14451 |
+
{
|
14452 |
+
"epoch": 0.9006873666745674,
|
14453 |
+
"eval_accuracy": 0.9935587761674718,
|
14454 |
+
"eval_f1": 0.9272727272727272,
|
14455 |
+
"eval_loss": 0.012837257236242294,
|
14456 |
+
"eval_precision": 0.8793103448275862,
|
14457 |
+
"eval_recall": 0.9807692307692307,
|
14458 |
+
"eval_runtime": 49.4558,
|
14459 |
+
"eval_samples_per_second": 5.48,
|
14460 |
+
"eval_steps_per_second": 0.182,
|
14461 |
+
"step": 1900
|
14462 |
}
|
14463 |
],
|
14464 |
"logging_steps": 1,
|
|
|
14478 |
"attributes": {}
|
14479 |
}
|
14480 |
},
|
14481 |
+
"total_flos": 5.0749333019243315e+17,
|
14482 |
"train_batch_size": 8,
|
14483 |
"trial_name": null,
|
14484 |
"trial_params": null
|