Training in progress, step 2000, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97e5f90b02b18eee0439efcd1e11c562003887a0a8341c65f3c61afc97e6ce91
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa33fe523d912fae3cb37eeb6b60af785266354c6c31911ecc4617df910b0be2
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe1bbd6e31aea6726660dc7dac9d7b7b788a128231286077750bd3b7ceeb5a97
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cefe39a14475612351b6fccd8db8eec85a931549215bf24bafd93144edce8a5
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57079b1ad6dfda7f50c73be4cc9a2461ca37b66b4a9e6186c57fa89a2fbb32dc
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b90c40fdfe265353374604f556a9c76615bc263d7688eb1dc6fa1733158babe8
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:834bf46634f0752bdb674694ee8a0f7d157d699667caf2b5dc77591f5ada58ec
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20a45e516450ff75e5f30798a9fd5c55d60506aebd3e02c1c8b581ae0fd8ecb1
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f258b75154d2aee1a76c51ba8c53eb9ff1afc1684f65be22d906efc966e2f31d
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a2cd1c1229272286316fc487e083e3c0dbb26b851fd444bc5cfa3906d05744d
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54bd2f8ba2fbed41edcaf0b31a7cc52ace7dc5e888e79b744825e45b024f9c0c
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20daa4d654ef46df708f18dbbf7bc707be5815cfc90479bf1752f4b1f5183f51
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0c332a71d8fb512346f2df9841021fb4baac7da78dd4eb8a3c1b75157d59e96
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -14459,6 +14459,766 @@
|
|
14459 |
"eval_samples_per_second": 5.48,
|
14460 |
"eval_steps_per_second": 0.182,
|
14461 |
"step": 1900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14462 |
}
|
14463 |
],
|
14464 |
"logging_steps": 1,
|
@@ -14478,7 +15238,7 @@
|
|
14478 |
"attributes": {}
|
14479 |
}
|
14480 |
},
|
14481 |
-
"total_flos": 5.
|
14482 |
"train_batch_size": 8,
|
14483 |
"trial_name": null,
|
14484 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9480919649205973,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 2000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
14459 |
"eval_samples_per_second": 5.48,
|
14460 |
"eval_steps_per_second": 0.182,
|
14461 |
"step": 1900
|
14462 |
+
},
|
14463 |
+
{
|
14464 |
+
"epoch": 0.9011614126570278,
|
14465 |
+
"grad_norm": 4.734807014465332,
|
14466 |
+
"learning_rate": 5.868268248715292e-07,
|
14467 |
+
"loss": 0.2087,
|
14468 |
+
"step": 1901
|
14469 |
+
},
|
14470 |
+
{
|
14471 |
+
"epoch": 0.901635458639488,
|
14472 |
+
"grad_norm": 3.426779270172119,
|
14473 |
+
"learning_rate": 5.812530021526541e-07,
|
14474 |
+
"loss": 0.1254,
|
14475 |
+
"step": 1902
|
14476 |
+
},
|
14477 |
+
{
|
14478 |
+
"epoch": 0.9021095046219483,
|
14479 |
+
"grad_norm": 6.730406761169434,
|
14480 |
+
"learning_rate": 5.7570498422569e-07,
|
14481 |
+
"loss": 0.1027,
|
14482 |
+
"step": 1903
|
14483 |
+
},
|
14484 |
+
{
|
14485 |
+
"epoch": 0.9025835506044086,
|
14486 |
+
"grad_norm": 7.400092601776123,
|
14487 |
+
"learning_rate": 5.701827862906894e-07,
|
14488 |
+
"loss": 0.2117,
|
14489 |
+
"step": 1904
|
14490 |
+
},
|
14491 |
+
{
|
14492 |
+
"epoch": 0.9030575965868689,
|
14493 |
+
"grad_norm": 5.896395206451416,
|
14494 |
+
"learning_rate": 5.646864234769644e-07,
|
14495 |
+
"loss": 0.2147,
|
14496 |
+
"step": 1905
|
14497 |
+
},
|
14498 |
+
{
|
14499 |
+
"epoch": 0.9035316425693293,
|
14500 |
+
"grad_norm": 4.465495586395264,
|
14501 |
+
"learning_rate": 5.592159108430472e-07,
|
14502 |
+
"loss": 0.0945,
|
14503 |
+
"step": 1906
|
14504 |
+
},
|
14505 |
+
{
|
14506 |
+
"epoch": 0.9040056885517895,
|
14507 |
+
"grad_norm": 3.8033761978149414,
|
14508 |
+
"learning_rate": 5.537712633766479e-07,
|
14509 |
+
"loss": 0.0876,
|
14510 |
+
"step": 1907
|
14511 |
+
},
|
14512 |
+
{
|
14513 |
+
"epoch": 0.9044797345342498,
|
14514 |
+
"grad_norm": 4.422494888305664,
|
14515 |
+
"learning_rate": 5.483524959946097e-07,
|
14516 |
+
"loss": 0.0863,
|
14517 |
+
"step": 1908
|
14518 |
+
},
|
14519 |
+
{
|
14520 |
+
"epoch": 0.9049537805167102,
|
14521 |
+
"grad_norm": 5.122066974639893,
|
14522 |
+
"learning_rate": 5.429596235428746e-07,
|
14523 |
+
"loss": 0.1666,
|
14524 |
+
"step": 1909
|
14525 |
+
},
|
14526 |
+
{
|
14527 |
+
"epoch": 0.9054278264991704,
|
14528 |
+
"grad_norm": 4.508274078369141,
|
14529 |
+
"learning_rate": 5.375926607964399e-07,
|
14530 |
+
"loss": 0.1302,
|
14531 |
+
"step": 1910
|
14532 |
+
},
|
14533 |
+
{
|
14534 |
+
"epoch": 0.9059018724816307,
|
14535 |
+
"grad_norm": 5.495452880859375,
|
14536 |
+
"learning_rate": 5.322516224593143e-07,
|
14537 |
+
"loss": 0.1386,
|
14538 |
+
"step": 1911
|
14539 |
+
},
|
14540 |
+
{
|
14541 |
+
"epoch": 0.906375918464091,
|
14542 |
+
"grad_norm": 4.296015739440918,
|
14543 |
+
"learning_rate": 5.269365231644851e-07,
|
14544 |
+
"loss": 0.1947,
|
14545 |
+
"step": 1912
|
14546 |
+
},
|
14547 |
+
{
|
14548 |
+
"epoch": 0.9068499644465513,
|
14549 |
+
"grad_norm": 5.446202278137207,
|
14550 |
+
"learning_rate": 5.216473774738706e-07,
|
14551 |
+
"loss": 0.2568,
|
14552 |
+
"step": 1913
|
14553 |
+
},
|
14554 |
+
{
|
14555 |
+
"epoch": 0.9073240104290116,
|
14556 |
+
"grad_norm": 3.5428926944732666,
|
14557 |
+
"learning_rate": 5.163841998782837e-07,
|
14558 |
+
"loss": 0.0723,
|
14559 |
+
"step": 1914
|
14560 |
+
},
|
14561 |
+
{
|
14562 |
+
"epoch": 0.9077980564114719,
|
14563 |
+
"grad_norm": 3.931621789932251,
|
14564 |
+
"learning_rate": 5.111470047973932e-07,
|
14565 |
+
"loss": 0.1773,
|
14566 |
+
"step": 1915
|
14567 |
+
},
|
14568 |
+
{
|
14569 |
+
"epoch": 0.9082721023939322,
|
14570 |
+
"grad_norm": 2.9896233081817627,
|
14571 |
+
"learning_rate": 5.059358065796816e-07,
|
14572 |
+
"loss": 0.1289,
|
14573 |
+
"step": 1916
|
14574 |
+
},
|
14575 |
+
{
|
14576 |
+
"epoch": 0.9087461483763926,
|
14577 |
+
"grad_norm": 4.83162784576416,
|
14578 |
+
"learning_rate": 5.007506195024059e-07,
|
14579 |
+
"loss": 0.1292,
|
14580 |
+
"step": 1917
|
14581 |
+
},
|
14582 |
+
{
|
14583 |
+
"epoch": 0.9092201943588528,
|
14584 |
+
"grad_norm": 3.8033645153045654,
|
14585 |
+
"learning_rate": 4.955914577715615e-07,
|
14586 |
+
"loss": 0.1018,
|
14587 |
+
"step": 1918
|
14588 |
+
},
|
14589 |
+
{
|
14590 |
+
"epoch": 0.9096942403413131,
|
14591 |
+
"grad_norm": 3.928222417831421,
|
14592 |
+
"learning_rate": 4.904583355218429e-07,
|
14593 |
+
"loss": 0.1198,
|
14594 |
+
"step": 1919
|
14595 |
+
},
|
14596 |
+
{
|
14597 |
+
"epoch": 0.9101682863237734,
|
14598 |
+
"grad_norm": 5.3848557472229,
|
14599 |
+
"learning_rate": 4.853512668166005e-07,
|
14600 |
+
"loss": 0.0762,
|
14601 |
+
"step": 1920
|
14602 |
+
},
|
14603 |
+
{
|
14604 |
+
"epoch": 0.9101682863237734,
|
14605 |
+
"eval_accuracy": 0.9935587761674718,
|
14606 |
+
"eval_f1": 0.9272727272727272,
|
14607 |
+
"eval_loss": 0.012816701084375381,
|
14608 |
+
"eval_precision": 0.8793103448275862,
|
14609 |
+
"eval_recall": 0.9807692307692307,
|
14610 |
+
"eval_runtime": 50.4456,
|
14611 |
+
"eval_samples_per_second": 5.372,
|
14612 |
+
"eval_steps_per_second": 0.178,
|
14613 |
+
"step": 1920
|
14614 |
+
},
|
14615 |
+
{
|
14616 |
+
"epoch": 0.9106423323062337,
|
14617 |
+
"grad_norm": 5.235629558563232,
|
14618 |
+
"learning_rate": 4.802702656478053e-07,
|
14619 |
+
"loss": 0.1394,
|
14620 |
+
"step": 1921
|
14621 |
+
},
|
14622 |
+
{
|
14623 |
+
"epoch": 0.911116378288694,
|
14624 |
+
"grad_norm": 6.737102031707764,
|
14625 |
+
"learning_rate": 4.752153459360143e-07,
|
14626 |
+
"loss": 0.1753,
|
14627 |
+
"step": 1922
|
14628 |
+
},
|
14629 |
+
{
|
14630 |
+
"epoch": 0.9115904242711543,
|
14631 |
+
"grad_norm": 6.279690265655518,
|
14632 |
+
"learning_rate": 4.701865215303236e-07,
|
14633 |
+
"loss": 0.1381,
|
14634 |
+
"step": 1923
|
14635 |
+
},
|
14636 |
+
{
|
14637 |
+
"epoch": 0.9120644702536146,
|
14638 |
+
"grad_norm": 5.9221086502075195,
|
14639 |
+
"learning_rate": 4.6518380620833694e-07,
|
14640 |
+
"loss": 0.1984,
|
14641 |
+
"step": 1924
|
14642 |
+
},
|
14643 |
+
{
|
14644 |
+
"epoch": 0.9125385162360748,
|
14645 |
+
"grad_norm": 3.1503348350524902,
|
14646 |
+
"learning_rate": 4.602072136761282e-07,
|
14647 |
+
"loss": 0.1374,
|
14648 |
+
"step": 1925
|
14649 |
+
},
|
14650 |
+
{
|
14651 |
+
"epoch": 0.9130125622185352,
|
14652 |
+
"grad_norm": 5.046225070953369,
|
14653 |
+
"learning_rate": 4.5525675756819987e-07,
|
14654 |
+
"loss": 0.1439,
|
14655 |
+
"step": 1926
|
14656 |
+
},
|
14657 |
+
{
|
14658 |
+
"epoch": 0.9134866082009955,
|
14659 |
+
"grad_norm": 4.287316799163818,
|
14660 |
+
"learning_rate": 4.503324514474483e-07,
|
14661 |
+
"loss": 0.1769,
|
14662 |
+
"step": 1927
|
14663 |
+
},
|
14664 |
+
{
|
14665 |
+
"epoch": 0.9139606541834558,
|
14666 |
+
"grad_norm": 3.769968032836914,
|
14667 |
+
"learning_rate": 4.4543430880512604e-07,
|
14668 |
+
"loss": 0.1381,
|
14669 |
+
"step": 1928
|
14670 |
+
},
|
14671 |
+
{
|
14672 |
+
"epoch": 0.9144347001659161,
|
14673 |
+
"grad_norm": 3.910022020339966,
|
14674 |
+
"learning_rate": 4.4056234306080415e-07,
|
14675 |
+
"loss": 0.142,
|
14676 |
+
"step": 1929
|
14677 |
+
},
|
14678 |
+
{
|
14679 |
+
"epoch": 0.9149087461483764,
|
14680 |
+
"grad_norm": 2.3405351638793945,
|
14681 |
+
"learning_rate": 4.357165675623376e-07,
|
14682 |
+
"loss": 0.1014,
|
14683 |
+
"step": 1930
|
14684 |
+
},
|
14685 |
+
{
|
14686 |
+
"epoch": 0.9153827921308367,
|
14687 |
+
"grad_norm": 5.19395637512207,
|
14688 |
+
"learning_rate": 4.3089699558582776e-07,
|
14689 |
+
"loss": 0.1192,
|
14690 |
+
"step": 1931
|
14691 |
+
},
|
14692 |
+
{
|
14693 |
+
"epoch": 0.915856838113297,
|
14694 |
+
"grad_norm": 4.177900314331055,
|
14695 |
+
"learning_rate": 4.261036403355823e-07,
|
14696 |
+
"loss": 0.1327,
|
14697 |
+
"step": 1932
|
14698 |
+
},
|
14699 |
+
{
|
14700 |
+
"epoch": 0.9163308840957572,
|
14701 |
+
"grad_norm": 8.132122039794922,
|
14702 |
+
"learning_rate": 4.2133651494408513e-07,
|
14703 |
+
"loss": 0.1815,
|
14704 |
+
"step": 1933
|
14705 |
+
},
|
14706 |
+
{
|
14707 |
+
"epoch": 0.9168049300782176,
|
14708 |
+
"grad_norm": 4.363158702850342,
|
14709 |
+
"learning_rate": 4.165956324719556e-07,
|
14710 |
+
"loss": 0.1009,
|
14711 |
+
"step": 1934
|
14712 |
+
},
|
14713 |
+
{
|
14714 |
+
"epoch": 0.9172789760606779,
|
14715 |
+
"grad_norm": 2.370462417602539,
|
14716 |
+
"learning_rate": 4.1188100590791704e-07,
|
14717 |
+
"loss": 0.0722,
|
14718 |
+
"step": 1935
|
14719 |
+
},
|
14720 |
+
{
|
14721 |
+
"epoch": 0.9177530220431381,
|
14722 |
+
"grad_norm": 3.8629467487335205,
|
14723 |
+
"learning_rate": 4.0719264816875713e-07,
|
14724 |
+
"loss": 0.1657,
|
14725 |
+
"step": 1936
|
14726 |
+
},
|
14727 |
+
{
|
14728 |
+
"epoch": 0.9182270680255985,
|
14729 |
+
"grad_norm": 5.270659923553467,
|
14730 |
+
"learning_rate": 4.0253057209929556e-07,
|
14731 |
+
"loss": 0.1906,
|
14732 |
+
"step": 1937
|
14733 |
+
},
|
14734 |
+
{
|
14735 |
+
"epoch": 0.9187011140080588,
|
14736 |
+
"grad_norm": 3.739020824432373,
|
14737 |
+
"learning_rate": 3.9789479047234293e-07,
|
14738 |
+
"loss": 0.1354,
|
14739 |
+
"step": 1938
|
14740 |
+
},
|
14741 |
+
{
|
14742 |
+
"epoch": 0.9191751599905191,
|
14743 |
+
"grad_norm": 3.877326011657715,
|
14744 |
+
"learning_rate": 3.9328531598867517e-07,
|
14745 |
+
"loss": 0.1159,
|
14746 |
+
"step": 1939
|
14747 |
+
},
|
14748 |
+
{
|
14749 |
+
"epoch": 0.9196492059729794,
|
14750 |
+
"grad_norm": 3.6506576538085938,
|
14751 |
+
"learning_rate": 3.887021612769937e-07,
|
14752 |
+
"loss": 0.1372,
|
14753 |
+
"step": 1940
|
14754 |
+
},
|
14755 |
+
{
|
14756 |
+
"epoch": 0.9196492059729794,
|
14757 |
+
"eval_accuracy": 0.9935587761674718,
|
14758 |
+
"eval_f1": 0.9272727272727272,
|
14759 |
+
"eval_loss": 0.012422804720699787,
|
14760 |
+
"eval_precision": 0.8793103448275862,
|
14761 |
+
"eval_recall": 0.9807692307692307,
|
14762 |
+
"eval_runtime": 49.6153,
|
14763 |
+
"eval_samples_per_second": 5.462,
|
14764 |
+
"eval_steps_per_second": 0.181,
|
14765 |
+
"step": 1940
|
14766 |
+
},
|
14767 |
+
{
|
14768 |
+
"epoch": 0.9201232519554396,
|
14769 |
+
"grad_norm": 4.246330261230469,
|
14770 |
+
"learning_rate": 3.841453388938876e-07,
|
14771 |
+
"loss": 0.1756,
|
14772 |
+
"step": 1941
|
14773 |
+
},
|
14774 |
+
{
|
14775 |
+
"epoch": 0.9205972979379,
|
14776 |
+
"grad_norm": 4.453822135925293,
|
14777 |
+
"learning_rate": 3.7961486132380487e-07,
|
14778 |
+
"loss": 0.0994,
|
14779 |
+
"step": 1942
|
14780 |
+
},
|
14781 |
+
{
|
14782 |
+
"epoch": 0.9210713439203603,
|
14783 |
+
"grad_norm": 3.8408455848693848,
|
14784 |
+
"learning_rate": 3.7511074097901557e-07,
|
14785 |
+
"loss": 0.1043,
|
14786 |
+
"step": 1943
|
14787 |
+
},
|
14788 |
+
{
|
14789 |
+
"epoch": 0.9215453899028205,
|
14790 |
+
"grad_norm": 4.067150592803955,
|
14791 |
+
"learning_rate": 3.7063299019957867e-07,
|
14792 |
+
"loss": 0.1134,
|
14793 |
+
"step": 1944
|
14794 |
+
},
|
14795 |
+
{
|
14796 |
+
"epoch": 0.9220194358852809,
|
14797 |
+
"grad_norm": 6.159415245056152,
|
14798 |
+
"learning_rate": 3.661816212533076e-07,
|
14799 |
+
"loss": 0.1361,
|
14800 |
+
"step": 1945
|
14801 |
+
},
|
14802 |
+
{
|
14803 |
+
"epoch": 0.9224934818677412,
|
14804 |
+
"grad_norm": 2.458495616912842,
|
14805 |
+
"learning_rate": 3.617566463357336e-07,
|
14806 |
+
"loss": 0.0948,
|
14807 |
+
"step": 1946
|
14808 |
+
},
|
14809 |
+
{
|
14810 |
+
"epoch": 0.9229675278502014,
|
14811 |
+
"grad_norm": 6.143227577209473,
|
14812 |
+
"learning_rate": 3.5735807757008354e-07,
|
14813 |
+
"loss": 0.2176,
|
14814 |
+
"step": 1947
|
14815 |
+
},
|
14816 |
+
{
|
14817 |
+
"epoch": 0.9234415738326618,
|
14818 |
+
"grad_norm": 3.9085354804992676,
|
14819 |
+
"learning_rate": 3.529859270072289e-07,
|
14820 |
+
"loss": 0.1214,
|
14821 |
+
"step": 1948
|
14822 |
+
},
|
14823 |
+
{
|
14824 |
+
"epoch": 0.923915619815122,
|
14825 |
+
"grad_norm": 4.7195963859558105,
|
14826 |
+
"learning_rate": 3.4864020662566775e-07,
|
14827 |
+
"loss": 0.1125,
|
14828 |
+
"step": 1949
|
14829 |
+
},
|
14830 |
+
{
|
14831 |
+
"epoch": 0.9243896657975824,
|
14832 |
+
"grad_norm": 4.297183990478516,
|
14833 |
+
"learning_rate": 3.443209283314863e-07,
|
14834 |
+
"loss": 0.1678,
|
14835 |
+
"step": 1950
|
14836 |
+
},
|
14837 |
+
{
|
14838 |
+
"epoch": 0.9248637117800427,
|
14839 |
+
"grad_norm": 2.1935582160949707,
|
14840 |
+
"learning_rate": 3.4002810395832753e-07,
|
14841 |
+
"loss": 0.0998,
|
14842 |
+
"step": 1951
|
14843 |
+
},
|
14844 |
+
{
|
14845 |
+
"epoch": 0.9253377577625029,
|
14846 |
+
"grad_norm": 4.309812068939209,
|
14847 |
+
"learning_rate": 3.357617452673545e-07,
|
14848 |
+
"loss": 0.0861,
|
14849 |
+
"step": 1952
|
14850 |
+
},
|
14851 |
+
{
|
14852 |
+
"epoch": 0.9258118037449633,
|
14853 |
+
"grad_norm": 5.1253743171691895,
|
14854 |
+
"learning_rate": 3.3152186394722506e-07,
|
14855 |
+
"loss": 0.1119,
|
14856 |
+
"step": 1953
|
14857 |
+
},
|
14858 |
+
{
|
14859 |
+
"epoch": 0.9262858497274236,
|
14860 |
+
"grad_norm": 4.127727031707764,
|
14861 |
+
"learning_rate": 3.27308471614054e-07,
|
14862 |
+
"loss": 0.1562,
|
14863 |
+
"step": 1954
|
14864 |
+
},
|
14865 |
+
{
|
14866 |
+
"epoch": 0.9267598957098838,
|
14867 |
+
"grad_norm": 3.679004430770874,
|
14868 |
+
"learning_rate": 3.2312157981138626e-07,
|
14869 |
+
"loss": 0.1136,
|
14870 |
+
"step": 1955
|
14871 |
+
},
|
14872 |
+
{
|
14873 |
+
"epoch": 0.9272339416923442,
|
14874 |
+
"grad_norm": 6.002187728881836,
|
14875 |
+
"learning_rate": 3.189612000101594e-07,
|
14876 |
+
"loss": 0.1292,
|
14877 |
+
"step": 1956
|
14878 |
+
},
|
14879 |
+
{
|
14880 |
+
"epoch": 0.9277079876748044,
|
14881 |
+
"grad_norm": 4.653674125671387,
|
14882 |
+
"learning_rate": 3.148273436086757e-07,
|
14883 |
+
"loss": 0.1301,
|
14884 |
+
"step": 1957
|
14885 |
+
},
|
14886 |
+
{
|
14887 |
+
"epoch": 0.9281820336572647,
|
14888 |
+
"grad_norm": 3.5616414546966553,
|
14889 |
+
"learning_rate": 3.107200219325746e-07,
|
14890 |
+
"loss": 0.1325,
|
14891 |
+
"step": 1958
|
14892 |
+
},
|
14893 |
+
{
|
14894 |
+
"epoch": 0.9286560796397251,
|
14895 |
+
"grad_norm": 3.704566240310669,
|
14896 |
+
"learning_rate": 3.0663924623479337e-07,
|
14897 |
+
"loss": 0.1194,
|
14898 |
+
"step": 1959
|
14899 |
+
},
|
14900 |
+
{
|
14901 |
+
"epoch": 0.9291301256221853,
|
14902 |
+
"grad_norm": 3.3392817974090576,
|
14903 |
+
"learning_rate": 3.0258502769553996e-07,
|
14904 |
+
"loss": 0.0837,
|
14905 |
+
"step": 1960
|
14906 |
+
},
|
14907 |
+
{
|
14908 |
+
"epoch": 0.9291301256221853,
|
14909 |
+
"eval_accuracy": 0.9935587761674718,
|
14910 |
+
"eval_f1": 0.9272727272727272,
|
14911 |
+
"eval_loss": 0.012340452522039413,
|
14912 |
+
"eval_precision": 0.8793103448275862,
|
14913 |
+
"eval_recall": 0.9807692307692307,
|
14914 |
+
"eval_runtime": 50.1351,
|
14915 |
+
"eval_samples_per_second": 5.405,
|
14916 |
+
"eval_steps_per_second": 0.18,
|
14917 |
+
"step": 1960
|
14918 |
+
},
|
14919 |
+
{
|
14920 |
+
"epoch": 0.9296041716046457,
|
14921 |
+
"grad_norm": 8.908299446105957,
|
14922 |
+
"learning_rate": 2.985573774222661e-07,
|
14923 |
+
"loss": 0.1625,
|
14924 |
+
"step": 1961
|
14925 |
+
},
|
14926 |
+
{
|
14927 |
+
"epoch": 0.930078217587106,
|
14928 |
+
"grad_norm": 3.4975991249084473,
|
14929 |
+
"learning_rate": 2.945563064496326e-07,
|
14930 |
+
"loss": 0.1712,
|
14931 |
+
"step": 1962
|
14932 |
+
},
|
14933 |
+
{
|
14934 |
+
"epoch": 0.9305522635695662,
|
14935 |
+
"grad_norm": 4.6506147384643555,
|
14936 |
+
"learning_rate": 2.905818257394799e-07,
|
14937 |
+
"loss": 0.1451,
|
14938 |
+
"step": 1963
|
14939 |
+
},
|
14940 |
+
{
|
14941 |
+
"epoch": 0.9310263095520266,
|
14942 |
+
"grad_norm": 1.9439915418624878,
|
14943 |
+
"learning_rate": 2.8663394618079875e-07,
|
14944 |
+
"loss": 0.0572,
|
14945 |
+
"step": 1964
|
14946 |
+
},
|
14947 |
+
{
|
14948 |
+
"epoch": 0.9315003555344868,
|
14949 |
+
"grad_norm": 4.1189374923706055,
|
14950 |
+
"learning_rate": 2.827126785897005e-07,
|
14951 |
+
"loss": 0.1361,
|
14952 |
+
"step": 1965
|
14953 |
+
},
|
14954 |
+
{
|
14955 |
+
"epoch": 0.9319744015169471,
|
14956 |
+
"grad_norm": 6.66880989074707,
|
14957 |
+
"learning_rate": 2.78818033709386e-07,
|
14958 |
+
"loss": 0.1701,
|
14959 |
+
"step": 1966
|
14960 |
+
},
|
14961 |
+
{
|
14962 |
+
"epoch": 0.9324484474994075,
|
14963 |
+
"grad_norm": 4.2832794189453125,
|
14964 |
+
"learning_rate": 2.7495002221011757e-07,
|
14965 |
+
"loss": 0.1376,
|
14966 |
+
"step": 1967
|
14967 |
+
},
|
14968 |
+
{
|
14969 |
+
"epoch": 0.9329224934818677,
|
14970 |
+
"grad_norm": 3.8820581436157227,
|
14971 |
+
"learning_rate": 2.7110865468919057e-07,
|
14972 |
+
"loss": 0.1829,
|
14973 |
+
"step": 1968
|
14974 |
+
},
|
14975 |
+
{
|
14976 |
+
"epoch": 0.933396539464328,
|
14977 |
+
"grad_norm": 4.808830261230469,
|
14978 |
+
"learning_rate": 2.672939416708986e-07,
|
14979 |
+
"loss": 0.1535,
|
14980 |
+
"step": 1969
|
14981 |
+
},
|
14982 |
+
{
|
14983 |
+
"epoch": 0.9338705854467884,
|
14984 |
+
"grad_norm": 3.9023189544677734,
|
14985 |
+
"learning_rate": 2.635058936065138e-07,
|
14986 |
+
"loss": 0.1386,
|
14987 |
+
"step": 1970
|
14988 |
+
},
|
14989 |
+
{
|
14990 |
+
"epoch": 0.9343446314292486,
|
14991 |
+
"grad_norm": 8.328058242797852,
|
14992 |
+
"learning_rate": 2.5974452087425437e-07,
|
14993 |
+
"loss": 0.2852,
|
14994 |
+
"step": 1971
|
14995 |
+
},
|
14996 |
+
{
|
14997 |
+
"epoch": 0.934818677411709,
|
14998 |
+
"grad_norm": 7.914390563964844,
|
14999 |
+
"learning_rate": 2.5600983377925046e-07,
|
15000 |
+
"loss": 0.1979,
|
15001 |
+
"step": 1972
|
15002 |
+
},
|
15003 |
+
{
|
15004 |
+
"epoch": 0.9352927233941692,
|
15005 |
+
"grad_norm": 3.548283815383911,
|
15006 |
+
"learning_rate": 2.523018425535251e-07,
|
15007 |
+
"loss": 0.1297,
|
15008 |
+
"step": 1973
|
15009 |
+
},
|
15010 |
+
{
|
15011 |
+
"epoch": 0.9357667693766295,
|
15012 |
+
"grad_norm": 6.728952884674072,
|
15013 |
+
"learning_rate": 2.486205573559608e-07,
|
15014 |
+
"loss": 0.1692,
|
15015 |
+
"step": 1974
|
15016 |
+
},
|
15017 |
+
{
|
15018 |
+
"epoch": 0.9362408153590899,
|
15019 |
+
"grad_norm": 3.3491721153259277,
|
15020 |
+
"learning_rate": 2.4496598827227213e-07,
|
15021 |
+
"loss": 0.0886,
|
15022 |
+
"step": 1975
|
15023 |
+
},
|
15024 |
+
{
|
15025 |
+
"epoch": 0.9367148613415501,
|
15026 |
+
"grad_norm": 5.318295001983643,
|
15027 |
+
"learning_rate": 2.413381453149799e-07,
|
15028 |
+
"loss": 0.1496,
|
15029 |
+
"step": 1976
|
15030 |
+
},
|
15031 |
+
{
|
15032 |
+
"epoch": 0.9371889073240104,
|
15033 |
+
"grad_norm": 8.961012840270996,
|
15034 |
+
"learning_rate": 2.3773703842338125e-07,
|
15035 |
+
"loss": 0.1294,
|
15036 |
+
"step": 1977
|
15037 |
+
},
|
15038 |
+
{
|
15039 |
+
"epoch": 0.9376629533064708,
|
15040 |
+
"grad_norm": 8.02442741394043,
|
15041 |
+
"learning_rate": 2.3416267746352528e-07,
|
15042 |
+
"loss": 0.1405,
|
15043 |
+
"step": 1978
|
15044 |
+
},
|
15045 |
+
{
|
15046 |
+
"epoch": 0.938136999288931,
|
15047 |
+
"grad_norm": 5.480352401733398,
|
15048 |
+
"learning_rate": 2.3061507222818303e-07,
|
15049 |
+
"loss": 0.1055,
|
15050 |
+
"step": 1979
|
15051 |
+
},
|
15052 |
+
{
|
15053 |
+
"epoch": 0.9386110452713913,
|
15054 |
+
"grad_norm": 4.235230445861816,
|
15055 |
+
"learning_rate": 2.2709423243682416e-07,
|
15056 |
+
"loss": 0.1353,
|
15057 |
+
"step": 1980
|
15058 |
+
},
|
15059 |
+
{
|
15060 |
+
"epoch": 0.9386110452713913,
|
15061 |
+
"eval_accuracy": 0.9935587761674718,
|
15062 |
+
"eval_f1": 0.9272727272727272,
|
15063 |
+
"eval_loss": 0.01255668792873621,
|
15064 |
+
"eval_precision": 0.8793103448275862,
|
15065 |
+
"eval_recall": 0.9807692307692307,
|
15066 |
+
"eval_runtime": 49.3085,
|
15067 |
+
"eval_samples_per_second": 5.496,
|
15068 |
+
"eval_steps_per_second": 0.183,
|
15069 |
+
"step": 1980
|
15070 |
+
},
|
15071 |
+
{
|
15072 |
+
"epoch": 0.9390850912538516,
|
15073 |
+
"grad_norm": 3.984555959701538,
|
15074 |
+
"learning_rate": 2.23600167735587e-07,
|
15075 |
+
"loss": 0.1236,
|
15076 |
+
"step": 1981
|
15077 |
+
},
|
15078 |
+
{
|
15079 |
+
"epoch": 0.9395591372363119,
|
15080 |
+
"grad_norm": 5.206995487213135,
|
15081 |
+
"learning_rate": 2.2013288769725194e-07,
|
15082 |
+
"loss": 0.2124,
|
15083 |
+
"step": 1982
|
15084 |
+
},
|
15085 |
+
{
|
15086 |
+
"epoch": 0.9400331832187723,
|
15087 |
+
"grad_norm": 4.533375263214111,
|
15088 |
+
"learning_rate": 2.166924018212202e-07,
|
15089 |
+
"loss": 0.1632,
|
15090 |
+
"step": 1983
|
15091 |
+
},
|
15092 |
+
{
|
15093 |
+
"epoch": 0.9405072292012325,
|
15094 |
+
"grad_norm": 5.9977641105651855,
|
15095 |
+
"learning_rate": 2.132787195334829e-07,
|
15096 |
+
"loss": 0.2039,
|
15097 |
+
"step": 1984
|
15098 |
+
},
|
15099 |
+
{
|
15100 |
+
"epoch": 0.9409812751836928,
|
15101 |
+
"grad_norm": 6.321089744567871,
|
15102 |
+
"learning_rate": 2.0989185018659431e-07,
|
15103 |
+
"loss": 0.1539,
|
15104 |
+
"step": 1985
|
15105 |
+
},
|
15106 |
+
{
|
15107 |
+
"epoch": 0.9414553211661532,
|
15108 |
+
"grad_norm": 4.989069938659668,
|
15109 |
+
"learning_rate": 2.0653180305965194e-07,
|
15110 |
+
"loss": 0.1501,
|
15111 |
+
"step": 1986
|
15112 |
+
},
|
15113 |
+
{
|
15114 |
+
"epoch": 0.9419293671486134,
|
15115 |
+
"grad_norm": 4.138362407684326,
|
15116 |
+
"learning_rate": 2.0319858735826648e-07,
|
15117 |
+
"loss": 0.1388,
|
15118 |
+
"step": 1987
|
15119 |
+
},
|
15120 |
+
{
|
15121 |
+
"epoch": 0.9424034131310737,
|
15122 |
+
"grad_norm": 11.884577751159668,
|
15123 |
+
"learning_rate": 1.9989221221453746e-07,
|
15124 |
+
"loss": 0.2071,
|
15125 |
+
"step": 1988
|
15126 |
+
},
|
15127 |
+
{
|
15128 |
+
"epoch": 0.942877459113534,
|
15129 |
+
"grad_norm": 4.623379707336426,
|
15130 |
+
"learning_rate": 1.966126866870277e-07,
|
15131 |
+
"loss": 0.1529,
|
15132 |
+
"step": 1989
|
15133 |
+
},
|
15134 |
+
{
|
15135 |
+
"epoch": 0.9433515050959943,
|
15136 |
+
"grad_norm": 5.523632526397705,
|
15137 |
+
"learning_rate": 1.9336001976074326e-07,
|
15138 |
+
"loss": 0.2024,
|
15139 |
+
"step": 1990
|
15140 |
+
},
|
15141 |
+
{
|
15142 |
+
"epoch": 0.9438255510784546,
|
15143 |
+
"grad_norm": 4.839412689208984,
|
15144 |
+
"learning_rate": 1.9013422034710016e-07,
|
15145 |
+
"loss": 0.1426,
|
15146 |
+
"step": 1991
|
15147 |
+
},
|
15148 |
+
{
|
15149 |
+
"epoch": 0.9442995970609149,
|
15150 |
+
"grad_norm": 9.59015941619873,
|
15151 |
+
"learning_rate": 1.869352972839067e-07,
|
15152 |
+
"loss": 0.2005,
|
15153 |
+
"step": 1992
|
15154 |
+
},
|
15155 |
+
{
|
15156 |
+
"epoch": 0.9447736430433752,
|
15157 |
+
"grad_norm": 4.234097957611084,
|
15158 |
+
"learning_rate": 1.837632593353389e-07,
|
15159 |
+
"loss": 0.1123,
|
15160 |
+
"step": 1993
|
15161 |
+
},
|
15162 |
+
{
|
15163 |
+
"epoch": 0.9452476890258356,
|
15164 |
+
"grad_norm": 4.442883491516113,
|
15165 |
+
"learning_rate": 1.8061811519191287e-07,
|
15166 |
+
"loss": 0.1053,
|
15167 |
+
"step": 1994
|
15168 |
+
},
|
15169 |
+
{
|
15170 |
+
"epoch": 0.9457217350082958,
|
15171 |
+
"grad_norm": 4.088728904724121,
|
15172 |
+
"learning_rate": 1.7749987347046471e-07,
|
15173 |
+
"loss": 0.0867,
|
15174 |
+
"step": 1995
|
15175 |
+
},
|
15176 |
+
{
|
15177 |
+
"epoch": 0.9461957809907561,
|
15178 |
+
"grad_norm": 4.195045471191406,
|
15179 |
+
"learning_rate": 1.7440854271412288e-07,
|
15180 |
+
"loss": 0.159,
|
15181 |
+
"step": 1996
|
15182 |
+
},
|
15183 |
+
{
|
15184 |
+
"epoch": 0.9466698269732164,
|
15185 |
+
"grad_norm": 4.0102739334106445,
|
15186 |
+
"learning_rate": 1.7134413139228812e-07,
|
15187 |
+
"loss": 0.1162,
|
15188 |
+
"step": 1997
|
15189 |
+
},
|
15190 |
+
{
|
15191 |
+
"epoch": 0.9471438729556767,
|
15192 |
+
"grad_norm": 5.108349800109863,
|
15193 |
+
"learning_rate": 1.6830664790061124e-07,
|
15194 |
+
"loss": 0.1445,
|
15195 |
+
"step": 1998
|
15196 |
+
},
|
15197 |
+
{
|
15198 |
+
"epoch": 0.947617918938137,
|
15199 |
+
"grad_norm": 6.93289852142334,
|
15200 |
+
"learning_rate": 1.6529610056096768e-07,
|
15201 |
+
"loss": 0.1204,
|
15202 |
+
"step": 1999
|
15203 |
+
},
|
15204 |
+
{
|
15205 |
+
"epoch": 0.9480919649205973,
|
15206 |
+
"grad_norm": 8.224555015563965,
|
15207 |
+
"learning_rate": 1.6231249762143187e-07,
|
15208 |
+
"loss": 0.1914,
|
15209 |
+
"step": 2000
|
15210 |
+
},
|
15211 |
+
{
|
15212 |
+
"epoch": 0.9480919649205973,
|
15213 |
+
"eval_accuracy": 0.9935587761674718,
|
15214 |
+
"eval_f1": 0.9272727272727272,
|
15215 |
+
"eval_loss": 0.012721872888505459,
|
15216 |
+
"eval_precision": 0.8793103448275862,
|
15217 |
+
"eval_recall": 0.9807692307692307,
|
15218 |
+
"eval_runtime": 50.4202,
|
15219 |
+
"eval_samples_per_second": 5.375,
|
15220 |
+
"eval_steps_per_second": 0.179,
|
15221 |
+
"step": 2000
|
15222 |
}
|
15223 |
],
|
15224 |
"logging_steps": 1,
|
|
|
15238 |
"attributes": {}
|
15239 |
}
|
15240 |
},
|
15241 |
+
"total_flos": 5.343662741557084e+17,
|
15242 |
"train_batch_size": 8,
|
15243 |
"trial_name": null,
|
15244 |
"trial_params": null
|