Training in progress, step 1800, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab77ddbb637a9bc0d36ef5dbbbaa0af341f0bf80b31d6e285eda485f1898eb8f
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:286a3c21980a7a404bde1675a6009d842c88e5d90ac29813f962481a4368f26f
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09f72128a18889f676efa0e2e54c424b47d5e4132cc601aa9074f6f5411b8f94
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:041f67a3ce2b2d81077a75116ed983ec1c3d6e3f3611853c338c7ecf44913d9e
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:851162e189682dac7fc53a1c0d10ceb5145d8569c1a94696d7c715b4c49a67ea
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7d32e1e73d5d548c4bd50868ff94314b76b56f22bf14438e5afcb5d47b865b7
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9738f30136c7a74ad2e25b79cf200868a8a6622ff78163abd5aa23402612abf6
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4a3d3761905544d982ee155e6770c63fdcd8e1d6ad804c9e3fc0b48ef3c557a
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6294caa602c6fe4743ca0c7205bb0551de153ef41f54789786a229cd626bef4
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:305f7da916867733708b5e00527298ca628c3162916331e86427a0e6c1d84c36
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91731ae666b3fff1615f6c83fbbbe5160c401bb673770f4a96920e7df7c75154
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30c1d4c35a1c4cecfd92a1a7ea971a84bf462bf7dead3baefbde0a5e7b2317cc
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1735f1ec303b05af874a7e52da143869d5f926b0a117b607b330e92e0e8872be
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -12939,6 +12939,766 @@
|
|
12939 |
"eval_samples_per_second": 5.502,
|
12940 |
"eval_steps_per_second": 0.183,
|
12941 |
"step": 1700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12942 |
}
|
12943 |
],
|
12944 |
"logging_steps": 1,
|
@@ -12958,7 +13718,7 @@
|
|
12958 |
"attributes": {}
|
12959 |
}
|
12960 |
},
|
12961 |
-
"total_flos": 4.
|
12962 |
"train_batch_size": 8,
|
12963 |
"trial_name": null,
|
12964 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8532827684285376,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 1800,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
12939 |
"eval_samples_per_second": 5.502,
|
12940 |
"eval_steps_per_second": 0.183,
|
12941 |
"step": 1700
|
12942 |
+
},
|
12943 |
+
{
|
12944 |
+
"epoch": 0.806352216164968,
|
12945 |
+
"grad_norm": 5.284399509429932,
|
12946 |
+
"learning_rate": 2.1949715425360173e-06,
|
12947 |
+
"loss": 0.2302,
|
12948 |
+
"step": 1701
|
12949 |
+
},
|
12950 |
+
{
|
12951 |
+
"epoch": 0.8068262621474283,
|
12952 |
+
"grad_norm": 3.2276976108551025,
|
12953 |
+
"learning_rate": 2.184634643477831e-06,
|
12954 |
+
"loss": 0.1153,
|
12955 |
+
"step": 1702
|
12956 |
+
},
|
12957 |
+
{
|
12958 |
+
"epoch": 0.8073003081298886,
|
12959 |
+
"grad_norm": 3.992670774459839,
|
12960 |
+
"learning_rate": 2.174319156384186e-06,
|
12961 |
+
"loss": 0.1346,
|
12962 |
+
"step": 1703
|
12963 |
+
},
|
12964 |
+
{
|
12965 |
+
"epoch": 0.8077743541123489,
|
12966 |
+
"grad_norm": 3.4810454845428467,
|
12967 |
+
"learning_rate": 2.164025109516692e-06,
|
12968 |
+
"loss": 0.095,
|
12969 |
+
"step": 1704
|
12970 |
+
},
|
12971 |
+
{
|
12972 |
+
"epoch": 0.8082484000948092,
|
12973 |
+
"grad_norm": 6.212849140167236,
|
12974 |
+
"learning_rate": 2.15375253107823e-06,
|
12975 |
+
"loss": 0.2299,
|
12976 |
+
"step": 1705
|
12977 |
+
},
|
12978 |
+
{
|
12979 |
+
"epoch": 0.8087224460772695,
|
12980 |
+
"grad_norm": 6.363603591918945,
|
12981 |
+
"learning_rate": 2.1435014492128547e-06,
|
12982 |
+
"loss": 0.2338,
|
12983 |
+
"step": 1706
|
12984 |
+
},
|
12985 |
+
{
|
12986 |
+
"epoch": 0.8091964920597298,
|
12987 |
+
"grad_norm": 2.451110363006592,
|
12988 |
+
"learning_rate": 2.1332718920057307e-06,
|
12989 |
+
"loss": 0.084,
|
12990 |
+
"step": 1707
|
12991 |
+
},
|
12992 |
+
{
|
12993 |
+
"epoch": 0.8096705380421901,
|
12994 |
+
"grad_norm": 6.087503910064697,
|
12995 |
+
"learning_rate": 2.1230638874830413e-06,
|
12996 |
+
"loss": 0.1361,
|
12997 |
+
"step": 1708
|
12998 |
+
},
|
12999 |
+
{
|
13000 |
+
"epoch": 0.8101445840246504,
|
13001 |
+
"grad_norm": 6.973792552947998,
|
13002 |
+
"learning_rate": 2.1128774636119307e-06,
|
13003 |
+
"loss": 0.2558,
|
13004 |
+
"step": 1709
|
13005 |
+
},
|
13006 |
+
{
|
13007 |
+
"epoch": 0.8106186300071107,
|
13008 |
+
"grad_norm": 6.504344463348389,
|
13009 |
+
"learning_rate": 2.102712648300418e-06,
|
13010 |
+
"loss": 0.1156,
|
13011 |
+
"step": 1710
|
13012 |
+
},
|
13013 |
+
{
|
13014 |
+
"epoch": 0.811092675989571,
|
13015 |
+
"grad_norm": 5.404758930206299,
|
13016 |
+
"learning_rate": 2.0925694693973162e-06,
|
13017 |
+
"loss": 0.1702,
|
13018 |
+
"step": 1711
|
13019 |
+
},
|
13020 |
+
{
|
13021 |
+
"epoch": 0.8115667219720313,
|
13022 |
+
"grad_norm": 5.191570281982422,
|
13023 |
+
"learning_rate": 2.082447954692164e-06,
|
13024 |
+
"loss": 0.1003,
|
13025 |
+
"step": 1712
|
13026 |
+
},
|
13027 |
+
{
|
13028 |
+
"epoch": 0.8120407679544915,
|
13029 |
+
"grad_norm": 3.821763515472412,
|
13030 |
+
"learning_rate": 2.0723481319151427e-06,
|
13031 |
+
"loss": 0.1208,
|
13032 |
+
"step": 1713
|
13033 |
+
},
|
13034 |
+
{
|
13035 |
+
"epoch": 0.8125148139369519,
|
13036 |
+
"grad_norm": 6.716168403625488,
|
13037 |
+
"learning_rate": 2.062270028737008e-06,
|
13038 |
+
"loss": 0.2031,
|
13039 |
+
"step": 1714
|
13040 |
+
},
|
13041 |
+
{
|
13042 |
+
"epoch": 0.8129888599194122,
|
13043 |
+
"grad_norm": 2.98030948638916,
|
13044 |
+
"learning_rate": 2.052213672769007e-06,
|
13045 |
+
"loss": 0.101,
|
13046 |
+
"step": 1715
|
13047 |
+
},
|
13048 |
+
{
|
13049 |
+
"epoch": 0.8134629059018725,
|
13050 |
+
"grad_norm": 5.622329235076904,
|
13051 |
+
"learning_rate": 2.042179091562805e-06,
|
13052 |
+
"loss": 0.1626,
|
13053 |
+
"step": 1716
|
13054 |
+
},
|
13055 |
+
{
|
13056 |
+
"epoch": 0.8139369518843328,
|
13057 |
+
"grad_norm": 3.3454723358154297,
|
13058 |
+
"learning_rate": 2.032166312610411e-06,
|
13059 |
+
"loss": 0.0978,
|
13060 |
+
"step": 1717
|
13061 |
+
},
|
13062 |
+
{
|
13063 |
+
"epoch": 0.814410997866793,
|
13064 |
+
"grad_norm": 5.193914413452148,
|
13065 |
+
"learning_rate": 2.0221753633441033e-06,
|
13066 |
+
"loss": 0.1742,
|
13067 |
+
"step": 1718
|
13068 |
+
},
|
13069 |
+
{
|
13070 |
+
"epoch": 0.8148850438492534,
|
13071 |
+
"grad_norm": 6.165769577026367,
|
13072 |
+
"learning_rate": 2.012206271136353e-06,
|
13073 |
+
"loss": 0.1491,
|
13074 |
+
"step": 1719
|
13075 |
+
},
|
13076 |
+
{
|
13077 |
+
"epoch": 0.8153590898317137,
|
13078 |
+
"grad_norm": 6.2735795974731445,
|
13079 |
+
"learning_rate": 2.002259063299744e-06,
|
13080 |
+
"loss": 0.1477,
|
13081 |
+
"step": 1720
|
13082 |
+
},
|
13083 |
+
{
|
13084 |
+
"epoch": 0.8153590898317137,
|
13085 |
+
"eval_accuracy": 0.9951690821256038,
|
13086 |
+
"eval_f1": 0.9444444444444444,
|
13087 |
+
"eval_loss": 0.0112903518602252,
|
13088 |
+
"eval_precision": 0.9107142857142857,
|
13089 |
+
"eval_recall": 0.9807692307692307,
|
13090 |
+
"eval_runtime": 49.2837,
|
13091 |
+
"eval_samples_per_second": 5.499,
|
13092 |
+
"eval_steps_per_second": 0.183,
|
13093 |
+
"step": 1720
|
13094 |
+
},
|
13095 |
+
{
|
13096 |
+
"epoch": 0.8158331358141739,
|
13097 |
+
"grad_norm": 8.393010139465332,
|
13098 |
+
"learning_rate": 1.992333767086905e-06,
|
13099 |
+
"loss": 0.2223,
|
13100 |
+
"step": 1721
|
13101 |
+
},
|
13102 |
+
{
|
13103 |
+
"epoch": 0.8163071817966343,
|
13104 |
+
"grad_norm": 8.414145469665527,
|
13105 |
+
"learning_rate": 1.982430409690439e-06,
|
13106 |
+
"loss": 0.1873,
|
13107 |
+
"step": 1722
|
13108 |
+
},
|
13109 |
+
{
|
13110 |
+
"epoch": 0.8167812277790946,
|
13111 |
+
"grad_norm": 3.9387011528015137,
|
13112 |
+
"learning_rate": 1.972549018242836e-06,
|
13113 |
+
"loss": 0.1164,
|
13114 |
+
"step": 1723
|
13115 |
+
},
|
13116 |
+
{
|
13117 |
+
"epoch": 0.8172552737615548,
|
13118 |
+
"grad_norm": 6.245006084442139,
|
13119 |
+
"learning_rate": 1.9626896198164093e-06,
|
13120 |
+
"loss": 0.1791,
|
13121 |
+
"step": 1724
|
13122 |
+
},
|
13123 |
+
{
|
13124 |
+
"epoch": 0.8177293197440152,
|
13125 |
+
"grad_norm": 3.7231218814849854,
|
13126 |
+
"learning_rate": 1.9528522414232122e-06,
|
13127 |
+
"loss": 0.1483,
|
13128 |
+
"step": 1725
|
13129 |
+
},
|
13130 |
+
{
|
13131 |
+
"epoch": 0.8182033657264755,
|
13132 |
+
"grad_norm": 7.006000995635986,
|
13133 |
+
"learning_rate": 1.9430369100149727e-06,
|
13134 |
+
"loss": 0.2054,
|
13135 |
+
"step": 1726
|
13136 |
+
},
|
13137 |
+
{
|
13138 |
+
"epoch": 0.8186774117089358,
|
13139 |
+
"grad_norm": 4.269167900085449,
|
13140 |
+
"learning_rate": 1.9332436524830167e-06,
|
13141 |
+
"loss": 0.1074,
|
13142 |
+
"step": 1727
|
13143 |
+
},
|
13144 |
+
{
|
13145 |
+
"epoch": 0.8191514576913961,
|
13146 |
+
"grad_norm": 4.43737268447876,
|
13147 |
+
"learning_rate": 1.9234724956581918e-06,
|
13148 |
+
"loss": 0.1194,
|
13149 |
+
"step": 1728
|
13150 |
+
},
|
13151 |
+
{
|
13152 |
+
"epoch": 0.8196255036738563,
|
13153 |
+
"grad_norm": 5.734049320220947,
|
13154 |
+
"learning_rate": 1.9137234663107995e-06,
|
13155 |
+
"loss": 0.2281,
|
13156 |
+
"step": 1729
|
13157 |
+
},
|
13158 |
+
{
|
13159 |
+
"epoch": 0.8200995496563167,
|
13160 |
+
"grad_norm": 4.329126358032227,
|
13161 |
+
"learning_rate": 1.9039965911505098e-06,
|
13162 |
+
"loss": 0.1464,
|
13163 |
+
"step": 1730
|
13164 |
+
},
|
13165 |
+
{
|
13166 |
+
"epoch": 0.820573595638777,
|
13167 |
+
"grad_norm": 6.123368263244629,
|
13168 |
+
"learning_rate": 1.8942918968263036e-06,
|
13169 |
+
"loss": 0.1876,
|
13170 |
+
"step": 1731
|
13171 |
+
},
|
13172 |
+
{
|
13173 |
+
"epoch": 0.8210476416212372,
|
13174 |
+
"grad_norm": 5.27827262878418,
|
13175 |
+
"learning_rate": 1.8846094099263911e-06,
|
13176 |
+
"loss": 0.1182,
|
13177 |
+
"step": 1732
|
13178 |
+
},
|
13179 |
+
{
|
13180 |
+
"epoch": 0.8215216876036976,
|
13181 |
+
"grad_norm": 3.905064344406128,
|
13182 |
+
"learning_rate": 1.8749491569781397e-06,
|
13183 |
+
"loss": 0.1297,
|
13184 |
+
"step": 1733
|
13185 |
+
},
|
13186 |
+
{
|
13187 |
+
"epoch": 0.8219957335861579,
|
13188 |
+
"grad_norm": 4.914556980133057,
|
13189 |
+
"learning_rate": 1.8653111644480004e-06,
|
13190 |
+
"loss": 0.1504,
|
13191 |
+
"step": 1734
|
13192 |
+
},
|
13193 |
+
{
|
13194 |
+
"epoch": 0.8224697795686181,
|
13195 |
+
"grad_norm": 2.135080337524414,
|
13196 |
+
"learning_rate": 1.8556954587414377e-06,
|
13197 |
+
"loss": 0.0756,
|
13198 |
+
"step": 1735
|
13199 |
+
},
|
13200 |
+
{
|
13201 |
+
"epoch": 0.8229438255510785,
|
13202 |
+
"grad_norm": 5.558071136474609,
|
13203 |
+
"learning_rate": 1.8461020662028583e-06,
|
13204 |
+
"loss": 0.1586,
|
13205 |
+
"step": 1736
|
13206 |
+
},
|
13207 |
+
{
|
13208 |
+
"epoch": 0.8234178715335387,
|
13209 |
+
"grad_norm": 4.518209934234619,
|
13210 |
+
"learning_rate": 1.8365310131155345e-06,
|
13211 |
+
"loss": 0.2052,
|
13212 |
+
"step": 1737
|
13213 |
+
},
|
13214 |
+
{
|
13215 |
+
"epoch": 0.8238919175159991,
|
13216 |
+
"grad_norm": 5.095973014831543,
|
13217 |
+
"learning_rate": 1.8269823257015351e-06,
|
13218 |
+
"loss": 0.1441,
|
13219 |
+
"step": 1738
|
13220 |
+
},
|
13221 |
+
{
|
13222 |
+
"epoch": 0.8243659634984594,
|
13223 |
+
"grad_norm": 5.229091644287109,
|
13224 |
+
"learning_rate": 1.8174560301216527e-06,
|
13225 |
+
"loss": 0.1543,
|
13226 |
+
"step": 1739
|
13227 |
+
},
|
13228 |
+
{
|
13229 |
+
"epoch": 0.8248400094809196,
|
13230 |
+
"grad_norm": 5.269925594329834,
|
13231 |
+
"learning_rate": 1.807952152475333e-06,
|
13232 |
+
"loss": 0.1781,
|
13233 |
+
"step": 1740
|
13234 |
+
},
|
13235 |
+
{
|
13236 |
+
"epoch": 0.8248400094809196,
|
13237 |
+
"eval_accuracy": 0.9935587761674718,
|
13238 |
+
"eval_f1": 0.9272727272727272,
|
13239 |
+
"eval_loss": 0.01386988628655672,
|
13240 |
+
"eval_precision": 0.8793103448275862,
|
13241 |
+
"eval_recall": 0.9807692307692307,
|
13242 |
+
"eval_runtime": 50.0148,
|
13243 |
+
"eval_samples_per_second": 5.418,
|
13244 |
+
"eval_steps_per_second": 0.18,
|
13245 |
+
"step": 1740
|
13246 |
+
},
|
13247 |
+
{
|
13248 |
+
"epoch": 0.82531405546338,
|
13249 |
+
"grad_norm": 3.926471710205078,
|
13250 |
+
"learning_rate": 1.7984707188006034e-06,
|
13251 |
+
"loss": 0.1284,
|
13252 |
+
"step": 1741
|
13253 |
+
},
|
13254 |
+
{
|
13255 |
+
"epoch": 0.8257881014458403,
|
13256 |
+
"grad_norm": 3.689272403717041,
|
13257 |
+
"learning_rate": 1.7890117550739995e-06,
|
13258 |
+
"loss": 0.1592,
|
13259 |
+
"step": 1742
|
13260 |
+
},
|
13261 |
+
{
|
13262 |
+
"epoch": 0.8262621474283005,
|
13263 |
+
"grad_norm": 8.198687553405762,
|
13264 |
+
"learning_rate": 1.7795752872104965e-06,
|
13265 |
+
"loss": 0.2166,
|
13266 |
+
"step": 1743
|
13267 |
+
},
|
13268 |
+
{
|
13269 |
+
"epoch": 0.8267361934107609,
|
13270 |
+
"grad_norm": 5.243244171142578,
|
13271 |
+
"learning_rate": 1.7701613410634367e-06,
|
13272 |
+
"loss": 0.238,
|
13273 |
+
"step": 1744
|
13274 |
+
},
|
13275 |
+
{
|
13276 |
+
"epoch": 0.8272102393932211,
|
13277 |
+
"grad_norm": 7.627920150756836,
|
13278 |
+
"learning_rate": 1.7607699424244583e-06,
|
13279 |
+
"loss": 0.1358,
|
13280 |
+
"step": 1745
|
13281 |
+
},
|
13282 |
+
{
|
13283 |
+
"epoch": 0.8276842853756814,
|
13284 |
+
"grad_norm": 4.7214884757995605,
|
13285 |
+
"learning_rate": 1.7514011170234258e-06,
|
13286 |
+
"loss": 0.1165,
|
13287 |
+
"step": 1746
|
13288 |
+
},
|
13289 |
+
{
|
13290 |
+
"epoch": 0.8281583313581418,
|
13291 |
+
"grad_norm": 4.949544906616211,
|
13292 |
+
"learning_rate": 1.7420548905283619e-06,
|
13293 |
+
"loss": 0.1227,
|
13294 |
+
"step": 1747
|
13295 |
+
},
|
13296 |
+
{
|
13297 |
+
"epoch": 0.828632377340602,
|
13298 |
+
"grad_norm": 3.5602853298187256,
|
13299 |
+
"learning_rate": 1.7327312885453695e-06,
|
13300 |
+
"loss": 0.1247,
|
13301 |
+
"step": 1748
|
13302 |
+
},
|
13303 |
+
{
|
13304 |
+
"epoch": 0.8291064233230624,
|
13305 |
+
"grad_norm": 4.533194065093994,
|
13306 |
+
"learning_rate": 1.7234303366185712e-06,
|
13307 |
+
"loss": 0.183,
|
13308 |
+
"step": 1749
|
13309 |
+
},
|
13310 |
+
{
|
13311 |
+
"epoch": 0.8295804693055226,
|
13312 |
+
"grad_norm": 4.326444625854492,
|
13313 |
+
"learning_rate": 1.7141520602300332e-06,
|
13314 |
+
"loss": 0.1427,
|
13315 |
+
"step": 1750
|
13316 |
+
},
|
13317 |
+
{
|
13318 |
+
"epoch": 0.8300545152879829,
|
13319 |
+
"grad_norm": 2.9050464630126953,
|
13320 |
+
"learning_rate": 1.7048964847996928e-06,
|
13321 |
+
"loss": 0.1105,
|
13322 |
+
"step": 1751
|
13323 |
+
},
|
13324 |
+
{
|
13325 |
+
"epoch": 0.8305285612704433,
|
13326 |
+
"grad_norm": 4.191965579986572,
|
13327 |
+
"learning_rate": 1.6956636356852984e-06,
|
13328 |
+
"loss": 0.1444,
|
13329 |
+
"step": 1752
|
13330 |
+
},
|
13331 |
+
{
|
13332 |
+
"epoch": 0.8310026072529035,
|
13333 |
+
"grad_norm": 4.245302677154541,
|
13334 |
+
"learning_rate": 1.6864535381823333e-06,
|
13335 |
+
"loss": 0.1695,
|
13336 |
+
"step": 1753
|
13337 |
+
},
|
13338 |
+
{
|
13339 |
+
"epoch": 0.8314766532353638,
|
13340 |
+
"grad_norm": 4.924167156219482,
|
13341 |
+
"learning_rate": 1.6772662175239451e-06,
|
13342 |
+
"loss": 0.1323,
|
13343 |
+
"step": 1754
|
13344 |
+
},
|
13345 |
+
{
|
13346 |
+
"epoch": 0.8319506992178242,
|
13347 |
+
"grad_norm": 3.5585765838623047,
|
13348 |
+
"learning_rate": 1.668101698880883e-06,
|
13349 |
+
"loss": 0.1285,
|
13350 |
+
"step": 1755
|
13351 |
+
},
|
13352 |
+
{
|
13353 |
+
"epoch": 0.8324247452002844,
|
13354 |
+
"grad_norm": 5.416965484619141,
|
13355 |
+
"learning_rate": 1.6589600073614175e-06,
|
13356 |
+
"loss": 0.1603,
|
13357 |
+
"step": 1756
|
13358 |
+
},
|
13359 |
+
{
|
13360 |
+
"epoch": 0.8328987911827447,
|
13361 |
+
"grad_norm": 4.387139320373535,
|
13362 |
+
"learning_rate": 1.6498411680112925e-06,
|
13363 |
+
"loss": 0.1554,
|
13364 |
+
"step": 1757
|
13365 |
+
},
|
13366 |
+
{
|
13367 |
+
"epoch": 0.833372837165205,
|
13368 |
+
"grad_norm": 3.8568196296691895,
|
13369 |
+
"learning_rate": 1.6407452058136298e-06,
|
13370 |
+
"loss": 0.1279,
|
13371 |
+
"step": 1758
|
13372 |
+
},
|
13373 |
+
{
|
13374 |
+
"epoch": 0.8338468831476653,
|
13375 |
+
"grad_norm": 5.32737922668457,
|
13376 |
+
"learning_rate": 1.6316721456888807e-06,
|
13377 |
+
"loss": 0.209,
|
13378 |
+
"step": 1759
|
13379 |
+
},
|
13380 |
+
{
|
13381 |
+
"epoch": 0.8343209291301257,
|
13382 |
+
"grad_norm": 6.815487861633301,
|
13383 |
+
"learning_rate": 1.6226220124947513e-06,
|
13384 |
+
"loss": 0.1517,
|
13385 |
+
"step": 1760
|
13386 |
+
},
|
13387 |
+
{
|
13388 |
+
"epoch": 0.8343209291301257,
|
13389 |
+
"eval_accuracy": 0.9935587761674718,
|
13390 |
+
"eval_f1": 0.9272727272727272,
|
13391 |
+
"eval_loss": 0.012907618656754494,
|
13392 |
+
"eval_precision": 0.8793103448275862,
|
13393 |
+
"eval_recall": 0.9807692307692307,
|
13394 |
+
"eval_runtime": 50.0121,
|
13395 |
+
"eval_samples_per_second": 5.419,
|
13396 |
+
"eval_steps_per_second": 0.18,
|
13397 |
+
"step": 1760
|
13398 |
+
},
|
13399 |
+
{
|
13400 |
+
"epoch": 0.8347949751125859,
|
13401 |
+
"grad_norm": 3.5248119831085205,
|
13402 |
+
"learning_rate": 1.6135948310261272e-06,
|
13403 |
+
"loss": 0.1413,
|
13404 |
+
"step": 1761
|
13405 |
+
},
|
13406 |
+
{
|
13407 |
+
"epoch": 0.8352690210950462,
|
13408 |
+
"grad_norm": 3.0675511360168457,
|
13409 |
+
"learning_rate": 1.6045906260150212e-06,
|
13410 |
+
"loss": 0.1353,
|
13411 |
+
"step": 1762
|
13412 |
+
},
|
13413 |
+
{
|
13414 |
+
"epoch": 0.8357430670775066,
|
13415 |
+
"grad_norm": 7.004110336303711,
|
13416 |
+
"learning_rate": 1.595609422130494e-06,
|
13417 |
+
"loss": 0.1473,
|
13418 |
+
"step": 1763
|
13419 |
+
},
|
13420 |
+
{
|
13421 |
+
"epoch": 0.8362171130599668,
|
13422 |
+
"grad_norm": 2.630929470062256,
|
13423 |
+
"learning_rate": 1.5866512439785876e-06,
|
13424 |
+
"loss": 0.0991,
|
13425 |
+
"step": 1764
|
13426 |
+
},
|
13427 |
+
{
|
13428 |
+
"epoch": 0.8366911590424271,
|
13429 |
+
"grad_norm": 3.722667694091797,
|
13430 |
+
"learning_rate": 1.5777161161022614e-06,
|
13431 |
+
"loss": 0.1403,
|
13432 |
+
"step": 1765
|
13433 |
+
},
|
13434 |
+
{
|
13435 |
+
"epoch": 0.8371652050248874,
|
13436 |
+
"grad_norm": 3.6338841915130615,
|
13437 |
+
"learning_rate": 1.5688040629813229e-06,
|
13438 |
+
"loss": 0.0963,
|
13439 |
+
"step": 1766
|
13440 |
+
},
|
13441 |
+
{
|
13442 |
+
"epoch": 0.8376392510073477,
|
13443 |
+
"grad_norm": 2.220780611038208,
|
13444 |
+
"learning_rate": 1.5599151090323627e-06,
|
13445 |
+
"loss": 0.094,
|
13446 |
+
"step": 1767
|
13447 |
+
},
|
13448 |
+
{
|
13449 |
+
"epoch": 0.838113296989808,
|
13450 |
+
"grad_norm": 5.549960136413574,
|
13451 |
+
"learning_rate": 1.5510492786086828e-06,
|
13452 |
+
"loss": 0.2037,
|
13453 |
+
"step": 1768
|
13454 |
+
},
|
13455 |
+
{
|
13456 |
+
"epoch": 0.8385873429722683,
|
13457 |
+
"grad_norm": 3.976283073425293,
|
13458 |
+
"learning_rate": 1.5422065960002364e-06,
|
13459 |
+
"loss": 0.1453,
|
13460 |
+
"step": 1769
|
13461 |
+
},
|
13462 |
+
{
|
13463 |
+
"epoch": 0.8390613889547286,
|
13464 |
+
"grad_norm": 5.508865833282471,
|
13465 |
+
"learning_rate": 1.5333870854335554e-06,
|
13466 |
+
"loss": 0.2064,
|
13467 |
+
"step": 1770
|
13468 |
+
},
|
13469 |
+
{
|
13470 |
+
"epoch": 0.839535434937189,
|
13471 |
+
"grad_norm": 3.408942222595215,
|
13472 |
+
"learning_rate": 1.5245907710716912e-06,
|
13473 |
+
"loss": 0.1245,
|
13474 |
+
"step": 1771
|
13475 |
+
},
|
13476 |
+
{
|
13477 |
+
"epoch": 0.8400094809196492,
|
13478 |
+
"grad_norm": 7.579738616943359,
|
13479 |
+
"learning_rate": 1.5158176770141342e-06,
|
13480 |
+
"loss": 0.1888,
|
13481 |
+
"step": 1772
|
13482 |
+
},
|
13483 |
+
{
|
13484 |
+
"epoch": 0.8404835269021095,
|
13485 |
+
"grad_norm": 6.760648727416992,
|
13486 |
+
"learning_rate": 1.5070678272967654e-06,
|
13487 |
+
"loss": 0.2148,
|
13488 |
+
"step": 1773
|
13489 |
+
},
|
13490 |
+
{
|
13491 |
+
"epoch": 0.8409575728845698,
|
13492 |
+
"grad_norm": 3.159531593322754,
|
13493 |
+
"learning_rate": 1.4983412458917846e-06,
|
13494 |
+
"loss": 0.1209,
|
13495 |
+
"step": 1774
|
13496 |
+
},
|
13497 |
+
{
|
13498 |
+
"epoch": 0.8414316188670301,
|
13499 |
+
"grad_norm": 4.622367858886719,
|
13500 |
+
"learning_rate": 1.4896379567076369e-06,
|
13501 |
+
"loss": 0.1917,
|
13502 |
+
"step": 1775
|
13503 |
+
},
|
13504 |
+
{
|
13505 |
+
"epoch": 0.8419056648494904,
|
13506 |
+
"grad_norm": 3.152876377105713,
|
13507 |
+
"learning_rate": 1.4809579835889564e-06,
|
13508 |
+
"loss": 0.0609,
|
13509 |
+
"step": 1776
|
13510 |
+
},
|
13511 |
+
{
|
13512 |
+
"epoch": 0.8423797108319507,
|
13513 |
+
"grad_norm": 3.5214357376098633,
|
13514 |
+
"learning_rate": 1.472301350316495e-06,
|
13515 |
+
"loss": 0.1228,
|
13516 |
+
"step": 1777
|
13517 |
+
},
|
13518 |
+
{
|
13519 |
+
"epoch": 0.842853756814411,
|
13520 |
+
"grad_norm": 2.849489450454712,
|
13521 |
+
"learning_rate": 1.4636680806070625e-06,
|
13522 |
+
"loss": 0.0938,
|
13523 |
+
"step": 1778
|
13524 |
+
},
|
13525 |
+
{
|
13526 |
+
"epoch": 0.8433278027968713,
|
13527 |
+
"grad_norm": 5.918670654296875,
|
13528 |
+
"learning_rate": 1.4550581981134571e-06,
|
13529 |
+
"loss": 0.185,
|
13530 |
+
"step": 1779
|
13531 |
+
},
|
13532 |
+
{
|
13533 |
+
"epoch": 0.8438018487793316,
|
13534 |
+
"grad_norm": 4.758391857147217,
|
13535 |
+
"learning_rate": 1.4464717264244043e-06,
|
13536 |
+
"loss": 0.1811,
|
13537 |
+
"step": 1780
|
13538 |
+
},
|
13539 |
+
{
|
13540 |
+
"epoch": 0.8438018487793316,
|
13541 |
+
"eval_accuracy": 0.9943639291465378,
|
13542 |
+
"eval_f1": 0.9357798165137615,
|
13543 |
+
"eval_loss": 0.012266670353710651,
|
13544 |
+
"eval_precision": 0.8947368421052632,
|
13545 |
+
"eval_recall": 0.9807692307692307,
|
13546 |
+
"eval_runtime": 50.3857,
|
13547 |
+
"eval_samples_per_second": 5.379,
|
13548 |
+
"eval_steps_per_second": 0.179,
|
13549 |
+
"step": 1780
|
13550 |
+
},
|
13551 |
+
{
|
13552 |
+
"epoch": 0.8442758947617919,
|
13553 |
+
"grad_norm": 3.0533530712127686,
|
13554 |
+
"learning_rate": 1.43790868906449e-06,
|
13555 |
+
"loss": 0.0867,
|
13556 |
+
"step": 1781
|
13557 |
+
},
|
13558 |
+
{
|
13559 |
+
"epoch": 0.8447499407442522,
|
13560 |
+
"grad_norm": 3.962001085281372,
|
13561 |
+
"learning_rate": 1.429369109494091e-06,
|
13562 |
+
"loss": 0.182,
|
13563 |
+
"step": 1782
|
13564 |
+
},
|
13565 |
+
{
|
13566 |
+
"epoch": 0.8452239867267125,
|
13567 |
+
"grad_norm": 4.5312180519104,
|
13568 |
+
"learning_rate": 1.4208530111093244e-06,
|
13569 |
+
"loss": 0.1194,
|
13570 |
+
"step": 1783
|
13571 |
+
},
|
13572 |
+
{
|
13573 |
+
"epoch": 0.8456980327091728,
|
13574 |
+
"grad_norm": 6.248788356781006,
|
13575 |
+
"learning_rate": 1.4123604172419714e-06,
|
13576 |
+
"loss": 0.1418,
|
13577 |
+
"step": 1784
|
13578 |
+
},
|
13579 |
+
{
|
13580 |
+
"epoch": 0.8461720786916331,
|
13581 |
+
"grad_norm": 5.026639938354492,
|
13582 |
+
"learning_rate": 1.4038913511594166e-06,
|
13583 |
+
"loss": 0.1182,
|
13584 |
+
"step": 1785
|
13585 |
+
},
|
13586 |
+
{
|
13587 |
+
"epoch": 0.8466461246740934,
|
13588 |
+
"grad_norm": 3.611996650695801,
|
13589 |
+
"learning_rate": 1.395445836064586e-06,
|
13590 |
+
"loss": 0.1078,
|
13591 |
+
"step": 1786
|
13592 |
+
},
|
13593 |
+
{
|
13594 |
+
"epoch": 0.8471201706565537,
|
13595 |
+
"grad_norm": 6.5539984703063965,
|
13596 |
+
"learning_rate": 1.3870238950958837e-06,
|
13597 |
+
"loss": 0.15,
|
13598 |
+
"step": 1787
|
13599 |
+
},
|
13600 |
+
{
|
13601 |
+
"epoch": 0.847594216639014,
|
13602 |
+
"grad_norm": 5.68766450881958,
|
13603 |
+
"learning_rate": 1.378625551327124e-06,
|
13604 |
+
"loss": 0.1344,
|
13605 |
+
"step": 1788
|
13606 |
+
},
|
13607 |
+
{
|
13608 |
+
"epoch": 0.8480682626214743,
|
13609 |
+
"grad_norm": 5.3630828857421875,
|
13610 |
+
"learning_rate": 1.3702508277674731e-06,
|
13611 |
+
"loss": 0.2046,
|
13612 |
+
"step": 1789
|
13613 |
+
},
|
13614 |
+
{
|
13615 |
+
"epoch": 0.8485423086039345,
|
13616 |
+
"grad_norm": 3.6046223640441895,
|
13617 |
+
"learning_rate": 1.3618997473613837e-06,
|
13618 |
+
"loss": 0.1093,
|
13619 |
+
"step": 1790
|
13620 |
+
},
|
13621 |
+
{
|
13622 |
+
"epoch": 0.8490163545863949,
|
13623 |
+
"grad_norm": 5.983584403991699,
|
13624 |
+
"learning_rate": 1.353572332988534e-06,
|
13625 |
+
"loss": 0.1991,
|
13626 |
+
"step": 1791
|
13627 |
+
},
|
13628 |
+
{
|
13629 |
+
"epoch": 0.8494904005688552,
|
13630 |
+
"grad_norm": 10.93433952331543,
|
13631 |
+
"learning_rate": 1.3452686074637632e-06,
|
13632 |
+
"loss": 0.1925,
|
13633 |
+
"step": 1792
|
13634 |
+
},
|
13635 |
+
{
|
13636 |
+
"epoch": 0.8499644465513154,
|
13637 |
+
"grad_norm": 5.214844226837158,
|
13638 |
+
"learning_rate": 1.3369885935370086e-06,
|
13639 |
+
"loss": 0.1227,
|
13640 |
+
"step": 1793
|
13641 |
+
},
|
13642 |
+
{
|
13643 |
+
"epoch": 0.8504384925337758,
|
13644 |
+
"grad_norm": 4.261415481567383,
|
13645 |
+
"learning_rate": 1.328732313893245e-06,
|
13646 |
+
"loss": 0.1287,
|
13647 |
+
"step": 1794
|
13648 |
+
},
|
13649 |
+
{
|
13650 |
+
"epoch": 0.850912538516236,
|
13651 |
+
"grad_norm": 4.207308292388916,
|
13652 |
+
"learning_rate": 1.320499791152421e-06,
|
13653 |
+
"loss": 0.2133,
|
13654 |
+
"step": 1795
|
13655 |
+
},
|
13656 |
+
{
|
13657 |
+
"epoch": 0.8513865844986964,
|
13658 |
+
"grad_norm": 4.4212141036987305,
|
13659 |
+
"learning_rate": 1.3122910478693984e-06,
|
13660 |
+
"loss": 0.1429,
|
13661 |
+
"step": 1796
|
13662 |
+
},
|
13663 |
+
{
|
13664 |
+
"epoch": 0.8518606304811567,
|
13665 |
+
"grad_norm": 4.221442222595215,
|
13666 |
+
"learning_rate": 1.30410610653389e-06,
|
13667 |
+
"loss": 0.1585,
|
13668 |
+
"step": 1797
|
13669 |
+
},
|
13670 |
+
{
|
13671 |
+
"epoch": 0.8523346764636169,
|
13672 |
+
"grad_norm": 3.037301778793335,
|
13673 |
+
"learning_rate": 1.295944989570398e-06,
|
13674 |
+
"loss": 0.1518,
|
13675 |
+
"step": 1798
|
13676 |
+
},
|
13677 |
+
{
|
13678 |
+
"epoch": 0.8528087224460773,
|
13679 |
+
"grad_norm": 4.325135231018066,
|
13680 |
+
"learning_rate": 1.2878077193381511e-06,
|
13681 |
+
"loss": 0.1136,
|
13682 |
+
"step": 1799
|
13683 |
+
},
|
13684 |
+
{
|
13685 |
+
"epoch": 0.8532827684285376,
|
13686 |
+
"grad_norm": 4.634499549865723,
|
13687 |
+
"learning_rate": 1.279694318131046e-06,
|
13688 |
+
"loss": 0.1592,
|
13689 |
+
"step": 1800
|
13690 |
+
},
|
13691 |
+
{
|
13692 |
+
"epoch": 0.8532827684285376,
|
13693 |
+
"eval_accuracy": 0.9943639291465378,
|
13694 |
+
"eval_f1": 0.9369369369369369,
|
13695 |
+
"eval_loss": 0.01357492059469223,
|
13696 |
+
"eval_precision": 0.8813559322033898,
|
13697 |
+
"eval_recall": 1.0,
|
13698 |
+
"eval_runtime": 51.1253,
|
13699 |
+
"eval_samples_per_second": 5.301,
|
13700 |
+
"eval_steps_per_second": 0.176,
|
13701 |
+
"step": 1800
|
13702 |
}
|
13703 |
],
|
13704 |
"logging_steps": 1,
|
|
|
13718 |
"attributes": {}
|
13719 |
}
|
13720 |
},
|
13721 |
+
"total_flos": 4.8095721145604506e+17,
|
13722 |
"train_batch_size": 8,
|
13723 |
"trial_name": null,
|
13724 |
"trial_params": null
|