Training in progress, step 6777, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +579 -4
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e2e9047c1b951991a3cb533b5422cf65b7fdd99fce52ba9dad6cd543430b657
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65562a7ad6cabc3a9a834233cb6b2a6418ba0f2a4995c24942bae3fcdd716740
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1095a5622f7ae7057612f380cf391d514c18a62ac77c825675a6caf6ee67c65
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0991fb538d2fcdd7133b88b57d7408634c5853d2bea2f8795c0d2a2c04be8c2a
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68623acd4c5451515db193d2aa04b8145b1f1f36417e52c36086f855dd8b168d
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e32076e376b49ea017c893743edd08119946ffa77fa889406c367ad701334a0e
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be31033ff0655091be7d5cf4fb0b2133466588c00cd4b8a9f31a6082a8afcee4
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ef8509d1cfc7f1a4958331fec1ee3edaa3e6165fe80cc849e478a187e2339e3
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:712216dc073e1dfa28326996a976c217850297a16301f8f5ccfa922b1a5dc3ee
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9028ecc7ecc1acc6c8525827a2279969075801179a1f74a70512f85eeab8816a
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f69e552de9eb6a6f61b8231a8ce384288d7ac46d1fed91935b9e5cbb21762087
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:633aeca10962285051f888adc2c2c409b0bf3999362ccda037a788110e685916
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a317dad97e27324d1b8604ef54c8e6a4cccd5fd5efea166b6eda97fc4edd76b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -50939,6 +50939,581 @@
|
|
50939 |
"eval_samples_per_second": 5.902,
|
50940 |
"eval_steps_per_second": 0.203,
|
50941 |
"step": 6700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50942 |
}
|
50943 |
],
|
50944 |
"logging_steps": 1,
|
@@ -50953,12 +51528,12 @@
|
|
50953 |
"should_evaluate": false,
|
50954 |
"should_log": false,
|
50955 |
"should_save": true,
|
50956 |
-
"should_training_stop":
|
50957 |
},
|
50958 |
"attributes": {}
|
50959 |
}
|
50960 |
},
|
50961 |
-
"total_flos": 2.
|
50962 |
"train_batch_size": 8,
|
50963 |
"trial_name": null,
|
50964 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.999926226484692,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 6777,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
50939 |
"eval_samples_per_second": 5.902,
|
50940 |
"eval_steps_per_second": 0.203,
|
50941 |
"step": 6700
|
50942 |
+
},
|
50943 |
+
{
|
50944 |
+
"epoch": 0.9887126521578753,
|
50945 |
+
"grad_norm": 0.30346840620040894,
|
50946 |
+
"learning_rate": 7.661683413868748e-09,
|
50947 |
+
"loss": 0.0029,
|
50948 |
+
"step": 6701
|
50949 |
+
},
|
50950 |
+
{
|
50951 |
+
"epoch": 0.9888601991884913,
|
50952 |
+
"grad_norm": 3.611311674118042,
|
50953 |
+
"learning_rate": 7.461411546916264e-09,
|
50954 |
+
"loss": 0.0608,
|
50955 |
+
"step": 6702
|
50956 |
+
},
|
50957 |
+
{
|
50958 |
+
"epoch": 0.9890077462191074,
|
50959 |
+
"grad_norm": 3.273043632507324,
|
50960 |
+
"learning_rate": 7.263790976492769e-09,
|
50961 |
+
"loss": 0.0737,
|
50962 |
+
"step": 6703
|
50963 |
+
},
|
50964 |
+
{
|
50965 |
+
"epoch": 0.9891552932497234,
|
50966 |
+
"grad_norm": 1.5908385515213013,
|
50967 |
+
"learning_rate": 7.068821755030763e-09,
|
50968 |
+
"loss": 0.0437,
|
50969 |
+
"step": 6704
|
50970 |
+
},
|
50971 |
+
{
|
50972 |
+
"epoch": 0.9893028402803393,
|
50973 |
+
"grad_norm": 2.5678610801696777,
|
50974 |
+
"learning_rate": 6.876503934262202e-09,
|
50975 |
+
"loss": 0.1046,
|
50976 |
+
"step": 6705
|
50977 |
+
},
|
50978 |
+
{
|
50979 |
+
"epoch": 0.9894503873109554,
|
50980 |
+
"grad_norm": 1.9680798053741455,
|
50981 |
+
"learning_rate": 6.686837565215154e-09,
|
50982 |
+
"loss": 0.0491,
|
50983 |
+
"step": 6706
|
50984 |
+
},
|
50985 |
+
{
|
50986 |
+
"epoch": 0.9895979343415714,
|
50987 |
+
"grad_norm": 3.603398084640503,
|
50988 |
+
"learning_rate": 6.499822698210478e-09,
|
50989 |
+
"loss": 0.0613,
|
50990 |
+
"step": 6707
|
50991 |
+
},
|
50992 |
+
{
|
50993 |
+
"epoch": 0.9897454813721874,
|
50994 |
+
"grad_norm": 2.4477665424346924,
|
50995 |
+
"learning_rate": 6.315459382871813e-09,
|
50996 |
+
"loss": 0.0667,
|
50997 |
+
"step": 6708
|
50998 |
+
},
|
50999 |
+
{
|
51000 |
+
"epoch": 0.9898930284028034,
|
51001 |
+
"grad_norm": 1.95452082157135,
|
51002 |
+
"learning_rate": 6.1337476681122556e-09,
|
51003 |
+
"loss": 0.0635,
|
51004 |
+
"step": 6709
|
51005 |
+
},
|
51006 |
+
{
|
51007 |
+
"epoch": 0.9900405754334194,
|
51008 |
+
"grad_norm": 0.929094672203064,
|
51009 |
+
"learning_rate": 5.95468760214879e-09,
|
51010 |
+
"loss": 0.0181,
|
51011 |
+
"step": 6710
|
51012 |
+
},
|
51013 |
+
{
|
51014 |
+
"epoch": 0.9901881224640354,
|
51015 |
+
"grad_norm": 1.7649720907211304,
|
51016 |
+
"learning_rate": 5.77827923248786e-09,
|
51017 |
+
"loss": 0.0337,
|
51018 |
+
"step": 6711
|
51019 |
+
},
|
51020 |
+
{
|
51021 |
+
"epoch": 0.9903356694946515,
|
51022 |
+
"grad_norm": 2.4020333290100098,
|
51023 |
+
"learning_rate": 5.604522605937579e-09,
|
51024 |
+
"loss": 0.0745,
|
51025 |
+
"step": 6712
|
51026 |
+
},
|
51027 |
+
{
|
51028 |
+
"epoch": 0.9904832165252674,
|
51029 |
+
"grad_norm": 1.8048148155212402,
|
51030 |
+
"learning_rate": 5.433417768598847e-09,
|
51031 |
+
"loss": 0.0588,
|
51032 |
+
"step": 6713
|
51033 |
+
},
|
51034 |
+
{
|
51035 |
+
"epoch": 0.9906307635558834,
|
51036 |
+
"grad_norm": 1.9580947160720825,
|
51037 |
+
"learning_rate": 5.264964765870906e-09,
|
51038 |
+
"loss": 0.0477,
|
51039 |
+
"step": 6714
|
51040 |
+
},
|
51041 |
+
{
|
51042 |
+
"epoch": 0.9907783105864995,
|
51043 |
+
"grad_norm": 1.246323823928833,
|
51044 |
+
"learning_rate": 5.099163642449112e-09,
|
51045 |
+
"loss": 0.0336,
|
51046 |
+
"step": 6715
|
51047 |
+
},
|
51048 |
+
{
|
51049 |
+
"epoch": 0.9909258576171155,
|
51050 |
+
"grad_norm": 1.8688416481018066,
|
51051 |
+
"learning_rate": 4.9360144423260535e-09,
|
51052 |
+
"loss": 0.0259,
|
51053 |
+
"step": 6716
|
51054 |
+
},
|
51055 |
+
{
|
51056 |
+
"epoch": 0.9910734046477314,
|
51057 |
+
"grad_norm": 1.5381361246109009,
|
51058 |
+
"learning_rate": 4.775517208788217e-09,
|
51059 |
+
"loss": 0.0298,
|
51060 |
+
"step": 6717
|
51061 |
+
},
|
51062 |
+
{
|
51063 |
+
"epoch": 0.9912209516783475,
|
51064 |
+
"grad_norm": 1.709076166152954,
|
51065 |
+
"learning_rate": 4.617671984420425e-09,
|
51066 |
+
"loss": 0.0542,
|
51067 |
+
"step": 6718
|
51068 |
+
},
|
51069 |
+
{
|
51070 |
+
"epoch": 0.9913684987089635,
|
51071 |
+
"grad_norm": 1.3890794515609741,
|
51072 |
+
"learning_rate": 4.462478811104731e-09,
|
51073 |
+
"loss": 0.0558,
|
51074 |
+
"step": 6719
|
51075 |
+
},
|
51076 |
+
{
|
51077 |
+
"epoch": 0.9915160457395795,
|
51078 |
+
"grad_norm": 0.8841899633407593,
|
51079 |
+
"learning_rate": 4.309937730015978e-09,
|
51080 |
+
"loss": 0.0172,
|
51081 |
+
"step": 6720
|
51082 |
+
},
|
51083 |
+
{
|
51084 |
+
"epoch": 0.9915160457395795,
|
51085 |
+
"eval_accuracy": 0.9782923299565847,
|
51086 |
+
"eval_f1": 0.9629629629629629,
|
51087 |
+
"eval_loss": 0.055379100143909454,
|
51088 |
+
"eval_precision": 0.9798994974874372,
|
51089 |
+
"eval_recall": 0.9466019417475728,
|
51090 |
+
"eval_runtime": 50.3029,
|
51091 |
+
"eval_samples_per_second": 5.785,
|
51092 |
+
"eval_steps_per_second": 0.199,
|
51093 |
+
"step": 6720
|
51094 |
+
},
|
51095 |
+
{
|
51096 |
+
"epoch": 0.9916635927701954,
|
51097 |
+
"grad_norm": 1.6927621364593506,
|
51098 |
+
"learning_rate": 4.160048781628456e-09,
|
51099 |
+
"loss": 0.0152,
|
51100 |
+
"step": 6721
|
51101 |
+
},
|
51102 |
+
{
|
51103 |
+
"epoch": 0.9918111398008115,
|
51104 |
+
"grad_norm": 0.853327214717865,
|
51105 |
+
"learning_rate": 4.012812005712574e-09,
|
51106 |
+
"loss": 0.0098,
|
51107 |
+
"step": 6722
|
51108 |
+
},
|
51109 |
+
{
|
51110 |
+
"epoch": 0.9919586868314275,
|
51111 |
+
"grad_norm": 3.5236921310424805,
|
51112 |
+
"learning_rate": 3.8682274413337495e-09,
|
51113 |
+
"loss": 0.0527,
|
51114 |
+
"step": 6723
|
51115 |
+
},
|
51116 |
+
{
|
51117 |
+
"epoch": 0.9921062338620436,
|
51118 |
+
"grad_norm": 2.9586477279663086,
|
51119 |
+
"learning_rate": 3.726295126853519e-09,
|
51120 |
+
"loss": 0.0441,
|
51121 |
+
"step": 6724
|
51122 |
+
},
|
51123 |
+
{
|
51124 |
+
"epoch": 0.9922537808926596,
|
51125 |
+
"grad_norm": 0.3246397376060486,
|
51126 |
+
"learning_rate": 3.5870150999317567e-09,
|
51127 |
+
"loss": 0.0029,
|
51128 |
+
"step": 6725
|
51129 |
+
},
|
51130 |
+
{
|
51131 |
+
"epoch": 0.9924013279232755,
|
51132 |
+
"grad_norm": 1.3314762115478516,
|
51133 |
+
"learning_rate": 3.450387397522237e-09,
|
51134 |
+
"loss": 0.025,
|
51135 |
+
"step": 6726
|
51136 |
+
},
|
51137 |
+
{
|
51138 |
+
"epoch": 0.9925488749538915,
|
51139 |
+
"grad_norm": 1.7466126680374146,
|
51140 |
+
"learning_rate": 3.3164120558759614e-09,
|
51141 |
+
"loss": 0.0336,
|
51142 |
+
"step": 6727
|
51143 |
+
},
|
51144 |
+
{
|
51145 |
+
"epoch": 0.9926964219845076,
|
51146 |
+
"grad_norm": 0.8817312121391296,
|
51147 |
+
"learning_rate": 3.185089110542272e-09,
|
51148 |
+
"loss": 0.0208,
|
51149 |
+
"step": 6728
|
51150 |
+
},
|
51151 |
+
{
|
51152 |
+
"epoch": 0.9928439690151236,
|
51153 |
+
"grad_norm": 1.9117987155914307,
|
51154 |
+
"learning_rate": 3.056418596362187e-09,
|
51155 |
+
"loss": 0.0431,
|
51156 |
+
"step": 6729
|
51157 |
+
},
|
51158 |
+
{
|
51159 |
+
"epoch": 0.9929915160457395,
|
51160 |
+
"grad_norm": 5.536546230316162,
|
51161 |
+
"learning_rate": 2.9304005474761755e-09,
|
51162 |
+
"loss": 0.1609,
|
51163 |
+
"step": 6730
|
51164 |
+
},
|
51165 |
+
{
|
51166 |
+
"epoch": 0.9931390630763556,
|
51167 |
+
"grad_norm": 4.430239677429199,
|
51168 |
+
"learning_rate": 2.8070349973219336e-09,
|
51169 |
+
"loss": 0.1037,
|
51170 |
+
"step": 6731
|
51171 |
+
},
|
51172 |
+
{
|
51173 |
+
"epoch": 0.9932866101069716,
|
51174 |
+
"grad_norm": 1.1493936777114868,
|
51175 |
+
"learning_rate": 2.6863219786299466e-09,
|
51176 |
+
"loss": 0.0348,
|
51177 |
+
"step": 6732
|
51178 |
+
},
|
51179 |
+
{
|
51180 |
+
"epoch": 0.9934341571375876,
|
51181 |
+
"grad_norm": 1.1461900472640991,
|
51182 |
+
"learning_rate": 2.568261523430149e-09,
|
51183 |
+
"loss": 0.0286,
|
51184 |
+
"step": 6733
|
51185 |
+
},
|
51186 |
+
{
|
51187 |
+
"epoch": 0.9935817041682036,
|
51188 |
+
"grad_norm": 1.5313574075698853,
|
51189 |
+
"learning_rate": 2.452853663046373e-09,
|
51190 |
+
"loss": 0.0368,
|
51191 |
+
"step": 6734
|
51192 |
+
},
|
51193 |
+
{
|
51194 |
+
"epoch": 0.9937292511988196,
|
51195 |
+
"grad_norm": 1.981681227684021,
|
51196 |
+
"learning_rate": 2.3400984280985695e-09,
|
51197 |
+
"loss": 0.0701,
|
51198 |
+
"step": 6735
|
51199 |
+
},
|
51200 |
+
{
|
51201 |
+
"epoch": 0.9938767982294356,
|
51202 |
+
"grad_norm": 2.576758623123169,
|
51203 |
+
"learning_rate": 2.229995848506139e-09,
|
51204 |
+
"loss": 0.0525,
|
51205 |
+
"step": 6736
|
51206 |
+
},
|
51207 |
+
{
|
51208 |
+
"epoch": 0.9940243452600517,
|
51209 |
+
"grad_norm": 2.1853508949279785,
|
51210 |
+
"learning_rate": 2.1225459534801596e-09,
|
51211 |
+
"loss": 0.0543,
|
51212 |
+
"step": 6737
|
51213 |
+
},
|
51214 |
+
{
|
51215 |
+
"epoch": 0.9941718922906676,
|
51216 |
+
"grad_norm": 3.452421188354492,
|
51217 |
+
"learning_rate": 2.017748771531158e-09,
|
51218 |
+
"loss": 0.0628,
|
51219 |
+
"step": 6738
|
51220 |
+
},
|
51221 |
+
{
|
51222 |
+
"epoch": 0.9943194393212836,
|
51223 |
+
"grad_norm": 1.7515523433685303,
|
51224 |
+
"learning_rate": 1.915604330464671e-09,
|
51225 |
+
"loss": 0.0685,
|
51226 |
+
"step": 6739
|
51227 |
+
},
|
51228 |
+
{
|
51229 |
+
"epoch": 0.9944669863518997,
|
51230 |
+
"grad_norm": 2.203991413116455,
|
51231 |
+
"learning_rate": 1.8161126573823517e-09,
|
51232 |
+
"loss": 0.0444,
|
51233 |
+
"step": 6740
|
51234 |
+
},
|
51235 |
+
{
|
51236 |
+
"epoch": 0.9944669863518997,
|
51237 |
+
"eval_accuracy": 0.9782923299565847,
|
51238 |
+
"eval_f1": 0.9629629629629629,
|
51239 |
+
"eval_loss": 0.055171407759189606,
|
51240 |
+
"eval_precision": 0.9798994974874372,
|
51241 |
+
"eval_recall": 0.9466019417475728,
|
51242 |
+
"eval_runtime": 48.6957,
|
51243 |
+
"eval_samples_per_second": 5.976,
|
51244 |
+
"eval_steps_per_second": 0.205,
|
51245 |
+
"step": 6740
|
51246 |
+
},
|
51247 |
+
{
|
51248 |
+
"epoch": 0.9946145333825157,
|
51249 |
+
"grad_norm": 2.4897584915161133,
|
51250 |
+
"learning_rate": 1.7192737786819736e-09,
|
51251 |
+
"loss": 0.0722,
|
51252 |
+
"step": 6741
|
51253 |
+
},
|
51254 |
+
{
|
51255 |
+
"epoch": 0.9947620804131316,
|
51256 |
+
"grad_norm": 1.9944440126419067,
|
51257 |
+
"learning_rate": 1.6250877200574278e-09,
|
51258 |
+
"loss": 0.0995,
|
51259 |
+
"step": 6742
|
51260 |
+
},
|
51261 |
+
{
|
51262 |
+
"epoch": 0.9949096274437477,
|
51263 |
+
"grad_norm": 2.042222738265991,
|
51264 |
+
"learning_rate": 1.5335545064998346e-09,
|
51265 |
+
"loss": 0.0642,
|
51266 |
+
"step": 6743
|
51267 |
+
},
|
51268 |
+
{
|
51269 |
+
"epoch": 0.9950571744743637,
|
51270 |
+
"grad_norm": 1.745491623878479,
|
51271 |
+
"learning_rate": 1.4446741622942128e-09,
|
51272 |
+
"loss": 0.0506,
|
51273 |
+
"step": 6744
|
51274 |
+
},
|
51275 |
+
{
|
51276 |
+
"epoch": 0.9952047215049797,
|
51277 |
+
"grad_norm": 0.9310182929039001,
|
51278 |
+
"learning_rate": 1.3584467110228095e-09,
|
51279 |
+
"loss": 0.0122,
|
51280 |
+
"step": 6745
|
51281 |
+
},
|
51282 |
+
{
|
51283 |
+
"epoch": 0.9953522685355958,
|
51284 |
+
"grad_norm": 1.8398277759552002,
|
51285 |
+
"learning_rate": 1.2748721755651005e-09,
|
51286 |
+
"loss": 0.0261,
|
51287 |
+
"step": 6746
|
51288 |
+
},
|
51289 |
+
{
|
51290 |
+
"epoch": 0.9954998155662117,
|
51291 |
+
"grad_norm": 1.677306890487671,
|
51292 |
+
"learning_rate": 1.1939505780966809e-09,
|
51293 |
+
"loss": 0.0358,
|
51294 |
+
"step": 6747
|
51295 |
+
},
|
51296 |
+
{
|
51297 |
+
"epoch": 0.9956473625968277,
|
51298 |
+
"grad_norm": 1.2638682126998901,
|
51299 |
+
"learning_rate": 1.115681940085933e-09,
|
51300 |
+
"loss": 0.0283,
|
51301 |
+
"step": 6748
|
51302 |
+
},
|
51303 |
+
{
|
51304 |
+
"epoch": 0.9957949096274438,
|
51305 |
+
"grad_norm": 3.8664028644561768,
|
51306 |
+
"learning_rate": 1.040066282300689e-09,
|
51307 |
+
"loss": 0.0514,
|
51308 |
+
"step": 6749
|
51309 |
+
},
|
51310 |
+
{
|
51311 |
+
"epoch": 0.9959424566580598,
|
51312 |
+
"grad_norm": 1.0908302068710327,
|
51313 |
+
"learning_rate": 9.671036248048993e-10,
|
51314 |
+
"loss": 0.0171,
|
51315 |
+
"step": 6750
|
51316 |
+
},
|
51317 |
+
{
|
51318 |
+
"epoch": 0.9960900036886757,
|
51319 |
+
"grad_norm": 4.517603874206543,
|
51320 |
+
"learning_rate": 8.967939869553022e-10,
|
51321 |
+
"loss": 0.0963,
|
51322 |
+
"step": 6751
|
51323 |
+
},
|
51324 |
+
{
|
51325 |
+
"epoch": 0.9962375507192918,
|
51326 |
+
"grad_norm": 9.87070369720459,
|
51327 |
+
"learning_rate": 8.291373874091957e-10,
|
51328 |
+
"loss": 0.0511,
|
51329 |
+
"step": 6752
|
51330 |
+
},
|
51331 |
+
{
|
51332 |
+
"epoch": 0.9963850977499078,
|
51333 |
+
"grad_norm": 1.7819442749023438,
|
51334 |
+
"learning_rate": 7.641338441166657e-10,
|
51335 |
+
"loss": 0.0142,
|
51336 |
+
"step": 6753
|
51337 |
+
},
|
51338 |
+
{
|
51339 |
+
"epoch": 0.9965326447805238,
|
51340 |
+
"grad_norm": 1.9164355993270874,
|
51341 |
+
"learning_rate": 7.017833743261371e-10,
|
51342 |
+
"loss": 0.0333,
|
51343 |
+
"step": 6754
|
51344 |
+
},
|
51345 |
+
{
|
51346 |
+
"epoch": 0.9966801918111398,
|
51347 |
+
"grad_norm": 1.178529143333435,
|
51348 |
+
"learning_rate": 6.420859945788227e-10,
|
51349 |
+
"loss": 0.0175,
|
51350 |
+
"step": 6755
|
51351 |
+
},
|
51352 |
+
{
|
51353 |
+
"epoch": 0.9968277388417558,
|
51354 |
+
"grad_norm": 3.305959701538086,
|
51355 |
+
"learning_rate": 5.850417207153847e-10,
|
51356 |
+
"loss": 0.0576,
|
51357 |
+
"step": 6756
|
51358 |
+
},
|
51359 |
+
{
|
51360 |
+
"epoch": 0.9969752858723718,
|
51361 |
+
"grad_norm": 3.1544477939605713,
|
51362 |
+
"learning_rate": 5.306505678714934e-10,
|
51363 |
+
"loss": 0.0754,
|
51364 |
+
"step": 6757
|
51365 |
+
},
|
51366 |
+
{
|
51367 |
+
"epoch": 0.9971228329029879,
|
51368 |
+
"grad_norm": 1.3226299285888672,
|
51369 |
+
"learning_rate": 4.789125504778281e-10,
|
51370 |
+
"loss": 0.0261,
|
51371 |
+
"step": 6758
|
51372 |
+
},
|
51373 |
+
{
|
51374 |
+
"epoch": 0.9972703799336038,
|
51375 |
+
"grad_norm": 3.1813669204711914,
|
51376 |
+
"learning_rate": 4.2982768226229597e-10,
|
51377 |
+
"loss": 0.1108,
|
51378 |
+
"step": 6759
|
51379 |
+
},
|
51380 |
+
{
|
51381 |
+
"epoch": 0.9974179269642198,
|
51382 |
+
"grad_norm": 2.4142541885375977,
|
51383 |
+
"learning_rate": 3.8339597624892366e-10,
|
51384 |
+
"loss": 0.0481,
|
51385 |
+
"step": 6760
|
51386 |
+
},
|
51387 |
+
{
|
51388 |
+
"epoch": 0.9974179269642198,
|
51389 |
+
"eval_accuracy": 0.9782923299565847,
|
51390 |
+
"eval_f1": 0.9629629629629629,
|
51391 |
+
"eval_loss": 0.05494352802634239,
|
51392 |
+
"eval_precision": 0.9798994974874372,
|
51393 |
+
"eval_recall": 0.9466019417475728,
|
51394 |
+
"eval_runtime": 48.9599,
|
51395 |
+
"eval_samples_per_second": 5.944,
|
51396 |
+
"eval_steps_per_second": 0.204,
|
51397 |
+
"step": 6760
|
51398 |
+
},
|
51399 |
+
{
|
51400 |
+
"epoch": 0.9975654739948359,
|
51401 |
+
"grad_norm": 1.254651665687561,
|
51402 |
+
"learning_rate": 3.396174447556355e-10,
|
51403 |
+
"loss": 0.0174,
|
51404 |
+
"step": 6761
|
51405 |
+
},
|
51406 |
+
{
|
51407 |
+
"epoch": 0.9977130210254519,
|
51408 |
+
"grad_norm": 2.606041669845581,
|
51409 |
+
"learning_rate": 2.984920994009155e-10,
|
51410 |
+
"loss": 0.0746,
|
51411 |
+
"step": 6762
|
51412 |
+
},
|
51413 |
+
{
|
51414 |
+
"epoch": 0.9978605680560678,
|
51415 |
+
"grad_norm": 6.167627334594727,
|
51416 |
+
"learning_rate": 2.600199510938151e-10,
|
51417 |
+
"loss": 0.1954,
|
51418 |
+
"step": 6763
|
51419 |
+
},
|
51420 |
+
{
|
51421 |
+
"epoch": 0.9980081150866839,
|
51422 |
+
"grad_norm": 2.2177047729492188,
|
51423 |
+
"learning_rate": 2.2420101004394512e-10,
|
51424 |
+
"loss": 0.0676,
|
51425 |
+
"step": 6764
|
51426 |
+
},
|
51427 |
+
{
|
51428 |
+
"epoch": 0.9981556621172999,
|
51429 |
+
"grad_norm": 1.7990946769714355,
|
51430 |
+
"learning_rate": 1.9103528575370457e-10,
|
51431 |
+
"loss": 0.0404,
|
51432 |
+
"step": 6765
|
51433 |
+
},
|
51434 |
+
{
|
51435 |
+
"epoch": 0.9983032091479159,
|
51436 |
+
"grad_norm": 1.5802024602890015,
|
51437 |
+
"learning_rate": 1.6052278702272107e-10,
|
51438 |
+
"loss": 0.0352,
|
51439 |
+
"step": 6766
|
51440 |
+
},
|
51441 |
+
{
|
51442 |
+
"epoch": 0.998450756178532,
|
51443 |
+
"grad_norm": 1.7018808126449585,
|
51444 |
+
"learning_rate": 1.3266352194785114e-10,
|
51445 |
+
"loss": 0.0254,
|
51446 |
+
"step": 6767
|
51447 |
+
},
|
51448 |
+
{
|
51449 |
+
"epoch": 0.9985983032091479,
|
51450 |
+
"grad_norm": 2.102145195007324,
|
51451 |
+
"learning_rate": 1.0745749792095971e-10,
|
51452 |
+
"loss": 0.0427,
|
51453 |
+
"step": 6768
|
51454 |
+
},
|
51455 |
+
{
|
51456 |
+
"epoch": 0.9987458502397639,
|
51457 |
+
"grad_norm": 2.0960288047790527,
|
51458 |
+
"learning_rate": 8.490472163003027e-11,
|
51459 |
+
"loss": 0.0562,
|
51460 |
+
"step": 6769
|
51461 |
+
},
|
51462 |
+
{
|
51463 |
+
"epoch": 0.99889339727038,
|
51464 |
+
"grad_norm": 2.1226682662963867,
|
51465 |
+
"learning_rate": 6.500519905694446e-11,
|
51466 |
+
"loss": 0.0399,
|
51467 |
+
"step": 6770
|
51468 |
+
},
|
51469 |
+
{
|
51470 |
+
"epoch": 0.999040944300996,
|
51471 |
+
"grad_norm": 2.805297374725342,
|
51472 |
+
"learning_rate": 4.775893548414345e-11,
|
51473 |
+
"loss": 0.128,
|
51474 |
+
"step": 6771
|
51475 |
+
},
|
51476 |
+
{
|
51477 |
+
"epoch": 0.9991884913316119,
|
51478 |
+
"grad_norm": 3.2651734352111816,
|
51479 |
+
"learning_rate": 3.316593548574609e-11,
|
51480 |
+
"loss": 0.0469,
|
51481 |
+
"step": 6772
|
51482 |
+
},
|
51483 |
+
{
|
51484 |
+
"epoch": 0.999336038362228,
|
51485 |
+
"grad_norm": 1.4640666246414185,
|
51486 |
+
"learning_rate": 2.1226202935320518e-11,
|
51487 |
+
"loss": 0.033,
|
51488 |
+
"step": 6773
|
51489 |
+
},
|
51490 |
+
{
|
51491 |
+
"epoch": 0.999483585392844,
|
51492 |
+
"grad_norm": 1.624456524848938,
|
51493 |
+
"learning_rate": 1.1939740999222793e-11,
|
51494 |
+
"loss": 0.0241,
|
51495 |
+
"step": 6774
|
51496 |
+
},
|
51497 |
+
{
|
51498 |
+
"epoch": 0.99963113242346,
|
51499 |
+
"grad_norm": 1.3273454904556274,
|
51500 |
+
"learning_rate": 5.3065521421480356e-12,
|
51501 |
+
"loss": 0.034,
|
51502 |
+
"step": 6775
|
51503 |
+
},
|
51504 |
+
{
|
51505 |
+
"epoch": 0.999778679454076,
|
51506 |
+
"grad_norm": 0.9207583069801331,
|
51507 |
+
"learning_rate": 1.3266381237997393e-12,
|
51508 |
+
"loss": 0.026,
|
51509 |
+
"step": 6776
|
51510 |
+
},
|
51511 |
+
{
|
51512 |
+
"epoch": 0.999926226484692,
|
51513 |
+
"grad_norm": 1.3743399381637573,
|
51514 |
+
"learning_rate": 0.0,
|
51515 |
+
"loss": 0.0357,
|
51516 |
+
"step": 6777
|
51517 |
}
|
51518 |
],
|
51519 |
"logging_steps": 1,
|
|
|
51528 |
"should_evaluate": false,
|
51529 |
"should_log": false,
|
51530 |
"should_save": true,
|
51531 |
+
"should_training_stop": true
|
51532 |
},
|
51533 |
"attributes": {}
|
51534 |
}
|
51535 |
},
|
51536 |
+
"total_flos": 2.0879468077357466e+18,
|
51537 |
"train_batch_size": 8,
|
51538 |
"trial_name": null,
|
51539 |
"trial_params": null
|