Training in progress, step 6777, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +579 -4
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e2e9047c1b951991a3cb533b5422cf65b7fdd99fce52ba9dad6cd543430b657
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65562a7ad6cabc3a9a834233cb6b2a6418ba0f2a4995c24942bae3fcdd716740
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1095a5622f7ae7057612f380cf391d514c18a62ac77c825675a6caf6ee67c65
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0991fb538d2fcdd7133b88b57d7408634c5853d2bea2f8795c0d2a2c04be8c2a
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68623acd4c5451515db193d2aa04b8145b1f1f36417e52c36086f855dd8b168d
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e32076e376b49ea017c893743edd08119946ffa77fa889406c367ad701334a0e
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be31033ff0655091be7d5cf4fb0b2133466588c00cd4b8a9f31a6082a8afcee4
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ef8509d1cfc7f1a4958331fec1ee3edaa3e6165fe80cc849e478a187e2339e3
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:712216dc073e1dfa28326996a976c217850297a16301f8f5ccfa922b1a5dc3ee
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9028ecc7ecc1acc6c8525827a2279969075801179a1f74a70512f85eeab8816a
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f69e552de9eb6a6f61b8231a8ce384288d7ac46d1fed91935b9e5cbb21762087
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:633aeca10962285051f888adc2c2c409b0bf3999362ccda037a788110e685916
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a317dad97e27324d1b8604ef54c8e6a4cccd5fd5efea166b6eda97fc4edd76b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -50939,6 +50939,581 @@
|
|
| 50939 |
"eval_samples_per_second": 5.902,
|
| 50940 |
"eval_steps_per_second": 0.203,
|
| 50941 |
"step": 6700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50942 |
}
|
| 50943 |
],
|
| 50944 |
"logging_steps": 1,
|
|
@@ -50953,12 +51528,12 @@
|
|
| 50953 |
"should_evaluate": false,
|
| 50954 |
"should_log": false,
|
| 50955 |
"should_save": true,
|
| 50956 |
-
"should_training_stop":
|
| 50957 |
},
|
| 50958 |
"attributes": {}
|
| 50959 |
}
|
| 50960 |
},
|
| 50961 |
-
"total_flos": 2.
|
| 50962 |
"train_batch_size": 8,
|
| 50963 |
"trial_name": null,
|
| 50964 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.999926226484692,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 6777,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 50939 |
"eval_samples_per_second": 5.902,
|
| 50940 |
"eval_steps_per_second": 0.203,
|
| 50941 |
"step": 6700
|
| 50942 |
+
},
|
| 50943 |
+
{
|
| 50944 |
+
"epoch": 0.9887126521578753,
|
| 50945 |
+
"grad_norm": 0.30346840620040894,
|
| 50946 |
+
"learning_rate": 7.661683413868748e-09,
|
| 50947 |
+
"loss": 0.0029,
|
| 50948 |
+
"step": 6701
|
| 50949 |
+
},
|
| 50950 |
+
{
|
| 50951 |
+
"epoch": 0.9888601991884913,
|
| 50952 |
+
"grad_norm": 3.611311674118042,
|
| 50953 |
+
"learning_rate": 7.461411546916264e-09,
|
| 50954 |
+
"loss": 0.0608,
|
| 50955 |
+
"step": 6702
|
| 50956 |
+
},
|
| 50957 |
+
{
|
| 50958 |
+
"epoch": 0.9890077462191074,
|
| 50959 |
+
"grad_norm": 3.273043632507324,
|
| 50960 |
+
"learning_rate": 7.263790976492769e-09,
|
| 50961 |
+
"loss": 0.0737,
|
| 50962 |
+
"step": 6703
|
| 50963 |
+
},
|
| 50964 |
+
{
|
| 50965 |
+
"epoch": 0.9891552932497234,
|
| 50966 |
+
"grad_norm": 1.5908385515213013,
|
| 50967 |
+
"learning_rate": 7.068821755030763e-09,
|
| 50968 |
+
"loss": 0.0437,
|
| 50969 |
+
"step": 6704
|
| 50970 |
+
},
|
| 50971 |
+
{
|
| 50972 |
+
"epoch": 0.9893028402803393,
|
| 50973 |
+
"grad_norm": 2.5678610801696777,
|
| 50974 |
+
"learning_rate": 6.876503934262202e-09,
|
| 50975 |
+
"loss": 0.1046,
|
| 50976 |
+
"step": 6705
|
| 50977 |
+
},
|
| 50978 |
+
{
|
| 50979 |
+
"epoch": 0.9894503873109554,
|
| 50980 |
+
"grad_norm": 1.9680798053741455,
|
| 50981 |
+
"learning_rate": 6.686837565215154e-09,
|
| 50982 |
+
"loss": 0.0491,
|
| 50983 |
+
"step": 6706
|
| 50984 |
+
},
|
| 50985 |
+
{
|
| 50986 |
+
"epoch": 0.9895979343415714,
|
| 50987 |
+
"grad_norm": 3.603398084640503,
|
| 50988 |
+
"learning_rate": 6.499822698210478e-09,
|
| 50989 |
+
"loss": 0.0613,
|
| 50990 |
+
"step": 6707
|
| 50991 |
+
},
|
| 50992 |
+
{
|
| 50993 |
+
"epoch": 0.9897454813721874,
|
| 50994 |
+
"grad_norm": 2.4477665424346924,
|
| 50995 |
+
"learning_rate": 6.315459382871813e-09,
|
| 50996 |
+
"loss": 0.0667,
|
| 50997 |
+
"step": 6708
|
| 50998 |
+
},
|
| 50999 |
+
{
|
| 51000 |
+
"epoch": 0.9898930284028034,
|
| 51001 |
+
"grad_norm": 1.95452082157135,
|
| 51002 |
+
"learning_rate": 6.1337476681122556e-09,
|
| 51003 |
+
"loss": 0.0635,
|
| 51004 |
+
"step": 6709
|
| 51005 |
+
},
|
| 51006 |
+
{
|
| 51007 |
+
"epoch": 0.9900405754334194,
|
| 51008 |
+
"grad_norm": 0.929094672203064,
|
| 51009 |
+
"learning_rate": 5.95468760214879e-09,
|
| 51010 |
+
"loss": 0.0181,
|
| 51011 |
+
"step": 6710
|
| 51012 |
+
},
|
| 51013 |
+
{
|
| 51014 |
+
"epoch": 0.9901881224640354,
|
| 51015 |
+
"grad_norm": 1.7649720907211304,
|
| 51016 |
+
"learning_rate": 5.77827923248786e-09,
|
| 51017 |
+
"loss": 0.0337,
|
| 51018 |
+
"step": 6711
|
| 51019 |
+
},
|
| 51020 |
+
{
|
| 51021 |
+
"epoch": 0.9903356694946515,
|
| 51022 |
+
"grad_norm": 2.4020333290100098,
|
| 51023 |
+
"learning_rate": 5.604522605937579e-09,
|
| 51024 |
+
"loss": 0.0745,
|
| 51025 |
+
"step": 6712
|
| 51026 |
+
},
|
| 51027 |
+
{
|
| 51028 |
+
"epoch": 0.9904832165252674,
|
| 51029 |
+
"grad_norm": 1.8048148155212402,
|
| 51030 |
+
"learning_rate": 5.433417768598847e-09,
|
| 51031 |
+
"loss": 0.0588,
|
| 51032 |
+
"step": 6713
|
| 51033 |
+
},
|
| 51034 |
+
{
|
| 51035 |
+
"epoch": 0.9906307635558834,
|
| 51036 |
+
"grad_norm": 1.9580947160720825,
|
| 51037 |
+
"learning_rate": 5.264964765870906e-09,
|
| 51038 |
+
"loss": 0.0477,
|
| 51039 |
+
"step": 6714
|
| 51040 |
+
},
|
| 51041 |
+
{
|
| 51042 |
+
"epoch": 0.9907783105864995,
|
| 51043 |
+
"grad_norm": 1.246323823928833,
|
| 51044 |
+
"learning_rate": 5.099163642449112e-09,
|
| 51045 |
+
"loss": 0.0336,
|
| 51046 |
+
"step": 6715
|
| 51047 |
+
},
|
| 51048 |
+
{
|
| 51049 |
+
"epoch": 0.9909258576171155,
|
| 51050 |
+
"grad_norm": 1.8688416481018066,
|
| 51051 |
+
"learning_rate": 4.9360144423260535e-09,
|
| 51052 |
+
"loss": 0.0259,
|
| 51053 |
+
"step": 6716
|
| 51054 |
+
},
|
| 51055 |
+
{
|
| 51056 |
+
"epoch": 0.9910734046477314,
|
| 51057 |
+
"grad_norm": 1.5381361246109009,
|
| 51058 |
+
"learning_rate": 4.775517208788217e-09,
|
| 51059 |
+
"loss": 0.0298,
|
| 51060 |
+
"step": 6717
|
| 51061 |
+
},
|
| 51062 |
+
{
|
| 51063 |
+
"epoch": 0.9912209516783475,
|
| 51064 |
+
"grad_norm": 1.709076166152954,
|
| 51065 |
+
"learning_rate": 4.617671984420425e-09,
|
| 51066 |
+
"loss": 0.0542,
|
| 51067 |
+
"step": 6718
|
| 51068 |
+
},
|
| 51069 |
+
{
|
| 51070 |
+
"epoch": 0.9913684987089635,
|
| 51071 |
+
"grad_norm": 1.3890794515609741,
|
| 51072 |
+
"learning_rate": 4.462478811104731e-09,
|
| 51073 |
+
"loss": 0.0558,
|
| 51074 |
+
"step": 6719
|
| 51075 |
+
},
|
| 51076 |
+
{
|
| 51077 |
+
"epoch": 0.9915160457395795,
|
| 51078 |
+
"grad_norm": 0.8841899633407593,
|
| 51079 |
+
"learning_rate": 4.309937730015978e-09,
|
| 51080 |
+
"loss": 0.0172,
|
| 51081 |
+
"step": 6720
|
| 51082 |
+
},
|
| 51083 |
+
{
|
| 51084 |
+
"epoch": 0.9915160457395795,
|
| 51085 |
+
"eval_accuracy": 0.9782923299565847,
|
| 51086 |
+
"eval_f1": 0.9629629629629629,
|
| 51087 |
+
"eval_loss": 0.055379100143909454,
|
| 51088 |
+
"eval_precision": 0.9798994974874372,
|
| 51089 |
+
"eval_recall": 0.9466019417475728,
|
| 51090 |
+
"eval_runtime": 50.3029,
|
| 51091 |
+
"eval_samples_per_second": 5.785,
|
| 51092 |
+
"eval_steps_per_second": 0.199,
|
| 51093 |
+
"step": 6720
|
| 51094 |
+
},
|
| 51095 |
+
{
|
| 51096 |
+
"epoch": 0.9916635927701954,
|
| 51097 |
+
"grad_norm": 1.6927621364593506,
|
| 51098 |
+
"learning_rate": 4.160048781628456e-09,
|
| 51099 |
+
"loss": 0.0152,
|
| 51100 |
+
"step": 6721
|
| 51101 |
+
},
|
| 51102 |
+
{
|
| 51103 |
+
"epoch": 0.9918111398008115,
|
| 51104 |
+
"grad_norm": 0.853327214717865,
|
| 51105 |
+
"learning_rate": 4.012812005712574e-09,
|
| 51106 |
+
"loss": 0.0098,
|
| 51107 |
+
"step": 6722
|
| 51108 |
+
},
|
| 51109 |
+
{
|
| 51110 |
+
"epoch": 0.9919586868314275,
|
| 51111 |
+
"grad_norm": 3.5236921310424805,
|
| 51112 |
+
"learning_rate": 3.8682274413337495e-09,
|
| 51113 |
+
"loss": 0.0527,
|
| 51114 |
+
"step": 6723
|
| 51115 |
+
},
|
| 51116 |
+
{
|
| 51117 |
+
"epoch": 0.9921062338620436,
|
| 51118 |
+
"grad_norm": 2.9586477279663086,
|
| 51119 |
+
"learning_rate": 3.726295126853519e-09,
|
| 51120 |
+
"loss": 0.0441,
|
| 51121 |
+
"step": 6724
|
| 51122 |
+
},
|
| 51123 |
+
{
|
| 51124 |
+
"epoch": 0.9922537808926596,
|
| 51125 |
+
"grad_norm": 0.3246397376060486,
|
| 51126 |
+
"learning_rate": 3.5870150999317567e-09,
|
| 51127 |
+
"loss": 0.0029,
|
| 51128 |
+
"step": 6725
|
| 51129 |
+
},
|
| 51130 |
+
{
|
| 51131 |
+
"epoch": 0.9924013279232755,
|
| 51132 |
+
"grad_norm": 1.3314762115478516,
|
| 51133 |
+
"learning_rate": 3.450387397522237e-09,
|
| 51134 |
+
"loss": 0.025,
|
| 51135 |
+
"step": 6726
|
| 51136 |
+
},
|
| 51137 |
+
{
|
| 51138 |
+
"epoch": 0.9925488749538915,
|
| 51139 |
+
"grad_norm": 1.7466126680374146,
|
| 51140 |
+
"learning_rate": 3.3164120558759614e-09,
|
| 51141 |
+
"loss": 0.0336,
|
| 51142 |
+
"step": 6727
|
| 51143 |
+
},
|
| 51144 |
+
{
|
| 51145 |
+
"epoch": 0.9926964219845076,
|
| 51146 |
+
"grad_norm": 0.8817312121391296,
|
| 51147 |
+
"learning_rate": 3.185089110542272e-09,
|
| 51148 |
+
"loss": 0.0208,
|
| 51149 |
+
"step": 6728
|
| 51150 |
+
},
|
| 51151 |
+
{
|
| 51152 |
+
"epoch": 0.9928439690151236,
|
| 51153 |
+
"grad_norm": 1.9117987155914307,
|
| 51154 |
+
"learning_rate": 3.056418596362187e-09,
|
| 51155 |
+
"loss": 0.0431,
|
| 51156 |
+
"step": 6729
|
| 51157 |
+
},
|
| 51158 |
+
{
|
| 51159 |
+
"epoch": 0.9929915160457395,
|
| 51160 |
+
"grad_norm": 5.536546230316162,
|
| 51161 |
+
"learning_rate": 2.9304005474761755e-09,
|
| 51162 |
+
"loss": 0.1609,
|
| 51163 |
+
"step": 6730
|
| 51164 |
+
},
|
| 51165 |
+
{
|
| 51166 |
+
"epoch": 0.9931390630763556,
|
| 51167 |
+
"grad_norm": 4.430239677429199,
|
| 51168 |
+
"learning_rate": 2.8070349973219336e-09,
|
| 51169 |
+
"loss": 0.1037,
|
| 51170 |
+
"step": 6731
|
| 51171 |
+
},
|
| 51172 |
+
{
|
| 51173 |
+
"epoch": 0.9932866101069716,
|
| 51174 |
+
"grad_norm": 1.1493936777114868,
|
| 51175 |
+
"learning_rate": 2.6863219786299466e-09,
|
| 51176 |
+
"loss": 0.0348,
|
| 51177 |
+
"step": 6732
|
| 51178 |
+
},
|
| 51179 |
+
{
|
| 51180 |
+
"epoch": 0.9934341571375876,
|
| 51181 |
+
"grad_norm": 1.1461900472640991,
|
| 51182 |
+
"learning_rate": 2.568261523430149e-09,
|
| 51183 |
+
"loss": 0.0286,
|
| 51184 |
+
"step": 6733
|
| 51185 |
+
},
|
| 51186 |
+
{
|
| 51187 |
+
"epoch": 0.9935817041682036,
|
| 51188 |
+
"grad_norm": 1.5313574075698853,
|
| 51189 |
+
"learning_rate": 2.452853663046373e-09,
|
| 51190 |
+
"loss": 0.0368,
|
| 51191 |
+
"step": 6734
|
| 51192 |
+
},
|
| 51193 |
+
{
|
| 51194 |
+
"epoch": 0.9937292511988196,
|
| 51195 |
+
"grad_norm": 1.981681227684021,
|
| 51196 |
+
"learning_rate": 2.3400984280985695e-09,
|
| 51197 |
+
"loss": 0.0701,
|
| 51198 |
+
"step": 6735
|
| 51199 |
+
},
|
| 51200 |
+
{
|
| 51201 |
+
"epoch": 0.9938767982294356,
|
| 51202 |
+
"grad_norm": 2.576758623123169,
|
| 51203 |
+
"learning_rate": 2.229995848506139e-09,
|
| 51204 |
+
"loss": 0.0525,
|
| 51205 |
+
"step": 6736
|
| 51206 |
+
},
|
| 51207 |
+
{
|
| 51208 |
+
"epoch": 0.9940243452600517,
|
| 51209 |
+
"grad_norm": 2.1853508949279785,
|
| 51210 |
+
"learning_rate": 2.1225459534801596e-09,
|
| 51211 |
+
"loss": 0.0543,
|
| 51212 |
+
"step": 6737
|
| 51213 |
+
},
|
| 51214 |
+
{
|
| 51215 |
+
"epoch": 0.9941718922906676,
|
| 51216 |
+
"grad_norm": 3.452421188354492,
|
| 51217 |
+
"learning_rate": 2.017748771531158e-09,
|
| 51218 |
+
"loss": 0.0628,
|
| 51219 |
+
"step": 6738
|
| 51220 |
+
},
|
| 51221 |
+
{
|
| 51222 |
+
"epoch": 0.9943194393212836,
|
| 51223 |
+
"grad_norm": 1.7515523433685303,
|
| 51224 |
+
"learning_rate": 1.915604330464671e-09,
|
| 51225 |
+
"loss": 0.0685,
|
| 51226 |
+
"step": 6739
|
| 51227 |
+
},
|
| 51228 |
+
{
|
| 51229 |
+
"epoch": 0.9944669863518997,
|
| 51230 |
+
"grad_norm": 2.203991413116455,
|
| 51231 |
+
"learning_rate": 1.8161126573823517e-09,
|
| 51232 |
+
"loss": 0.0444,
|
| 51233 |
+
"step": 6740
|
| 51234 |
+
},
|
| 51235 |
+
{
|
| 51236 |
+
"epoch": 0.9944669863518997,
|
| 51237 |
+
"eval_accuracy": 0.9782923299565847,
|
| 51238 |
+
"eval_f1": 0.9629629629629629,
|
| 51239 |
+
"eval_loss": 0.055171407759189606,
|
| 51240 |
+
"eval_precision": 0.9798994974874372,
|
| 51241 |
+
"eval_recall": 0.9466019417475728,
|
| 51242 |
+
"eval_runtime": 48.6957,
|
| 51243 |
+
"eval_samples_per_second": 5.976,
|
| 51244 |
+
"eval_steps_per_second": 0.205,
|
| 51245 |
+
"step": 6740
|
| 51246 |
+
},
|
| 51247 |
+
{
|
| 51248 |
+
"epoch": 0.9946145333825157,
|
| 51249 |
+
"grad_norm": 2.4897584915161133,
|
| 51250 |
+
"learning_rate": 1.7192737786819736e-09,
|
| 51251 |
+
"loss": 0.0722,
|
| 51252 |
+
"step": 6741
|
| 51253 |
+
},
|
| 51254 |
+
{
|
| 51255 |
+
"epoch": 0.9947620804131316,
|
| 51256 |
+
"grad_norm": 1.9944440126419067,
|
| 51257 |
+
"learning_rate": 1.6250877200574278e-09,
|
| 51258 |
+
"loss": 0.0995,
|
| 51259 |
+
"step": 6742
|
| 51260 |
+
},
|
| 51261 |
+
{
|
| 51262 |
+
"epoch": 0.9949096274437477,
|
| 51263 |
+
"grad_norm": 2.042222738265991,
|
| 51264 |
+
"learning_rate": 1.5335545064998346e-09,
|
| 51265 |
+
"loss": 0.0642,
|
| 51266 |
+
"step": 6743
|
| 51267 |
+
},
|
| 51268 |
+
{
|
| 51269 |
+
"epoch": 0.9950571744743637,
|
| 51270 |
+
"grad_norm": 1.745491623878479,
|
| 51271 |
+
"learning_rate": 1.4446741622942128e-09,
|
| 51272 |
+
"loss": 0.0506,
|
| 51273 |
+
"step": 6744
|
| 51274 |
+
},
|
| 51275 |
+
{
|
| 51276 |
+
"epoch": 0.9952047215049797,
|
| 51277 |
+
"grad_norm": 0.9310182929039001,
|
| 51278 |
+
"learning_rate": 1.3584467110228095e-09,
|
| 51279 |
+
"loss": 0.0122,
|
| 51280 |
+
"step": 6745
|
| 51281 |
+
},
|
| 51282 |
+
{
|
| 51283 |
+
"epoch": 0.9953522685355958,
|
| 51284 |
+
"grad_norm": 1.8398277759552002,
|
| 51285 |
+
"learning_rate": 1.2748721755651005e-09,
|
| 51286 |
+
"loss": 0.0261,
|
| 51287 |
+
"step": 6746
|
| 51288 |
+
},
|
| 51289 |
+
{
|
| 51290 |
+
"epoch": 0.9954998155662117,
|
| 51291 |
+
"grad_norm": 1.677306890487671,
|
| 51292 |
+
"learning_rate": 1.1939505780966809e-09,
|
| 51293 |
+
"loss": 0.0358,
|
| 51294 |
+
"step": 6747
|
| 51295 |
+
},
|
| 51296 |
+
{
|
| 51297 |
+
"epoch": 0.9956473625968277,
|
| 51298 |
+
"grad_norm": 1.2638682126998901,
|
| 51299 |
+
"learning_rate": 1.115681940085933e-09,
|
| 51300 |
+
"loss": 0.0283,
|
| 51301 |
+
"step": 6748
|
| 51302 |
+
},
|
| 51303 |
+
{
|
| 51304 |
+
"epoch": 0.9957949096274438,
|
| 51305 |
+
"grad_norm": 3.8664028644561768,
|
| 51306 |
+
"learning_rate": 1.040066282300689e-09,
|
| 51307 |
+
"loss": 0.0514,
|
| 51308 |
+
"step": 6749
|
| 51309 |
+
},
|
| 51310 |
+
{
|
| 51311 |
+
"epoch": 0.9959424566580598,
|
| 51312 |
+
"grad_norm": 1.0908302068710327,
|
| 51313 |
+
"learning_rate": 9.671036248048993e-10,
|
| 51314 |
+
"loss": 0.0171,
|
| 51315 |
+
"step": 6750
|
| 51316 |
+
},
|
| 51317 |
+
{
|
| 51318 |
+
"epoch": 0.9960900036886757,
|
| 51319 |
+
"grad_norm": 4.517603874206543,
|
| 51320 |
+
"learning_rate": 8.967939869553022e-10,
|
| 51321 |
+
"loss": 0.0963,
|
| 51322 |
+
"step": 6751
|
| 51323 |
+
},
|
| 51324 |
+
{
|
| 51325 |
+
"epoch": 0.9962375507192918,
|
| 51326 |
+
"grad_norm": 9.87070369720459,
|
| 51327 |
+
"learning_rate": 8.291373874091957e-10,
|
| 51328 |
+
"loss": 0.0511,
|
| 51329 |
+
"step": 6752
|
| 51330 |
+
},
|
| 51331 |
+
{
|
| 51332 |
+
"epoch": 0.9963850977499078,
|
| 51333 |
+
"grad_norm": 1.7819442749023438,
|
| 51334 |
+
"learning_rate": 7.641338441166657e-10,
|
| 51335 |
+
"loss": 0.0142,
|
| 51336 |
+
"step": 6753
|
| 51337 |
+
},
|
| 51338 |
+
{
|
| 51339 |
+
"epoch": 0.9965326447805238,
|
| 51340 |
+
"grad_norm": 1.9164355993270874,
|
| 51341 |
+
"learning_rate": 7.017833743261371e-10,
|
| 51342 |
+
"loss": 0.0333,
|
| 51343 |
+
"step": 6754
|
| 51344 |
+
},
|
| 51345 |
+
{
|
| 51346 |
+
"epoch": 0.9966801918111398,
|
| 51347 |
+
"grad_norm": 1.178529143333435,
|
| 51348 |
+
"learning_rate": 6.420859945788227e-10,
|
| 51349 |
+
"loss": 0.0175,
|
| 51350 |
+
"step": 6755
|
| 51351 |
+
},
|
| 51352 |
+
{
|
| 51353 |
+
"epoch": 0.9968277388417558,
|
| 51354 |
+
"grad_norm": 3.305959701538086,
|
| 51355 |
+
"learning_rate": 5.850417207153847e-10,
|
| 51356 |
+
"loss": 0.0576,
|
| 51357 |
+
"step": 6756
|
| 51358 |
+
},
|
| 51359 |
+
{
|
| 51360 |
+
"epoch": 0.9969752858723718,
|
| 51361 |
+
"grad_norm": 3.1544477939605713,
|
| 51362 |
+
"learning_rate": 5.306505678714934e-10,
|
| 51363 |
+
"loss": 0.0754,
|
| 51364 |
+
"step": 6757
|
| 51365 |
+
},
|
| 51366 |
+
{
|
| 51367 |
+
"epoch": 0.9971228329029879,
|
| 51368 |
+
"grad_norm": 1.3226299285888672,
|
| 51369 |
+
"learning_rate": 4.789125504778281e-10,
|
| 51370 |
+
"loss": 0.0261,
|
| 51371 |
+
"step": 6758
|
| 51372 |
+
},
|
| 51373 |
+
{
|
| 51374 |
+
"epoch": 0.9972703799336038,
|
| 51375 |
+
"grad_norm": 3.1813669204711914,
|
| 51376 |
+
"learning_rate": 4.2982768226229597e-10,
|
| 51377 |
+
"loss": 0.1108,
|
| 51378 |
+
"step": 6759
|
| 51379 |
+
},
|
| 51380 |
+
{
|
| 51381 |
+
"epoch": 0.9974179269642198,
|
| 51382 |
+
"grad_norm": 2.4142541885375977,
|
| 51383 |
+
"learning_rate": 3.8339597624892366e-10,
|
| 51384 |
+
"loss": 0.0481,
|
| 51385 |
+
"step": 6760
|
| 51386 |
+
},
|
| 51387 |
+
{
|
| 51388 |
+
"epoch": 0.9974179269642198,
|
| 51389 |
+
"eval_accuracy": 0.9782923299565847,
|
| 51390 |
+
"eval_f1": 0.9629629629629629,
|
| 51391 |
+
"eval_loss": 0.05494352802634239,
|
| 51392 |
+
"eval_precision": 0.9798994974874372,
|
| 51393 |
+
"eval_recall": 0.9466019417475728,
|
| 51394 |
+
"eval_runtime": 48.9599,
|
| 51395 |
+
"eval_samples_per_second": 5.944,
|
| 51396 |
+
"eval_steps_per_second": 0.204,
|
| 51397 |
+
"step": 6760
|
| 51398 |
+
},
|
| 51399 |
+
{
|
| 51400 |
+
"epoch": 0.9975654739948359,
|
| 51401 |
+
"grad_norm": 1.254651665687561,
|
| 51402 |
+
"learning_rate": 3.396174447556355e-10,
|
| 51403 |
+
"loss": 0.0174,
|
| 51404 |
+
"step": 6761
|
| 51405 |
+
},
|
| 51406 |
+
{
|
| 51407 |
+
"epoch": 0.9977130210254519,
|
| 51408 |
+
"grad_norm": 2.606041669845581,
|
| 51409 |
+
"learning_rate": 2.984920994009155e-10,
|
| 51410 |
+
"loss": 0.0746,
|
| 51411 |
+
"step": 6762
|
| 51412 |
+
},
|
| 51413 |
+
{
|
| 51414 |
+
"epoch": 0.9978605680560678,
|
| 51415 |
+
"grad_norm": 6.167627334594727,
|
| 51416 |
+
"learning_rate": 2.600199510938151e-10,
|
| 51417 |
+
"loss": 0.1954,
|
| 51418 |
+
"step": 6763
|
| 51419 |
+
},
|
| 51420 |
+
{
|
| 51421 |
+
"epoch": 0.9980081150866839,
|
| 51422 |
+
"grad_norm": 2.2177047729492188,
|
| 51423 |
+
"learning_rate": 2.2420101004394512e-10,
|
| 51424 |
+
"loss": 0.0676,
|
| 51425 |
+
"step": 6764
|
| 51426 |
+
},
|
| 51427 |
+
{
|
| 51428 |
+
"epoch": 0.9981556621172999,
|
| 51429 |
+
"grad_norm": 1.7990946769714355,
|
| 51430 |
+
"learning_rate": 1.9103528575370457e-10,
|
| 51431 |
+
"loss": 0.0404,
|
| 51432 |
+
"step": 6765
|
| 51433 |
+
},
|
| 51434 |
+
{
|
| 51435 |
+
"epoch": 0.9983032091479159,
|
| 51436 |
+
"grad_norm": 1.5802024602890015,
|
| 51437 |
+
"learning_rate": 1.6052278702272107e-10,
|
| 51438 |
+
"loss": 0.0352,
|
| 51439 |
+
"step": 6766
|
| 51440 |
+
},
|
| 51441 |
+
{
|
| 51442 |
+
"epoch": 0.998450756178532,
|
| 51443 |
+
"grad_norm": 1.7018808126449585,
|
| 51444 |
+
"learning_rate": 1.3266352194785114e-10,
|
| 51445 |
+
"loss": 0.0254,
|
| 51446 |
+
"step": 6767
|
| 51447 |
+
},
|
| 51448 |
+
{
|
| 51449 |
+
"epoch": 0.9985983032091479,
|
| 51450 |
+
"grad_norm": 2.102145195007324,
|
| 51451 |
+
"learning_rate": 1.0745749792095971e-10,
|
| 51452 |
+
"loss": 0.0427,
|
| 51453 |
+
"step": 6768
|
| 51454 |
+
},
|
| 51455 |
+
{
|
| 51456 |
+
"epoch": 0.9987458502397639,
|
| 51457 |
+
"grad_norm": 2.0960288047790527,
|
| 51458 |
+
"learning_rate": 8.490472163003027e-11,
|
| 51459 |
+
"loss": 0.0562,
|
| 51460 |
+
"step": 6769
|
| 51461 |
+
},
|
| 51462 |
+
{
|
| 51463 |
+
"epoch": 0.99889339727038,
|
| 51464 |
+
"grad_norm": 2.1226682662963867,
|
| 51465 |
+
"learning_rate": 6.500519905694446e-11,
|
| 51466 |
+
"loss": 0.0399,
|
| 51467 |
+
"step": 6770
|
| 51468 |
+
},
|
| 51469 |
+
{
|
| 51470 |
+
"epoch": 0.999040944300996,
|
| 51471 |
+
"grad_norm": 2.805297374725342,
|
| 51472 |
+
"learning_rate": 4.775893548414345e-11,
|
| 51473 |
+
"loss": 0.128,
|
| 51474 |
+
"step": 6771
|
| 51475 |
+
},
|
| 51476 |
+
{
|
| 51477 |
+
"epoch": 0.9991884913316119,
|
| 51478 |
+
"grad_norm": 3.2651734352111816,
|
| 51479 |
+
"learning_rate": 3.316593548574609e-11,
|
| 51480 |
+
"loss": 0.0469,
|
| 51481 |
+
"step": 6772
|
| 51482 |
+
},
|
| 51483 |
+
{
|
| 51484 |
+
"epoch": 0.999336038362228,
|
| 51485 |
+
"grad_norm": 1.4640666246414185,
|
| 51486 |
+
"learning_rate": 2.1226202935320518e-11,
|
| 51487 |
+
"loss": 0.033,
|
| 51488 |
+
"step": 6773
|
| 51489 |
+
},
|
| 51490 |
+
{
|
| 51491 |
+
"epoch": 0.999483585392844,
|
| 51492 |
+
"grad_norm": 1.624456524848938,
|
| 51493 |
+
"learning_rate": 1.1939740999222793e-11,
|
| 51494 |
+
"loss": 0.0241,
|
| 51495 |
+
"step": 6774
|
| 51496 |
+
},
|
| 51497 |
+
{
|
| 51498 |
+
"epoch": 0.99963113242346,
|
| 51499 |
+
"grad_norm": 1.3273454904556274,
|
| 51500 |
+
"learning_rate": 5.3065521421480356e-12,
|
| 51501 |
+
"loss": 0.034,
|
| 51502 |
+
"step": 6775
|
| 51503 |
+
},
|
| 51504 |
+
{
|
| 51505 |
+
"epoch": 0.999778679454076,
|
| 51506 |
+
"grad_norm": 0.9207583069801331,
|
| 51507 |
+
"learning_rate": 1.3266381237997393e-12,
|
| 51508 |
+
"loss": 0.026,
|
| 51509 |
+
"step": 6776
|
| 51510 |
+
},
|
| 51511 |
+
{
|
| 51512 |
+
"epoch": 0.999926226484692,
|
| 51513 |
+
"grad_norm": 1.3743399381637573,
|
| 51514 |
+
"learning_rate": 0.0,
|
| 51515 |
+
"loss": 0.0357,
|
| 51516 |
+
"step": 6777
|
| 51517 |
}
|
| 51518 |
],
|
| 51519 |
"logging_steps": 1,
|
|
|
|
| 51528 |
"should_evaluate": false,
|
| 51529 |
"should_log": false,
|
| 51530 |
"should_save": true,
|
| 51531 |
+
"should_training_stop": true
|
| 51532 |
},
|
| 51533 |
"attributes": {}
|
| 51534 |
}
|
| 51535 |
},
|
| 51536 |
+
"total_flos": 2.0879468077357466e+18,
|
| 51537 |
"train_batch_size": 8,
|
| 51538 |
"trial_name": null,
|
| 51539 |
"trial_params": null
|