Training in progress, step 1700, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca9ad4c45f6ed9ff141594db885ce7f7936b72c33bf7831ea51061b751c035d0
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0005a7a0bf83524bb14a93793c3fdab26d6c2653ecf1f287deeb08e8e78ca1fd
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ed9ba22411531d762cf848c2d9daddff6ee7f29ca806d5aef7f5ba9813947f0
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:371c2c2e2799dc1b7de0b141a212b857758839245977f9c58714ec11a1162c7e
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c2bdb37a902663861f07a52281ef1995bcfda4e8830c535faae292fabb659b6
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:478afb018e67b6963a405f6bfecae60632c4d7b580db98fb4a37e4698026d54a
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2f9589c38a3685a3a7913c666aa2459077a853b4e8f8a5230bce75fa99b9825
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6419cd9723247df1fda540548fd8769dc6b91a2aa84ee458e9c056ee561c4042
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5d4b484a25f92c99275105a25a5abc87d9965b9b7b7ca782045935178f7d615
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c549c91cf2b0439baf2468c247f8e2109889f720a27e0d09c9b7d5f695e49a5
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81f7b75ebe10eb5c6ecc97c93cde36ee0b594c67c95103dbdcabab169117e465
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9ada658c72a7f1c0eace40e44824bfa74094a719f3408a314ecbea87cf54304
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c8c47081e0cd48c8e1647d14b0cfbcdb2a632b234bf18bf1a619d30eef11321
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -12179,6 +12179,766 @@
|
|
12179 |
"eval_samples_per_second": 5.387,
|
12180 |
"eval_steps_per_second": 0.179,
|
12181 |
"step": 1600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12182 |
}
|
12183 |
],
|
12184 |
"logging_steps": 1,
|
@@ -12198,7 +12958,7 @@
|
|
12198 |
"attributes": {}
|
12199 |
}
|
12200 |
},
|
12201 |
-
"total_flos": 4.
|
12202 |
"train_batch_size": 8,
|
12203 |
"trial_name": null,
|
12204 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8058781701825077,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 1700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
12179 |
"eval_samples_per_second": 5.387,
|
12180 |
"eval_steps_per_second": 0.179,
|
12181 |
"step": 1600
|
12182 |
+
},
|
12183 |
+
{
|
12184 |
+
"epoch": 0.7589476179189382,
|
12185 |
+
"grad_norm": 2.9690921306610107,
|
12186 |
+
"learning_rate": 3.3316870025959693e-06,
|
12187 |
+
"loss": 0.1082,
|
12188 |
+
"step": 1601
|
12189 |
+
},
|
12190 |
+
{
|
12191 |
+
"epoch": 0.7594216639013984,
|
12192 |
+
"grad_norm": 5.340085506439209,
|
12193 |
+
"learning_rate": 3.3193613578646633e-06,
|
12194 |
+
"loss": 0.1429,
|
12195 |
+
"step": 1602
|
12196 |
+
},
|
12197 |
+
{
|
12198 |
+
"epoch": 0.7598957098838587,
|
12199 |
+
"grad_norm": 3.9515483379364014,
|
12200 |
+
"learning_rate": 3.307054016256912e-06,
|
12201 |
+
"loss": 0.1083,
|
12202 |
+
"step": 1603
|
12203 |
+
},
|
12204 |
+
{
|
12205 |
+
"epoch": 0.760369755866319,
|
12206 |
+
"grad_norm": 6.481595993041992,
|
12207 |
+
"learning_rate": 3.2947650114914587e-06,
|
12208 |
+
"loss": 0.189,
|
12209 |
+
"step": 1604
|
12210 |
+
},
|
12211 |
+
{
|
12212 |
+
"epoch": 0.7608438018487793,
|
12213 |
+
"grad_norm": 5.499702453613281,
|
12214 |
+
"learning_rate": 3.2824943772368213e-06,
|
12215 |
+
"loss": 0.1637,
|
12216 |
+
"step": 1605
|
12217 |
+
},
|
12218 |
+
{
|
12219 |
+
"epoch": 0.7613178478312397,
|
12220 |
+
"grad_norm": 6.298553943634033,
|
12221 |
+
"learning_rate": 3.270242147111182e-06,
|
12222 |
+
"loss": 0.1212,
|
12223 |
+
"step": 1606
|
12224 |
+
},
|
12225 |
+
{
|
12226 |
+
"epoch": 0.7617918938136999,
|
12227 |
+
"grad_norm": 3.0482068061828613,
|
12228 |
+
"learning_rate": 3.258008354682303e-06,
|
12229 |
+
"loss": 0.1139,
|
12230 |
+
"step": 1607
|
12231 |
+
},
|
12232 |
+
{
|
12233 |
+
"epoch": 0.7622659397961602,
|
12234 |
+
"grad_norm": 8.920551300048828,
|
12235 |
+
"learning_rate": 3.2457930334674304e-06,
|
12236 |
+
"loss": 0.1548,
|
12237 |
+
"step": 1608
|
12238 |
+
},
|
12239 |
+
{
|
12240 |
+
"epoch": 0.7627399857786206,
|
12241 |
+
"grad_norm": 7.286499500274658,
|
12242 |
+
"learning_rate": 3.233596216933206e-06,
|
12243 |
+
"loss": 0.1776,
|
12244 |
+
"step": 1609
|
12245 |
+
},
|
12246 |
+
{
|
12247 |
+
"epoch": 0.7632140317610808,
|
12248 |
+
"grad_norm": 12.77665901184082,
|
12249 |
+
"learning_rate": 3.2214179384955713e-06,
|
12250 |
+
"loss": 0.1825,
|
12251 |
+
"step": 1610
|
12252 |
+
},
|
12253 |
+
{
|
12254 |
+
"epoch": 0.7636880777435411,
|
12255 |
+
"grad_norm": 5.8278374671936035,
|
12256 |
+
"learning_rate": 3.209258231519682e-06,
|
12257 |
+
"loss": 0.1913,
|
12258 |
+
"step": 1611
|
12259 |
+
},
|
12260 |
+
{
|
12261 |
+
"epoch": 0.7641621237260015,
|
12262 |
+
"grad_norm": 3.0561583042144775,
|
12263 |
+
"learning_rate": 3.197117129319808e-06,
|
12264 |
+
"loss": 0.1343,
|
12265 |
+
"step": 1612
|
12266 |
+
},
|
12267 |
+
{
|
12268 |
+
"epoch": 0.7646361697084617,
|
12269 |
+
"grad_norm": 6.679983139038086,
|
12270 |
+
"learning_rate": 3.1849946651592532e-06,
|
12271 |
+
"loss": 0.1593,
|
12272 |
+
"step": 1613
|
12273 |
+
},
|
12274 |
+
{
|
12275 |
+
"epoch": 0.765110215690922,
|
12276 |
+
"grad_norm": 4.746762275695801,
|
12277 |
+
"learning_rate": 3.172890872250254e-06,
|
12278 |
+
"loss": 0.2468,
|
12279 |
+
"step": 1614
|
12280 |
+
},
|
12281 |
+
{
|
12282 |
+
"epoch": 0.7655842616733823,
|
12283 |
+
"grad_norm": 3.5384531021118164,
|
12284 |
+
"learning_rate": 3.1608057837538976e-06,
|
12285 |
+
"loss": 0.0998,
|
12286 |
+
"step": 1615
|
12287 |
+
},
|
12288 |
+
{
|
12289 |
+
"epoch": 0.7660583076558426,
|
12290 |
+
"grad_norm": 3.744356870651245,
|
12291 |
+
"learning_rate": 3.1487394327800156e-06,
|
12292 |
+
"loss": 0.1393,
|
12293 |
+
"step": 1616
|
12294 |
+
},
|
12295 |
+
{
|
12296 |
+
"epoch": 0.766532353638303,
|
12297 |
+
"grad_norm": 3.5314719676971436,
|
12298 |
+
"learning_rate": 3.136691852387116e-06,
|
12299 |
+
"loss": 0.0888,
|
12300 |
+
"step": 1617
|
12301 |
+
},
|
12302 |
+
{
|
12303 |
+
"epoch": 0.7670063996207632,
|
12304 |
+
"grad_norm": 5.413354396820068,
|
12305 |
+
"learning_rate": 3.1246630755822703e-06,
|
12306 |
+
"loss": 0.1746,
|
12307 |
+
"step": 1618
|
12308 |
+
},
|
12309 |
+
{
|
12310 |
+
"epoch": 0.7674804456032235,
|
12311 |
+
"grad_norm": 5.721497535705566,
|
12312 |
+
"learning_rate": 3.1126531353210456e-06,
|
12313 |
+
"loss": 0.1132,
|
12314 |
+
"step": 1619
|
12315 |
+
},
|
12316 |
+
{
|
12317 |
+
"epoch": 0.7679544915856839,
|
12318 |
+
"grad_norm": 6.063429355621338,
|
12319 |
+
"learning_rate": 3.1006620645073925e-06,
|
12320 |
+
"loss": 0.1388,
|
12321 |
+
"step": 1620
|
12322 |
+
},
|
12323 |
+
{
|
12324 |
+
"epoch": 0.7679544915856839,
|
12325 |
+
"eval_accuracy": 0.9951690821256038,
|
12326 |
+
"eval_f1": 0.9454545454545454,
|
12327 |
+
"eval_loss": 0.012482204474508762,
|
12328 |
+
"eval_precision": 0.896551724137931,
|
12329 |
+
"eval_recall": 1.0,
|
12330 |
+
"eval_runtime": 49.5878,
|
12331 |
+
"eval_samples_per_second": 5.465,
|
12332 |
+
"eval_steps_per_second": 0.181,
|
12333 |
+
"step": 1620
|
12334 |
+
},
|
12335 |
+
{
|
12336 |
+
"epoch": 0.7684285375681441,
|
12337 |
+
"grad_norm": 5.663280487060547,
|
12338 |
+
"learning_rate": 3.0886898959935663e-06,
|
12339 |
+
"loss": 0.1339,
|
12340 |
+
"step": 1621
|
12341 |
+
},
|
12342 |
+
{
|
12343 |
+
"epoch": 0.7689025835506044,
|
12344 |
+
"grad_norm": 3.009401321411133,
|
12345 |
+
"learning_rate": 3.0767366625800366e-06,
|
12346 |
+
"loss": 0.1137,
|
12347 |
+
"step": 1622
|
12348 |
+
},
|
12349 |
+
{
|
12350 |
+
"epoch": 0.7693766295330647,
|
12351 |
+
"grad_norm": 4.703526973724365,
|
12352 |
+
"learning_rate": 3.064802397015394e-06,
|
12353 |
+
"loss": 0.2366,
|
12354 |
+
"step": 1623
|
12355 |
+
},
|
12356 |
+
{
|
12357 |
+
"epoch": 0.769850675515525,
|
12358 |
+
"grad_norm": 3.2940542697906494,
|
12359 |
+
"learning_rate": 3.052887131996267e-06,
|
12360 |
+
"loss": 0.1395,
|
12361 |
+
"step": 1624
|
12362 |
+
},
|
12363 |
+
{
|
12364 |
+
"epoch": 0.7703247214979853,
|
12365 |
+
"grad_norm": 3.261302947998047,
|
12366 |
+
"learning_rate": 3.040990900167219e-06,
|
12367 |
+
"loss": 0.1505,
|
12368 |
+
"step": 1625
|
12369 |
+
},
|
12370 |
+
{
|
12371 |
+
"epoch": 0.7707987674804456,
|
12372 |
+
"grad_norm": 3.4305295944213867,
|
12373 |
+
"learning_rate": 3.0291137341206755e-06,
|
12374 |
+
"loss": 0.1372,
|
12375 |
+
"step": 1626
|
12376 |
+
},
|
12377 |
+
{
|
12378 |
+
"epoch": 0.7712728134629059,
|
12379 |
+
"grad_norm": 8.65300178527832,
|
12380 |
+
"learning_rate": 3.0172556663968254e-06,
|
12381 |
+
"loss": 0.1821,
|
12382 |
+
"step": 1627
|
12383 |
+
},
|
12384 |
+
{
|
12385 |
+
"epoch": 0.7717468594453663,
|
12386 |
+
"grad_norm": 5.62878942489624,
|
12387 |
+
"learning_rate": 3.0054167294835314e-06,
|
12388 |
+
"loss": 0.1512,
|
12389 |
+
"step": 1628
|
12390 |
+
},
|
12391 |
+
{
|
12392 |
+
"epoch": 0.7722209054278265,
|
12393 |
+
"grad_norm": 5.76574182510376,
|
12394 |
+
"learning_rate": 2.993596955816244e-06,
|
12395 |
+
"loss": 0.1573,
|
12396 |
+
"step": 1629
|
12397 |
+
},
|
12398 |
+
{
|
12399 |
+
"epoch": 0.7726949514102868,
|
12400 |
+
"grad_norm": 7.997915267944336,
|
12401 |
+
"learning_rate": 2.9817963777779124e-06,
|
12402 |
+
"loss": 0.2725,
|
12403 |
+
"step": 1630
|
12404 |
+
},
|
12405 |
+
{
|
12406 |
+
"epoch": 0.7731689973927471,
|
12407 |
+
"grad_norm": 3.254222869873047,
|
12408 |
+
"learning_rate": 2.970015027698895e-06,
|
12409 |
+
"loss": 0.1247,
|
12410 |
+
"step": 1631
|
12411 |
+
},
|
12412 |
+
{
|
12413 |
+
"epoch": 0.7736430433752074,
|
12414 |
+
"grad_norm": 8.073678016662598,
|
12415 |
+
"learning_rate": 2.958252937856869e-06,
|
12416 |
+
"loss": 0.1538,
|
12417 |
+
"step": 1632
|
12418 |
+
},
|
12419 |
+
{
|
12420 |
+
"epoch": 0.7741170893576677,
|
12421 |
+
"grad_norm": 2.6469109058380127,
|
12422 |
+
"learning_rate": 2.946510140476747e-06,
|
12423 |
+
"loss": 0.0928,
|
12424 |
+
"step": 1633
|
12425 |
+
},
|
12426 |
+
{
|
12427 |
+
"epoch": 0.774591135340128,
|
12428 |
+
"grad_norm": 6.9095869064331055,
|
12429 |
+
"learning_rate": 2.9347866677305814e-06,
|
12430 |
+
"loss": 0.1415,
|
12431 |
+
"step": 1634
|
12432 |
+
},
|
12433 |
+
{
|
12434 |
+
"epoch": 0.7750651813225883,
|
12435 |
+
"grad_norm": 3.802766799926758,
|
12436 |
+
"learning_rate": 2.923082551737484e-06,
|
12437 |
+
"loss": 0.1323,
|
12438 |
+
"step": 1635
|
12439 |
+
},
|
12440 |
+
{
|
12441 |
+
"epoch": 0.7755392273050485,
|
12442 |
+
"grad_norm": 4.053550720214844,
|
12443 |
+
"learning_rate": 2.911397824563533e-06,
|
12444 |
+
"loss": 0.1498,
|
12445 |
+
"step": 1636
|
12446 |
+
},
|
12447 |
+
{
|
12448 |
+
"epoch": 0.7760132732875089,
|
12449 |
+
"grad_norm": 5.973599910736084,
|
12450 |
+
"learning_rate": 2.899732518221685e-06,
|
12451 |
+
"loss": 0.149,
|
12452 |
+
"step": 1637
|
12453 |
+
},
|
12454 |
+
{
|
12455 |
+
"epoch": 0.7764873192699692,
|
12456 |
+
"grad_norm": 3.402735710144043,
|
12457 |
+
"learning_rate": 2.888086664671693e-06,
|
12458 |
+
"loss": 0.1312,
|
12459 |
+
"step": 1638
|
12460 |
+
},
|
12461 |
+
{
|
12462 |
+
"epoch": 0.7769613652524295,
|
12463 |
+
"grad_norm": 6.684436798095703,
|
12464 |
+
"learning_rate": 2.8764602958200096e-06,
|
12465 |
+
"loss": 0.1108,
|
12466 |
+
"step": 1639
|
12467 |
+
},
|
12468 |
+
{
|
12469 |
+
"epoch": 0.7774354112348898,
|
12470 |
+
"grad_norm": 3.762352466583252,
|
12471 |
+
"learning_rate": 2.8648534435197086e-06,
|
12472 |
+
"loss": 0.1221,
|
12473 |
+
"step": 1640
|
12474 |
+
},
|
12475 |
+
{
|
12476 |
+
"epoch": 0.7774354112348898,
|
12477 |
+
"eval_accuracy": 0.9959742351046699,
|
12478 |
+
"eval_f1": 0.9532710280373832,
|
12479 |
+
"eval_loss": 0.009969827719032764,
|
12480 |
+
"eval_precision": 0.9272727272727272,
|
12481 |
+
"eval_recall": 0.9807692307692307,
|
12482 |
+
"eval_runtime": 50.0594,
|
12483 |
+
"eval_samples_per_second": 5.414,
|
12484 |
+
"eval_steps_per_second": 0.18,
|
12485 |
+
"step": 1640
|
12486 |
+
},
|
12487 |
+
{
|
12488 |
+
"epoch": 0.77790945721735,
|
12489 |
+
"grad_norm": 5.541801452636719,
|
12490 |
+
"learning_rate": 2.853266139570391e-06,
|
12491 |
+
"loss": 0.1781,
|
12492 |
+
"step": 1641
|
12493 |
+
},
|
12494 |
+
{
|
12495 |
+
"epoch": 0.7783835031998104,
|
12496 |
+
"grad_norm": 5.2935638427734375,
|
12497 |
+
"learning_rate": 2.841698415718103e-06,
|
12498 |
+
"loss": 0.1746,
|
12499 |
+
"step": 1642
|
12500 |
+
},
|
12501 |
+
{
|
12502 |
+
"epoch": 0.7788575491822707,
|
12503 |
+
"grad_norm": 3.5511698722839355,
|
12504 |
+
"learning_rate": 2.8301503036552446e-06,
|
12505 |
+
"loss": 0.1303,
|
12506 |
+
"step": 1643
|
12507 |
+
},
|
12508 |
+
{
|
12509 |
+
"epoch": 0.7793315951647309,
|
12510 |
+
"grad_norm": 2.210439682006836,
|
12511 |
+
"learning_rate": 2.8186218350204865e-06,
|
12512 |
+
"loss": 0.1052,
|
12513 |
+
"step": 1644
|
12514 |
+
},
|
12515 |
+
{
|
12516 |
+
"epoch": 0.7798056411471913,
|
12517 |
+
"grad_norm": 3.1148386001586914,
|
12518 |
+
"learning_rate": 2.8071130413986814e-06,
|
12519 |
+
"loss": 0.0829,
|
12520 |
+
"step": 1645
|
12521 |
+
},
|
12522 |
+
{
|
12523 |
+
"epoch": 0.7802796871296516,
|
12524 |
+
"grad_norm": 7.042520999908447,
|
12525 |
+
"learning_rate": 2.795623954320781e-06,
|
12526 |
+
"loss": 0.2299,
|
12527 |
+
"step": 1646
|
12528 |
+
},
|
12529 |
+
{
|
12530 |
+
"epoch": 0.7807537331121118,
|
12531 |
+
"grad_norm": 4.106062889099121,
|
12532 |
+
"learning_rate": 2.7841546052637346e-06,
|
12533 |
+
"loss": 0.119,
|
12534 |
+
"step": 1647
|
12535 |
+
},
|
12536 |
+
{
|
12537 |
+
"epoch": 0.7812277790945722,
|
12538 |
+
"grad_norm": 2.969593048095703,
|
12539 |
+
"learning_rate": 2.7727050256504295e-06,
|
12540 |
+
"loss": 0.0684,
|
12541 |
+
"step": 1648
|
12542 |
+
},
|
12543 |
+
{
|
12544 |
+
"epoch": 0.7817018250770325,
|
12545 |
+
"grad_norm": 6.737387180328369,
|
12546 |
+
"learning_rate": 2.761275246849582e-06,
|
12547 |
+
"loss": 0.1164,
|
12548 |
+
"step": 1649
|
12549 |
+
},
|
12550 |
+
{
|
12551 |
+
"epoch": 0.7821758710594928,
|
12552 |
+
"grad_norm": 6.33607292175293,
|
12553 |
+
"learning_rate": 2.7498653001756615e-06,
|
12554 |
+
"loss": 0.1104,
|
12555 |
+
"step": 1650
|
12556 |
+
},
|
12557 |
+
{
|
12558 |
+
"epoch": 0.7826499170419531,
|
12559 |
+
"grad_norm": 3.347256898880005,
|
12560 |
+
"learning_rate": 2.738475216888802e-06,
|
12561 |
+
"loss": 0.1036,
|
12562 |
+
"step": 1651
|
12563 |
+
},
|
12564 |
+
{
|
12565 |
+
"epoch": 0.7831239630244133,
|
12566 |
+
"grad_norm": 3.709547281265259,
|
12567 |
+
"learning_rate": 2.7271050281947165e-06,
|
12568 |
+
"loss": 0.1436,
|
12569 |
+
"step": 1652
|
12570 |
+
},
|
12571 |
+
{
|
12572 |
+
"epoch": 0.7835980090068737,
|
12573 |
+
"grad_norm": 3.4499459266662598,
|
12574 |
+
"learning_rate": 2.7157547652446193e-06,
|
12575 |
+
"loss": 0.1515,
|
12576 |
+
"step": 1653
|
12577 |
+
},
|
12578 |
+
{
|
12579 |
+
"epoch": 0.784072054989334,
|
12580 |
+
"grad_norm": 2.6657423973083496,
|
12581 |
+
"learning_rate": 2.704424459135123e-06,
|
12582 |
+
"loss": 0.1087,
|
12583 |
+
"step": 1654
|
12584 |
+
},
|
12585 |
+
{
|
12586 |
+
"epoch": 0.7845461009717942,
|
12587 |
+
"grad_norm": 6.451166152954102,
|
12588 |
+
"learning_rate": 2.6931141409081753e-06,
|
12589 |
+
"loss": 0.2029,
|
12590 |
+
"step": 1655
|
12591 |
+
},
|
12592 |
+
{
|
12593 |
+
"epoch": 0.7850201469542546,
|
12594 |
+
"grad_norm": 4.049078464508057,
|
12595 |
+
"learning_rate": 2.681823841550947e-06,
|
12596 |
+
"loss": 0.1342,
|
12597 |
+
"step": 1656
|
12598 |
+
},
|
12599 |
+
{
|
12600 |
+
"epoch": 0.7854941929367149,
|
12601 |
+
"grad_norm": 5.632473468780518,
|
12602 |
+
"learning_rate": 2.6705535919957772e-06,
|
12603 |
+
"loss": 0.1467,
|
12604 |
+
"step": 1657
|
12605 |
+
},
|
12606 |
+
{
|
12607 |
+
"epoch": 0.7859682389191751,
|
12608 |
+
"grad_norm": 3.3033530712127686,
|
12609 |
+
"learning_rate": 2.6593034231200664e-06,
|
12610 |
+
"loss": 0.1404,
|
12611 |
+
"step": 1658
|
12612 |
+
},
|
12613 |
+
{
|
12614 |
+
"epoch": 0.7864422849016355,
|
12615 |
+
"grad_norm": 3.3128445148468018,
|
12616 |
+
"learning_rate": 2.648073365746204e-06,
|
12617 |
+
"loss": 0.1129,
|
12618 |
+
"step": 1659
|
12619 |
+
},
|
12620 |
+
{
|
12621 |
+
"epoch": 0.7869163308840957,
|
12622 |
+
"grad_norm": 5.318967342376709,
|
12623 |
+
"learning_rate": 2.6368634506414757e-06,
|
12624 |
+
"loss": 0.1571,
|
12625 |
+
"step": 1660
|
12626 |
+
},
|
12627 |
+
{
|
12628 |
+
"epoch": 0.7869163308840957,
|
12629 |
+
"eval_accuracy": 0.9959742351046699,
|
12630 |
+
"eval_f1": 0.9532710280373832,
|
12631 |
+
"eval_loss": 0.010810844600200653,
|
12632 |
+
"eval_precision": 0.9272727272727272,
|
12633 |
+
"eval_recall": 0.9807692307692307,
|
12634 |
+
"eval_runtime": 49.9177,
|
12635 |
+
"eval_samples_per_second": 5.429,
|
12636 |
+
"eval_steps_per_second": 0.18,
|
12637 |
+
"step": 1660
|
12638 |
+
},
|
12639 |
+
{
|
12640 |
+
"epoch": 0.7873903768665561,
|
12641 |
+
"grad_norm": 6.077727317810059,
|
12642 |
+
"learning_rate": 2.6256737085179852e-06,
|
12643 |
+
"loss": 0.1892,
|
12644 |
+
"step": 1661
|
12645 |
+
},
|
12646 |
+
{
|
12647 |
+
"epoch": 0.7878644228490164,
|
12648 |
+
"grad_norm": 5.929904460906982,
|
12649 |
+
"learning_rate": 2.614504170032567e-06,
|
12650 |
+
"loss": 0.1609,
|
12651 |
+
"step": 1662
|
12652 |
+
},
|
12653 |
+
{
|
12654 |
+
"epoch": 0.7883384688314766,
|
12655 |
+
"grad_norm": 12.54429817199707,
|
12656 |
+
"learning_rate": 2.6033548657867013e-06,
|
12657 |
+
"loss": 0.149,
|
12658 |
+
"step": 1663
|
12659 |
+
},
|
12660 |
+
{
|
12661 |
+
"epoch": 0.788812514813937,
|
12662 |
+
"grad_norm": 3.4696834087371826,
|
12663 |
+
"learning_rate": 2.5922258263264366e-06,
|
12664 |
+
"loss": 0.1037,
|
12665 |
+
"step": 1664
|
12666 |
+
},
|
12667 |
+
{
|
12668 |
+
"epoch": 0.7892865607963973,
|
12669 |
+
"grad_norm": 3.9441494941711426,
|
12670 |
+
"learning_rate": 2.581117082142296e-06,
|
12671 |
+
"loss": 0.1487,
|
12672 |
+
"step": 1665
|
12673 |
+
},
|
12674 |
+
{
|
12675 |
+
"epoch": 0.7897606067788575,
|
12676 |
+
"grad_norm": 3.3771462440490723,
|
12677 |
+
"learning_rate": 2.570028663669204e-06,
|
12678 |
+
"loss": 0.0966,
|
12679 |
+
"step": 1666
|
12680 |
+
},
|
12681 |
+
{
|
12682 |
+
"epoch": 0.7902346527613179,
|
12683 |
+
"grad_norm": 5.6400604248046875,
|
12684 |
+
"learning_rate": 2.5589606012863968e-06,
|
12685 |
+
"loss": 0.1358,
|
12686 |
+
"step": 1667
|
12687 |
+
},
|
12688 |
+
{
|
12689 |
+
"epoch": 0.7907086987437781,
|
12690 |
+
"grad_norm": 3.4519641399383545,
|
12691 |
+
"learning_rate": 2.547912925317334e-06,
|
12692 |
+
"loss": 0.0834,
|
12693 |
+
"step": 1668
|
12694 |
+
},
|
12695 |
+
{
|
12696 |
+
"epoch": 0.7911827447262384,
|
12697 |
+
"grad_norm": 7.2654242515563965,
|
12698 |
+
"learning_rate": 2.5368856660296327e-06,
|
12699 |
+
"loss": 0.1244,
|
12700 |
+
"step": 1669
|
12701 |
+
},
|
12702 |
+
{
|
12703 |
+
"epoch": 0.7916567907086988,
|
12704 |
+
"grad_norm": 6.323776721954346,
|
12705 |
+
"learning_rate": 2.5258788536349622e-06,
|
12706 |
+
"loss": 0.1153,
|
12707 |
+
"step": 1670
|
12708 |
+
},
|
12709 |
+
{
|
12710 |
+
"epoch": 0.792130836691159,
|
12711 |
+
"grad_norm": 8.622234344482422,
|
12712 |
+
"learning_rate": 2.514892518288988e-06,
|
12713 |
+
"loss": 0.2104,
|
12714 |
+
"step": 1671
|
12715 |
+
},
|
12716 |
+
{
|
12717 |
+
"epoch": 0.7926048826736194,
|
12718 |
+
"grad_norm": 3.370286703109741,
|
12719 |
+
"learning_rate": 2.503926690091263e-06,
|
12720 |
+
"loss": 0.0609,
|
12721 |
+
"step": 1672
|
12722 |
+
},
|
12723 |
+
{
|
12724 |
+
"epoch": 0.7930789286560797,
|
12725 |
+
"grad_norm": 5.871740818023682,
|
12726 |
+
"learning_rate": 2.492981399085157e-06,
|
12727 |
+
"loss": 0.1789,
|
12728 |
+
"step": 1673
|
12729 |
+
},
|
12730 |
+
{
|
12731 |
+
"epoch": 0.7935529746385399,
|
12732 |
+
"grad_norm": 5.285881519317627,
|
12733 |
+
"learning_rate": 2.482056675257776e-06,
|
12734 |
+
"loss": 0.1565,
|
12735 |
+
"step": 1674
|
12736 |
+
},
|
12737 |
+
{
|
12738 |
+
"epoch": 0.7940270206210003,
|
12739 |
+
"grad_norm": 6.630995273590088,
|
12740 |
+
"learning_rate": 2.471152548539876e-06,
|
12741 |
+
"loss": 0.176,
|
12742 |
+
"step": 1675
|
12743 |
+
},
|
12744 |
+
{
|
12745 |
+
"epoch": 0.7945010666034605,
|
12746 |
+
"grad_norm": 2.7057905197143555,
|
12747 |
+
"learning_rate": 2.4602690488057836e-06,
|
12748 |
+
"loss": 0.0897,
|
12749 |
+
"step": 1676
|
12750 |
+
},
|
12751 |
+
{
|
12752 |
+
"epoch": 0.7949751125859208,
|
12753 |
+
"grad_norm": 3.194324493408203,
|
12754 |
+
"learning_rate": 2.4494062058733157e-06,
|
12755 |
+
"loss": 0.1121,
|
12756 |
+
"step": 1677
|
12757 |
+
},
|
12758 |
+
{
|
12759 |
+
"epoch": 0.7954491585683812,
|
12760 |
+
"grad_norm": 7.977220058441162,
|
12761 |
+
"learning_rate": 2.438564049503688e-06,
|
12762 |
+
"loss": 0.1833,
|
12763 |
+
"step": 1678
|
12764 |
+
},
|
12765 |
+
{
|
12766 |
+
"epoch": 0.7959232045508414,
|
12767 |
+
"grad_norm": 4.833785057067871,
|
12768 |
+
"learning_rate": 2.4277426094014457e-06,
|
12769 |
+
"loss": 0.1875,
|
12770 |
+
"step": 1679
|
12771 |
+
},
|
12772 |
+
{
|
12773 |
+
"epoch": 0.7963972505333017,
|
12774 |
+
"grad_norm": 5.799574375152588,
|
12775 |
+
"learning_rate": 2.416941915214377e-06,
|
12776 |
+
"loss": 0.1472,
|
12777 |
+
"step": 1680
|
12778 |
+
},
|
12779 |
+
{
|
12780 |
+
"epoch": 0.7963972505333017,
|
12781 |
+
"eval_accuracy": 0.9943639291465378,
|
12782 |
+
"eval_f1": 0.9357798165137615,
|
12783 |
+
"eval_loss": 0.011454065330326557,
|
12784 |
+
"eval_precision": 0.8947368421052632,
|
12785 |
+
"eval_recall": 0.9807692307692307,
|
12786 |
+
"eval_runtime": 49.6644,
|
12787 |
+
"eval_samples_per_second": 5.457,
|
12788 |
+
"eval_steps_per_second": 0.181,
|
12789 |
+
"step": 1680
|
12790 |
+
},
|
12791 |
+
{
|
12792 |
+
"epoch": 0.796871296515762,
|
12793 |
+
"grad_norm": 5.203197956085205,
|
12794 |
+
"learning_rate": 2.4061619965334314e-06,
|
12795 |
+
"loss": 0.1582,
|
12796 |
+
"step": 1681
|
12797 |
+
},
|
12798 |
+
{
|
12799 |
+
"epoch": 0.7973453424982223,
|
12800 |
+
"grad_norm": 8.293927192687988,
|
12801 |
+
"learning_rate": 2.395402882892639e-06,
|
12802 |
+
"loss": 0.2625,
|
12803 |
+
"step": 1682
|
12804 |
+
},
|
12805 |
+
{
|
12806 |
+
"epoch": 0.7978193884806827,
|
12807 |
+
"grad_norm": 4.733770847320557,
|
12808 |
+
"learning_rate": 2.3846646037690304e-06,
|
12809 |
+
"loss": 0.1162,
|
12810 |
+
"step": 1683
|
12811 |
+
},
|
12812 |
+
{
|
12813 |
+
"epoch": 0.7982934344631429,
|
12814 |
+
"grad_norm": 3.6557698249816895,
|
12815 |
+
"learning_rate": 2.3739471885825536e-06,
|
12816 |
+
"loss": 0.142,
|
12817 |
+
"step": 1684
|
12818 |
+
},
|
12819 |
+
{
|
12820 |
+
"epoch": 0.7987674804456032,
|
12821 |
+
"grad_norm": 5.944900989532471,
|
12822 |
+
"learning_rate": 2.363250666695999e-06,
|
12823 |
+
"loss": 0.1202,
|
12824 |
+
"step": 1685
|
12825 |
+
},
|
12826 |
+
{
|
12827 |
+
"epoch": 0.7992415264280636,
|
12828 |
+
"grad_norm": 3.3309900760650635,
|
12829 |
+
"learning_rate": 2.3525750674149094e-06,
|
12830 |
+
"loss": 0.1227,
|
12831 |
+
"step": 1686
|
12832 |
+
},
|
12833 |
+
{
|
12834 |
+
"epoch": 0.7997155724105238,
|
12835 |
+
"grad_norm": 5.317230224609375,
|
12836 |
+
"learning_rate": 2.34192041998751e-06,
|
12837 |
+
"loss": 0.1406,
|
12838 |
+
"step": 1687
|
12839 |
+
},
|
12840 |
+
{
|
12841 |
+
"epoch": 0.8001896183929841,
|
12842 |
+
"grad_norm": 4.319701671600342,
|
12843 |
+
"learning_rate": 2.331286753604621e-06,
|
12844 |
+
"loss": 0.1916,
|
12845 |
+
"step": 1688
|
12846 |
+
},
|
12847 |
+
{
|
12848 |
+
"epoch": 0.8006636643754445,
|
12849 |
+
"grad_norm": 4.4361982345581055,
|
12850 |
+
"learning_rate": 2.3206740973995823e-06,
|
12851 |
+
"loss": 0.1844,
|
12852 |
+
"step": 1689
|
12853 |
+
},
|
12854 |
+
{
|
12855 |
+
"epoch": 0.8011377103579047,
|
12856 |
+
"grad_norm": 3.2999582290649414,
|
12857 |
+
"learning_rate": 2.3100824804481703e-06,
|
12858 |
+
"loss": 0.0952,
|
12859 |
+
"step": 1690
|
12860 |
+
},
|
12861 |
+
{
|
12862 |
+
"epoch": 0.801611756340365,
|
12863 |
+
"grad_norm": 7.211174964904785,
|
12864 |
+
"learning_rate": 2.29951193176852e-06,
|
12865 |
+
"loss": 0.1072,
|
12866 |
+
"step": 1691
|
12867 |
+
},
|
12868 |
+
{
|
12869 |
+
"epoch": 0.8020858023228253,
|
12870 |
+
"grad_norm": 5.33006477355957,
|
12871 |
+
"learning_rate": 2.2889624803210453e-06,
|
12872 |
+
"loss": 0.1978,
|
12873 |
+
"step": 1692
|
12874 |
+
},
|
12875 |
+
{
|
12876 |
+
"epoch": 0.8025598483052856,
|
12877 |
+
"grad_norm": 5.028670787811279,
|
12878 |
+
"learning_rate": 2.2784341550083577e-06,
|
12879 |
+
"loss": 0.0922,
|
12880 |
+
"step": 1693
|
12881 |
+
},
|
12882 |
+
{
|
12883 |
+
"epoch": 0.803033894287746,
|
12884 |
+
"grad_norm": 5.079577445983887,
|
12885 |
+
"learning_rate": 2.2679269846751915e-06,
|
12886 |
+
"loss": 0.1134,
|
12887 |
+
"step": 1694
|
12888 |
+
},
|
12889 |
+
{
|
12890 |
+
"epoch": 0.8035079402702062,
|
12891 |
+
"grad_norm": 3.310760974884033,
|
12892 |
+
"learning_rate": 2.2574409981083224e-06,
|
12893 |
+
"loss": 0.0928,
|
12894 |
+
"step": 1695
|
12895 |
+
},
|
12896 |
+
{
|
12897 |
+
"epoch": 0.8039819862526665,
|
12898 |
+
"grad_norm": 5.977758884429932,
|
12899 |
+
"learning_rate": 2.2469762240364847e-06,
|
12900 |
+
"loss": 0.1011,
|
12901 |
+
"step": 1696
|
12902 |
+
},
|
12903 |
+
{
|
12904 |
+
"epoch": 0.8044560322351268,
|
12905 |
+
"grad_norm": 6.029415607452393,
|
12906 |
+
"learning_rate": 2.236532691130299e-06,
|
12907 |
+
"loss": 0.1699,
|
12908 |
+
"step": 1697
|
12909 |
+
},
|
12910 |
+
{
|
12911 |
+
"epoch": 0.8049300782175871,
|
12912 |
+
"grad_norm": 9.231821060180664,
|
12913 |
+
"learning_rate": 2.2261104280021937e-06,
|
12914 |
+
"loss": 0.2549,
|
12915 |
+
"step": 1698
|
12916 |
+
},
|
12917 |
+
{
|
12918 |
+
"epoch": 0.8054041242000474,
|
12919 |
+
"grad_norm": 2.8385801315307617,
|
12920 |
+
"learning_rate": 2.215709463206316e-06,
|
12921 |
+
"loss": 0.0953,
|
12922 |
+
"step": 1699
|
12923 |
+
},
|
12924 |
+
{
|
12925 |
+
"epoch": 0.8058781701825077,
|
12926 |
+
"grad_norm": 6.947047233581543,
|
12927 |
+
"learning_rate": 2.205329825238467e-06,
|
12928 |
+
"loss": 0.1236,
|
12929 |
+
"step": 1700
|
12930 |
+
},
|
12931 |
+
{
|
12932 |
+
"epoch": 0.8058781701825077,
|
12933 |
+
"eval_accuracy": 0.9935587761674718,
|
12934 |
+
"eval_f1": 0.9272727272727272,
|
12935 |
+
"eval_loss": 0.012126692570745945,
|
12936 |
+
"eval_precision": 0.8793103448275862,
|
12937 |
+
"eval_recall": 0.9807692307692307,
|
12938 |
+
"eval_runtime": 49.2509,
|
12939 |
+
"eval_samples_per_second": 5.502,
|
12940 |
+
"eval_steps_per_second": 0.183,
|
12941 |
+
"step": 1700
|
12942 |
}
|
12943 |
],
|
12944 |
"logging_steps": 1,
|
|
|
12958 |
"attributes": {}
|
12959 |
}
|
12960 |
},
|
12961 |
+
"total_flos": 4.5441970776047616e+17,
|
12962 |
"train_batch_size": 8,
|
12963 |
"trial_name": null,
|
12964 |
"trial_params": null
|