Training in progress, step 2100, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +763 -3
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c626d88cecaf6f37244c24626ee31bda254de73e335860f886b2be28c4358d97
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b98f82f21939a42caf63b3e60fa8693d044a5bada470fb4c47ca564bc1aa2906
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1bccfb3da16edb9ca2352f991e7e2c84949c2cebb82bdfe6dff4edb7588812b
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f1b5e474c9b591c523f4c4558a63e2fdd86f92990aa17d39609578b1c9d025a
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84eef1c8a398e669a09b130c39c3f146f2a1df5c8f58186431773f03716ad0dd
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85c2c1bfcfbe43cb98961bcf7bbee9910700d60cc94ea9e559cdcc0bfcaf1d3a
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65404a56baaeb38eea09621cc68aa2f31f268f0657702a26eb129038b9b80d1b
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3c2b908498addeec6c50ef933c786ada650e8ffdacabaf686c730cc90d5e9dd
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75d7eee0983d654dc4f4d9d0aeab1c0cc99847a413b7ee9122cbe6f31278739d
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6b89b5ae016f3558d6cf4489eb242de8fea1141c77af78593bebef95e5e45eb
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9375cbe9615de32a9bfeb48c97d58f16a884f450ceae1c1433fd9c53f512214c
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13c4476d4d3e749b45bb7cf5bd672971013f9e7d9039dbfad26020d82e32caff
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20531ddcffa25460cb7198bef6ec4382015b394eaa7700ad1ffe8c13cee7ce9f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -15219,6 +15219,766 @@
|
|
15219 |
"eval_samples_per_second": 5.641,
|
15220 |
"eval_steps_per_second": 0.184,
|
15221 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15222 |
}
|
15223 |
],
|
15224 |
"logging_steps": 1,
|
@@ -15238,7 +15998,7 @@
|
|
15238 |
"attributes": {}
|
15239 |
}
|
15240 |
},
|
15241 |
-
"total_flos": 5.
|
15242 |
"train_batch_size": 8,
|
15243 |
"trial_name": null,
|
15244 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9929078014184397,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 2100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
15219 |
"eval_samples_per_second": 5.641,
|
15220 |
"eval_steps_per_second": 0.184,
|
15221 |
"step": 2000
|
15222 |
+
},
|
15223 |
+
{
|
15224 |
+
"epoch": 0.9460992907801419,
|
15225 |
+
"grad_norm": 5.928063869476318,
|
15226 |
+
"learning_rate": 1.7657111276051852e-07,
|
15227 |
+
"loss": 0.2758,
|
15228 |
+
"step": 2001
|
15229 |
+
},
|
15230 |
+
{
|
15231 |
+
"epoch": 0.9465721040189126,
|
15232 |
+
"grad_norm": 5.081968784332275,
|
15233 |
+
"learning_rate": 1.734959100204281e-07,
|
15234 |
+
"loss": 0.1877,
|
15235 |
+
"step": 2002
|
15236 |
+
},
|
15237 |
+
{
|
15238 |
+
"epoch": 0.9470449172576832,
|
15239 |
+
"grad_norm": 5.424426078796387,
|
15240 |
+
"learning_rate": 1.704474879300766e-07,
|
15241 |
+
"loss": 0.216,
|
15242 |
+
"step": 2003
|
15243 |
+
},
|
15244 |
+
{
|
15245 |
+
"epoch": 0.9475177304964539,
|
15246 |
+
"grad_norm": 5.300611972808838,
|
15247 |
+
"learning_rate": 1.6742585479747388e-07,
|
15248 |
+
"loss": 0.2141,
|
15249 |
+
"step": 2004
|
15250 |
+
},
|
15251 |
+
{
|
15252 |
+
"epoch": 0.9479905437352246,
|
15253 |
+
"grad_norm": 7.5446858406066895,
|
15254 |
+
"learning_rate": 1.6443101885762812e-07,
|
15255 |
+
"loss": 0.2932,
|
15256 |
+
"step": 2005
|
15257 |
+
},
|
15258 |
+
{
|
15259 |
+
"epoch": 0.9484633569739953,
|
15260 |
+
"grad_norm": 6.073637008666992,
|
15261 |
+
"learning_rate": 1.614629882725094e-07,
|
15262 |
+
"loss": 0.2036,
|
15263 |
+
"step": 2006
|
15264 |
+
},
|
15265 |
+
{
|
15266 |
+
"epoch": 0.948936170212766,
|
15267 |
+
"grad_norm": 7.519749164581299,
|
15268 |
+
"learning_rate": 1.5852177113103606e-07,
|
15269 |
+
"loss": 0.2765,
|
15270 |
+
"step": 2007
|
15271 |
+
},
|
15272 |
+
{
|
15273 |
+
"epoch": 0.9494089834515367,
|
15274 |
+
"grad_norm": 6.116303443908691,
|
15275 |
+
"learning_rate": 1.5560737544905058e-07,
|
15276 |
+
"loss": 0.2913,
|
15277 |
+
"step": 2008
|
15278 |
+
},
|
15279 |
+
{
|
15280 |
+
"epoch": 0.9498817966903074,
|
15281 |
+
"grad_norm": 5.81624174118042,
|
15282 |
+
"learning_rate": 1.5271980916929497e-07,
|
15283 |
+
"loss": 0.2321,
|
15284 |
+
"step": 2009
|
15285 |
+
},
|
15286 |
+
{
|
15287 |
+
"epoch": 0.950354609929078,
|
15288 |
+
"grad_norm": 5.760371208190918,
|
15289 |
+
"learning_rate": 1.498590801613975e-07,
|
15290 |
+
"loss": 0.2134,
|
15291 |
+
"step": 2010
|
15292 |
+
},
|
15293 |
+
{
|
15294 |
+
"epoch": 0.9508274231678487,
|
15295 |
+
"grad_norm": 5.03253698348999,
|
15296 |
+
"learning_rate": 1.4702519622184053e-07,
|
15297 |
+
"loss": 0.2093,
|
15298 |
+
"step": 2011
|
15299 |
+
},
|
15300 |
+
{
|
15301 |
+
"epoch": 0.9513002364066194,
|
15302 |
+
"grad_norm": 4.581620693206787,
|
15303 |
+
"learning_rate": 1.4421816507394605e-07,
|
15304 |
+
"loss": 0.2063,
|
15305 |
+
"step": 2012
|
15306 |
+
},
|
15307 |
+
{
|
15308 |
+
"epoch": 0.9517730496453901,
|
15309 |
+
"grad_norm": 5.890350818634033,
|
15310 |
+
"learning_rate": 1.4143799436785233e-07,
|
15311 |
+
"loss": 0.2267,
|
15312 |
+
"step": 2013
|
15313 |
+
},
|
15314 |
+
{
|
15315 |
+
"epoch": 0.9522458628841608,
|
15316 |
+
"grad_norm": 6.05654764175415,
|
15317 |
+
"learning_rate": 1.3868469168049403e-07,
|
15318 |
+
"loss": 0.2326,
|
15319 |
+
"step": 2014
|
15320 |
+
},
|
15321 |
+
{
|
15322 |
+
"epoch": 0.9527186761229315,
|
15323 |
+
"grad_norm": 3.6070337295532227,
|
15324 |
+
"learning_rate": 1.3595826451558214e-07,
|
15325 |
+
"loss": 0.1469,
|
15326 |
+
"step": 2015
|
15327 |
+
},
|
15328 |
+
{
|
15329 |
+
"epoch": 0.9531914893617022,
|
15330 |
+
"grad_norm": 7.624080181121826,
|
15331 |
+
"learning_rate": 1.3325872030357955e-07,
|
15332 |
+
"loss": 0.2893,
|
15333 |
+
"step": 2016
|
15334 |
+
},
|
15335 |
+
{
|
15336 |
+
"epoch": 0.9536643026004729,
|
15337 |
+
"grad_norm": 6.688779354095459,
|
15338 |
+
"learning_rate": 1.3058606640168558e-07,
|
15339 |
+
"loss": 0.2668,
|
15340 |
+
"step": 2017
|
15341 |
+
},
|
15342 |
+
{
|
15343 |
+
"epoch": 0.9541371158392435,
|
15344 |
+
"grad_norm": 6.714046001434326,
|
15345 |
+
"learning_rate": 1.279403100938148e-07,
|
15346 |
+
"loss": 0.2095,
|
15347 |
+
"step": 2018
|
15348 |
+
},
|
15349 |
+
{
|
15350 |
+
"epoch": 0.9546099290780142,
|
15351 |
+
"grad_norm": 3.696683406829834,
|
15352 |
+
"learning_rate": 1.25321458590576e-07,
|
15353 |
+
"loss": 0.1431,
|
15354 |
+
"step": 2019
|
15355 |
+
},
|
15356 |
+
{
|
15357 |
+
"epoch": 0.9550827423167849,
|
15358 |
+
"grad_norm": 6.133592128753662,
|
15359 |
+
"learning_rate": 1.2272951902925211e-07,
|
15360 |
+
"loss": 0.3241,
|
15361 |
+
"step": 2020
|
15362 |
+
},
|
15363 |
+
{
|
15364 |
+
"epoch": 0.9550827423167849,
|
15365 |
+
"eval_accuracy": 0.8647450110864745,
|
15366 |
+
"eval_f1": 0.7162790697674418,
|
15367 |
+
"eval_loss": 0.2992999255657196,
|
15368 |
+
"eval_precision": 0.8700564971751412,
|
15369 |
+
"eval_recall": 0.6086956521739131,
|
15370 |
+
"eval_runtime": 48.4915,
|
15371 |
+
"eval_samples_per_second": 5.692,
|
15372 |
+
"eval_steps_per_second": 0.186,
|
15373 |
+
"step": 2020
|
15374 |
+
},
|
15375 |
+
{
|
15376 |
+
"epoch": 0.9555555555555556,
|
15377 |
+
"grad_norm": 7.198812007904053,
|
15378 |
+
"learning_rate": 1.201644984737804e-07,
|
15379 |
+
"loss": 0.2988,
|
15380 |
+
"step": 2021
|
15381 |
+
},
|
15382 |
+
{
|
15383 |
+
"epoch": 0.9560283687943263,
|
15384 |
+
"grad_norm": 4.9037322998046875,
|
15385 |
+
"learning_rate": 1.1762640391473901e-07,
|
15386 |
+
"loss": 0.2401,
|
15387 |
+
"step": 2022
|
15388 |
+
},
|
15389 |
+
{
|
15390 |
+
"epoch": 0.956501182033097,
|
15391 |
+
"grad_norm": 4.425469398498535,
|
15392 |
+
"learning_rate": 1.1511524226931914e-07,
|
15393 |
+
"loss": 0.1406,
|
15394 |
+
"step": 2023
|
15395 |
+
},
|
15396 |
+
{
|
15397 |
+
"epoch": 0.9569739952718677,
|
15398 |
+
"grad_norm": 5.938382625579834,
|
15399 |
+
"learning_rate": 1.126310203813108e-07,
|
15400 |
+
"loss": 0.2148,
|
15401 |
+
"step": 2024
|
15402 |
+
},
|
15403 |
+
{
|
15404 |
+
"epoch": 0.9574468085106383,
|
15405 |
+
"grad_norm": 7.644670486450195,
|
15406 |
+
"learning_rate": 1.1017374502108713e-07,
|
15407 |
+
"loss": 0.2778,
|
15408 |
+
"step": 2025
|
15409 |
+
},
|
15410 |
+
{
|
15411 |
+
"epoch": 0.957919621749409,
|
15412 |
+
"grad_norm": 5.539424896240234,
|
15413 |
+
"learning_rate": 1.0774342288557892e-07,
|
15414 |
+
"loss": 0.2106,
|
15415 |
+
"step": 2026
|
15416 |
+
},
|
15417 |
+
{
|
15418 |
+
"epoch": 0.9583924349881797,
|
15419 |
+
"grad_norm": 6.603002548217773,
|
15420 |
+
"learning_rate": 1.053400605982613e-07,
|
15421 |
+
"loss": 0.2815,
|
15422 |
+
"step": 2027
|
15423 |
+
},
|
15424 |
+
{
|
15425 |
+
"epoch": 0.9588652482269504,
|
15426 |
+
"grad_norm": 4.729203701019287,
|
15427 |
+
"learning_rate": 1.0296366470913477e-07,
|
15428 |
+
"loss": 0.2226,
|
15429 |
+
"step": 2028
|
15430 |
+
},
|
15431 |
+
{
|
15432 |
+
"epoch": 0.9593380614657211,
|
15433 |
+
"grad_norm": 7.116330623626709,
|
15434 |
+
"learning_rate": 1.0061424169470646e-07,
|
15435 |
+
"loss": 0.299,
|
15436 |
+
"step": 2029
|
15437 |
+
},
|
15438 |
+
{
|
15439 |
+
"epoch": 0.9598108747044918,
|
15440 |
+
"grad_norm": 6.153399467468262,
|
15441 |
+
"learning_rate": 9.829179795797339e-08,
|
15442 |
+
"loss": 0.2681,
|
15443 |
+
"step": 2030
|
15444 |
+
},
|
15445 |
+
{
|
15446 |
+
"epoch": 0.9602836879432625,
|
15447 |
+
"grad_norm": 4.379301071166992,
|
15448 |
+
"learning_rate": 9.599633982840362e-08,
|
15449 |
+
"loss": 0.1883,
|
15450 |
+
"step": 2031
|
15451 |
+
},
|
15452 |
+
{
|
15453 |
+
"epoch": 0.9607565011820332,
|
15454 |
+
"grad_norm": 5.625801086425781,
|
15455 |
+
"learning_rate": 9.372787356192181e-08,
|
15456 |
+
"loss": 0.1923,
|
15457 |
+
"step": 2032
|
15458 |
+
},
|
15459 |
+
{
|
15460 |
+
"epoch": 0.9612293144208038,
|
15461 |
+
"grad_norm": 4.8772077560424805,
|
15462 |
+
"learning_rate": 9.148640534089037e-08,
|
15463 |
+
"loss": 0.1565,
|
15464 |
+
"step": 2033
|
15465 |
+
},
|
15466 |
+
{
|
15467 |
+
"epoch": 0.9617021276595744,
|
15468 |
+
"grad_norm": 6.87009334564209,
|
15469 |
+
"learning_rate": 8.927194127408945e-08,
|
15470 |
+
"loss": 0.2341,
|
15471 |
+
"step": 2034
|
15472 |
+
},
|
15473 |
+
{
|
15474 |
+
"epoch": 0.9621749408983451,
|
15475 |
+
"grad_norm": 4.184564113616943,
|
15476 |
+
"learning_rate": 8.708448739670805e-08,
|
15477 |
+
"loss": 0.1848,
|
15478 |
+
"step": 2035
|
15479 |
+
},
|
15480 |
+
{
|
15481 |
+
"epoch": 0.9626477541371158,
|
15482 |
+
"grad_norm": 4.61867094039917,
|
15483 |
+
"learning_rate": 8.492404967031853e-08,
|
15484 |
+
"loss": 0.175,
|
15485 |
+
"step": 2036
|
15486 |
+
},
|
15487 |
+
{
|
15488 |
+
"epoch": 0.9631205673758865,
|
15489 |
+
"grad_norm": 3.9743919372558594,
|
15490 |
+
"learning_rate": 8.27906339828688e-08,
|
15491 |
+
"loss": 0.1485,
|
15492 |
+
"step": 2037
|
15493 |
+
},
|
15494 |
+
{
|
15495 |
+
"epoch": 0.9635933806146572,
|
15496 |
+
"grad_norm": 6.921072959899902,
|
15497 |
+
"learning_rate": 8.0684246148659e-08,
|
15498 |
+
"loss": 0.2734,
|
15499 |
+
"step": 2038
|
15500 |
+
},
|
15501 |
+
{
|
15502 |
+
"epoch": 0.9640661938534278,
|
15503 |
+
"grad_norm": 4.7037129402160645,
|
15504 |
+
"learning_rate": 7.860489190833043e-08,
|
15505 |
+
"loss": 0.1407,
|
15506 |
+
"step": 2039
|
15507 |
+
},
|
15508 |
+
{
|
15509 |
+
"epoch": 0.9645390070921985,
|
15510 |
+
"grad_norm": 5.145064353942871,
|
15511 |
+
"learning_rate": 7.655257692884998e-08,
|
15512 |
+
"loss": 0.2289,
|
15513 |
+
"step": 2040
|
15514 |
+
},
|
15515 |
+
{
|
15516 |
+
"epoch": 0.9645390070921985,
|
15517 |
+
"eval_accuracy": 0.8658536585365854,
|
15518 |
+
"eval_f1": 0.7192575406032483,
|
15519 |
+
"eval_loss": 0.29763469099998474,
|
15520 |
+
"eval_precision": 0.8707865168539326,
|
15521 |
+
"eval_recall": 0.6126482213438735,
|
15522 |
+
"eval_runtime": 48.2853,
|
15523 |
+
"eval_samples_per_second": 5.716,
|
15524 |
+
"eval_steps_per_second": 0.186,
|
15525 |
+
"step": 2040
|
15526 |
+
},
|
15527 |
+
{
|
15528 |
+
"epoch": 0.9650118203309692,
|
15529 |
+
"grad_norm": 5.289119243621826,
|
15530 |
+
"learning_rate": 7.452730680349019e-08,
|
15531 |
+
"loss": 0.2251,
|
15532 |
+
"step": 2041
|
15533 |
+
},
|
15534 |
+
{
|
15535 |
+
"epoch": 0.9654846335697399,
|
15536 |
+
"grad_norm": 7.4958624839782715,
|
15537 |
+
"learning_rate": 7.252908705181805e-08,
|
15538 |
+
"loss": 0.2453,
|
15539 |
+
"step": 2042
|
15540 |
+
},
|
15541 |
+
{
|
15542 |
+
"epoch": 0.9659574468085106,
|
15543 |
+
"grad_norm": 5.394641876220703,
|
15544 |
+
"learning_rate": 7.055792311967958e-08,
|
15545 |
+
"loss": 0.2879,
|
15546 |
+
"step": 2043
|
15547 |
+
},
|
15548 |
+
{
|
15549 |
+
"epoch": 0.9664302600472813,
|
15550 |
+
"grad_norm": 4.002281665802002,
|
15551 |
+
"learning_rate": 6.861382037918418e-08,
|
15552 |
+
"loss": 0.1805,
|
15553 |
+
"step": 2044
|
15554 |
+
},
|
15555 |
+
{
|
15556 |
+
"epoch": 0.966903073286052,
|
15557 |
+
"grad_norm": 5.974024295806885,
|
15558 |
+
"learning_rate": 6.669678412868919e-08,
|
15559 |
+
"loss": 0.2024,
|
15560 |
+
"step": 2045
|
15561 |
+
},
|
15562 |
+
{
|
15563 |
+
"epoch": 0.9673758865248226,
|
15564 |
+
"grad_norm": 5.801767349243164,
|
15565 |
+
"learning_rate": 6.480681959278645e-08,
|
15566 |
+
"loss": 0.2164,
|
15567 |
+
"step": 2046
|
15568 |
+
},
|
15569 |
+
{
|
15570 |
+
"epoch": 0.9678486997635933,
|
15571 |
+
"grad_norm": 4.779239177703857,
|
15572 |
+
"learning_rate": 6.29439319222891e-08,
|
15573 |
+
"loss": 0.1936,
|
15574 |
+
"step": 2047
|
15575 |
+
},
|
15576 |
+
{
|
15577 |
+
"epoch": 0.968321513002364,
|
15578 |
+
"grad_norm": 4.674015522003174,
|
15579 |
+
"learning_rate": 6.11081261942148e-08,
|
15580 |
+
"loss": 0.2035,
|
15581 |
+
"step": 2048
|
15582 |
+
},
|
15583 |
+
{
|
15584 |
+
"epoch": 0.9687943262411347,
|
15585 |
+
"grad_norm": 6.905233860015869,
|
15586 |
+
"learning_rate": 5.929940741177476e-08,
|
15587 |
+
"loss": 0.2818,
|
15588 |
+
"step": 2049
|
15589 |
+
},
|
15590 |
+
{
|
15591 |
+
"epoch": 0.9692671394799054,
|
15592 |
+
"grad_norm": 9.568391799926758,
|
15593 |
+
"learning_rate": 5.751778050435808e-08,
|
15594 |
+
"loss": 0.32,
|
15595 |
+
"step": 2050
|
15596 |
+
},
|
15597 |
+
{
|
15598 |
+
"epoch": 0.9697399527186761,
|
15599 |
+
"grad_norm": 5.665557384490967,
|
15600 |
+
"learning_rate": 5.5763250327518505e-08,
|
15601 |
+
"loss": 0.2695,
|
15602 |
+
"step": 2051
|
15603 |
+
},
|
15604 |
+
{
|
15605 |
+
"epoch": 0.9702127659574468,
|
15606 |
+
"grad_norm": 4.919648170471191,
|
15607 |
+
"learning_rate": 5.4035821662963285e-08,
|
15608 |
+
"loss": 0.2343,
|
15609 |
+
"step": 2052
|
15610 |
+
},
|
15611 |
+
{
|
15612 |
+
"epoch": 0.9706855791962175,
|
15613 |
+
"grad_norm": 3.9685451984405518,
|
15614 |
+
"learning_rate": 5.233549921853876e-08,
|
15615 |
+
"loss": 0.18,
|
15616 |
+
"step": 2053
|
15617 |
+
},
|
15618 |
+
{
|
15619 |
+
"epoch": 0.9711583924349881,
|
15620 |
+
"grad_norm": 5.1178131103515625,
|
15621 |
+
"learning_rate": 5.066228762821479e-08,
|
15622 |
+
"loss": 0.1903,
|
15623 |
+
"step": 2054
|
15624 |
+
},
|
15625 |
+
{
|
15626 |
+
"epoch": 0.9716312056737588,
|
15627 |
+
"grad_norm": 6.247317314147949,
|
15628 |
+
"learning_rate": 4.901619145207703e-08,
|
15629 |
+
"loss": 0.1892,
|
15630 |
+
"step": 2055
|
15631 |
+
},
|
15632 |
+
{
|
15633 |
+
"epoch": 0.9721040189125295,
|
15634 |
+
"grad_norm": 3.8373396396636963,
|
15635 |
+
"learning_rate": 4.7397215176311354e-08,
|
15636 |
+
"loss": 0.1359,
|
15637 |
+
"step": 2056
|
15638 |
+
},
|
15639 |
+
{
|
15640 |
+
"epoch": 0.9725768321513002,
|
15641 |
+
"grad_norm": 6.623259544372559,
|
15642 |
+
"learning_rate": 4.580536321319273e-08,
|
15643 |
+
"loss": 0.23,
|
15644 |
+
"step": 2057
|
15645 |
+
},
|
15646 |
+
{
|
15647 |
+
"epoch": 0.9730496453900709,
|
15648 |
+
"grad_norm": 5.989914894104004,
|
15649 |
+
"learning_rate": 4.424063990107308e-08,
|
15650 |
+
"loss": 0.2538,
|
15651 |
+
"step": 2058
|
15652 |
+
},
|
15653 |
+
{
|
15654 |
+
"epoch": 0.9735224586288416,
|
15655 |
+
"grad_norm": 4.51497745513916,
|
15656 |
+
"learning_rate": 4.270304950436788e-08,
|
15657 |
+
"loss": 0.1994,
|
15658 |
+
"step": 2059
|
15659 |
+
},
|
15660 |
+
{
|
15661 |
+
"epoch": 0.9739952718676123,
|
15662 |
+
"grad_norm": 4.718496799468994,
|
15663 |
+
"learning_rate": 4.119259621354843e-08,
|
15664 |
+
"loss": 0.1593,
|
15665 |
+
"step": 2060
|
15666 |
+
},
|
15667 |
+
{
|
15668 |
+
"epoch": 0.9739952718676123,
|
15669 |
+
"eval_accuracy": 0.8636363636363636,
|
15670 |
+
"eval_f1": 0.7132867132867133,
|
15671 |
+
"eval_loss": 0.29835787415504456,
|
15672 |
+
"eval_precision": 0.8693181818181818,
|
15673 |
+
"eval_recall": 0.6047430830039525,
|
15674 |
+
"eval_runtime": 48.4979,
|
15675 |
+
"eval_samples_per_second": 5.691,
|
15676 |
+
"eval_steps_per_second": 0.186,
|
15677 |
+
"step": 2060
|
15678 |
+
},
|
15679 |
+
{
|
15680 |
+
"epoch": 0.9744680851063829,
|
15681 |
+
"grad_norm": 4.2522358894348145,
|
15682 |
+
"learning_rate": 3.9709284145125205e-08,
|
15683 |
+
"loss": 0.2072,
|
15684 |
+
"step": 2061
|
15685 |
+
},
|
15686 |
+
{
|
15687 |
+
"epoch": 0.9749408983451536,
|
15688 |
+
"grad_norm": 6.090972900390625,
|
15689 |
+
"learning_rate": 3.825311734164116e-08,
|
15690 |
+
"loss": 0.227,
|
15691 |
+
"step": 2062
|
15692 |
+
},
|
15693 |
+
{
|
15694 |
+
"epoch": 0.9754137115839243,
|
15695 |
+
"grad_norm": 5.209742546081543,
|
15696 |
+
"learning_rate": 3.682409977165957e-08,
|
15697 |
+
"loss": 0.214,
|
15698 |
+
"step": 2063
|
15699 |
+
},
|
15700 |
+
{
|
15701 |
+
"epoch": 0.975886524822695,
|
15702 |
+
"grad_norm": 5.365957260131836,
|
15703 |
+
"learning_rate": 3.5422235329751756e-08,
|
15704 |
+
"loss": 0.1831,
|
15705 |
+
"step": 2064
|
15706 |
+
},
|
15707 |
+
{
|
15708 |
+
"epoch": 0.9763593380614657,
|
15709 |
+
"grad_norm": 9.389203071594238,
|
15710 |
+
"learning_rate": 3.4047527836483793e-08,
|
15711 |
+
"loss": 0.2723,
|
15712 |
+
"step": 2065
|
15713 |
+
},
|
15714 |
+
{
|
15715 |
+
"epoch": 0.9768321513002364,
|
15716 |
+
"grad_norm": 7.358561038970947,
|
15717 |
+
"learning_rate": 3.269998103841765e-08,
|
15718 |
+
"loss": 0.2694,
|
15719 |
+
"step": 2066
|
15720 |
+
},
|
15721 |
+
{
|
15722 |
+
"epoch": 0.9773049645390071,
|
15723 |
+
"grad_norm": 5.198401927947998,
|
15724 |
+
"learning_rate": 3.137959860808448e-08,
|
15725 |
+
"loss": 0.29,
|
15726 |
+
"step": 2067
|
15727 |
+
},
|
15728 |
+
{
|
15729 |
+
"epoch": 0.9777777777777777,
|
15730 |
+
"grad_norm": 5.073206901550293,
|
15731 |
+
"learning_rate": 3.008638414398801e-08,
|
15732 |
+
"loss": 0.2165,
|
15733 |
+
"step": 2068
|
15734 |
+
},
|
15735 |
+
{
|
15736 |
+
"epoch": 0.9782505910165484,
|
15737 |
+
"grad_norm": 5.652972221374512,
|
15738 |
+
"learning_rate": 2.882034117058896e-08,
|
15739 |
+
"loss": 0.2447,
|
15740 |
+
"step": 2069
|
15741 |
+
},
|
15742 |
+
{
|
15743 |
+
"epoch": 0.9787234042553191,
|
15744 |
+
"grad_norm": 5.199291229248047,
|
15745 |
+
"learning_rate": 2.7581473138296177e-08,
|
15746 |
+
"loss": 0.2055,
|
15747 |
+
"step": 2070
|
15748 |
+
},
|
15749 |
+
{
|
15750 |
+
"epoch": 0.9791962174940898,
|
15751 |
+
"grad_norm": 4.334774017333984,
|
15752 |
+
"learning_rate": 2.636978342345553e-08,
|
15753 |
+
"loss": 0.1535,
|
15754 |
+
"step": 2071
|
15755 |
+
},
|
15756 |
+
{
|
15757 |
+
"epoch": 0.9796690307328605,
|
15758 |
+
"grad_norm": 5.554661750793457,
|
15759 |
+
"learning_rate": 2.518527532834436e-08,
|
15760 |
+
"loss": 0.239,
|
15761 |
+
"step": 2072
|
15762 |
+
},
|
15763 |
+
{
|
15764 |
+
"epoch": 0.9801418439716312,
|
15765 |
+
"grad_norm": 5.669870853424072,
|
15766 |
+
"learning_rate": 2.402795208116149e-08,
|
15767 |
+
"loss": 0.2128,
|
15768 |
+
"step": 2073
|
15769 |
+
},
|
15770 |
+
{
|
15771 |
+
"epoch": 0.9806146572104019,
|
15772 |
+
"grad_norm": 5.936855316162109,
|
15773 |
+
"learning_rate": 2.2897816836014996e-08,
|
15774 |
+
"loss": 0.275,
|
15775 |
+
"step": 2074
|
15776 |
+
},
|
15777 |
+
{
|
15778 |
+
"epoch": 0.9810874704491725,
|
15779 |
+
"grad_norm": 10.341303825378418,
|
15780 |
+
"learning_rate": 2.179487267291891e-08,
|
15781 |
+
"loss": 0.3173,
|
15782 |
+
"step": 2075
|
15783 |
+
},
|
15784 |
+
{
|
15785 |
+
"epoch": 0.9815602836879432,
|
15786 |
+
"grad_norm": 6.050800323486328,
|
15787 |
+
"learning_rate": 2.071912259777875e-08,
|
15788 |
+
"loss": 0.2196,
|
15789 |
+
"step": 2076
|
15790 |
+
},
|
15791 |
+
{
|
15792 |
+
"epoch": 0.9820330969267139,
|
15793 |
+
"grad_norm": 5.055636882781982,
|
15794 |
+
"learning_rate": 1.967056954238933e-08,
|
15795 |
+
"loss": 0.181,
|
15796 |
+
"step": 2077
|
15797 |
+
},
|
15798 |
+
{
|
15799 |
+
"epoch": 0.9825059101654846,
|
15800 |
+
"grad_norm": 7.4767632484436035,
|
15801 |
+
"learning_rate": 1.864921636442252e-08,
|
15802 |
+
"loss": 0.201,
|
15803 |
+
"step": 2078
|
15804 |
+
},
|
15805 |
+
{
|
15806 |
+
"epoch": 0.9829787234042553,
|
15807 |
+
"grad_norm": 6.8587493896484375,
|
15808 |
+
"learning_rate": 1.7655065847423935e-08,
|
15809 |
+
"loss": 0.3132,
|
15810 |
+
"step": 2079
|
15811 |
+
},
|
15812 |
+
{
|
15813 |
+
"epoch": 0.983451536643026,
|
15814 |
+
"grad_norm": 7.90069580078125,
|
15815 |
+
"learning_rate": 1.6688120700798505e-08,
|
15816 |
+
"loss": 0.2018,
|
15817 |
+
"step": 2080
|
15818 |
+
},
|
15819 |
+
{
|
15820 |
+
"epoch": 0.983451536643026,
|
15821 |
+
"eval_accuracy": 0.8647450110864745,
|
15822 |
+
"eval_f1": 0.7175925925925926,
|
15823 |
+
"eval_loss": 0.29836517572402954,
|
15824 |
+
"eval_precision": 0.8659217877094972,
|
15825 |
+
"eval_recall": 0.6126482213438735,
|
15826 |
+
"eval_runtime": 48.9715,
|
15827 |
+
"eval_samples_per_second": 5.636,
|
15828 |
+
"eval_steps_per_second": 0.184,
|
15829 |
+
"step": 2080
|
15830 |
+
},
|
15831 |
+
{
|
15832 |
+
"epoch": 0.9839243498817967,
|
15833 |
+
"grad_norm": 6.26698637008667,
|
15834 |
+
"learning_rate": 1.5748383559809345e-08,
|
15835 |
+
"loss": 0.2399,
|
15836 |
+
"step": 2081
|
15837 |
+
},
|
15838 |
+
{
|
15839 |
+
"epoch": 0.9843971631205674,
|
15840 |
+
"grad_norm": 6.140974044799805,
|
15841 |
+
"learning_rate": 1.4835856985568887e-08,
|
15842 |
+
"loss": 0.2634,
|
15843 |
+
"step": 2082
|
15844 |
+
},
|
15845 |
+
{
|
15846 |
+
"epoch": 0.984869976359338,
|
15847 |
+
"grad_norm": 4.758864879608154,
|
15848 |
+
"learning_rate": 1.3950543465027777e-08,
|
15849 |
+
"loss": 0.2022,
|
15850 |
+
"step": 2083
|
15851 |
+
},
|
15852 |
+
{
|
15853 |
+
"epoch": 0.9853427895981087,
|
15854 |
+
"grad_norm": 6.061093330383301,
|
15855 |
+
"learning_rate": 1.3092445410977094e-08,
|
15856 |
+
"loss": 0.2611,
|
15857 |
+
"step": 2084
|
15858 |
+
},
|
15859 |
+
{
|
15860 |
+
"epoch": 0.9858156028368794,
|
15861 |
+
"grad_norm": 5.0369696617126465,
|
15862 |
+
"learning_rate": 1.2261565162030586e-08,
|
15863 |
+
"loss": 0.242,
|
15864 |
+
"step": 2085
|
15865 |
+
},
|
15866 |
+
{
|
15867 |
+
"epoch": 0.9862884160756501,
|
15868 |
+
"grad_norm": 4.759927272796631,
|
15869 |
+
"learning_rate": 1.1457904982627998e-08,
|
15870 |
+
"loss": 0.2424,
|
15871 |
+
"step": 2086
|
15872 |
+
},
|
15873 |
+
{
|
15874 |
+
"epoch": 0.9867612293144208,
|
15875 |
+
"grad_norm": 4.427268028259277,
|
15876 |
+
"learning_rate": 1.0681467063022866e-08,
|
15877 |
+
"loss": 0.1903,
|
15878 |
+
"step": 2087
|
15879 |
+
},
|
15880 |
+
{
|
15881 |
+
"epoch": 0.9872340425531915,
|
15882 |
+
"grad_norm": 5.498013496398926,
|
15883 |
+
"learning_rate": 9.932253519280289e-09,
|
15884 |
+
"loss": 0.1198,
|
15885 |
+
"step": 2088
|
15886 |
+
},
|
15887 |
+
{
|
15888 |
+
"epoch": 0.9877068557919622,
|
15889 |
+
"grad_norm": 5.413758754730225,
|
15890 |
+
"learning_rate": 9.210266393266942e-09,
|
15891 |
+
"loss": 0.231,
|
15892 |
+
"step": 2089
|
15893 |
+
},
|
15894 |
+
{
|
15895 |
+
"epoch": 0.9881796690307328,
|
15896 |
+
"grad_norm": 7.1858134269714355,
|
15897 |
+
"learning_rate": 8.515507652649968e-09,
|
15898 |
+
"loss": 0.26,
|
15899 |
+
"step": 2090
|
15900 |
+
},
|
15901 |
+
{
|
15902 |
+
"epoch": 0.9886524822695035,
|
15903 |
+
"grad_norm": 4.840980052947998,
|
15904 |
+
"learning_rate": 7.84797919089031e-09,
|
15905 |
+
"loss": 0.2581,
|
15906 |
+
"step": 2091
|
15907 |
+
},
|
15908 |
+
{
|
15909 |
+
"epoch": 0.9891252955082742,
|
15910 |
+
"grad_norm": 5.378105640411377,
|
15911 |
+
"learning_rate": 7.20768282723383e-09,
|
15912 |
+
"loss": 0.2107,
|
15913 |
+
"step": 2092
|
15914 |
+
},
|
15915 |
+
{
|
15916 |
+
"epoch": 0.9895981087470449,
|
15917 |
+
"grad_norm": 8.181370735168457,
|
15918 |
+
"learning_rate": 6.5946203067135395e-09,
|
15919 |
+
"loss": 0.2245,
|
15920 |
+
"step": 2093
|
15921 |
+
},
|
15922 |
+
{
|
15923 |
+
"epoch": 0.9900709219858156,
|
15924 |
+
"grad_norm": 5.936405181884766,
|
15925 |
+
"learning_rate": 6.008793300136262e-09,
|
15926 |
+
"loss": 0.1958,
|
15927 |
+
"step": 2094
|
15928 |
+
},
|
15929 |
+
{
|
15930 |
+
"epoch": 0.9905437352245863,
|
15931 |
+
"grad_norm": 6.984827995300293,
|
15932 |
+
"learning_rate": 5.450203404087084e-09,
|
15933 |
+
"loss": 0.2338,
|
15934 |
+
"step": 2095
|
15935 |
+
},
|
15936 |
+
{
|
15937 |
+
"epoch": 0.991016548463357,
|
15938 |
+
"grad_norm": 5.687265872955322,
|
15939 |
+
"learning_rate": 4.918852140916031e-09,
|
15940 |
+
"loss": 0.2498,
|
15941 |
+
"step": 2096
|
15942 |
+
},
|
15943 |
+
{
|
15944 |
+
"epoch": 0.9914893617021276,
|
15945 |
+
"grad_norm": 8.568177223205566,
|
15946 |
+
"learning_rate": 4.414740958742503e-09,
|
15947 |
+
"loss": 0.3252,
|
15948 |
+
"step": 2097
|
15949 |
+
},
|
15950 |
+
{
|
15951 |
+
"epoch": 0.9919621749408983,
|
15952 |
+
"grad_norm": 4.833063125610352,
|
15953 |
+
"learning_rate": 3.937871231444179e-09,
|
15954 |
+
"loss": 0.1798,
|
15955 |
+
"step": 2098
|
15956 |
+
},
|
15957 |
+
{
|
15958 |
+
"epoch": 0.992434988179669,
|
15959 |
+
"grad_norm": 4.7450056076049805,
|
15960 |
+
"learning_rate": 3.4882442586570143e-09,
|
15961 |
+
"loss": 0.1758,
|
15962 |
+
"step": 2099
|
15963 |
+
},
|
15964 |
+
{
|
15965 |
+
"epoch": 0.9929078014184397,
|
15966 |
+
"grad_norm": 5.54990291595459,
|
15967 |
+
"learning_rate": 3.0658612657730182e-09,
|
15968 |
+
"loss": 0.2018,
|
15969 |
+
"step": 2100
|
15970 |
+
},
|
15971 |
+
{
|
15972 |
+
"epoch": 0.9929078014184397,
|
15973 |
+
"eval_accuracy": 0.8647450110864745,
|
15974 |
+
"eval_f1": 0.7162790697674418,
|
15975 |
+
"eval_loss": 0.2974694073200226,
|
15976 |
+
"eval_precision": 0.8700564971751412,
|
15977 |
+
"eval_recall": 0.6086956521739131,
|
15978 |
+
"eval_runtime": 47.9735,
|
15979 |
+
"eval_samples_per_second": 5.753,
|
15980 |
+
"eval_steps_per_second": 0.188,
|
15981 |
+
"step": 2100
|
15982 |
}
|
15983 |
],
|
15984 |
"logging_steps": 1,
|
|
|
15998 |
"attributes": {}
|
15999 |
}
|
16000 |
},
|
16001 |
+
"total_flos": 5.377331196550185e+17,
|
16002 |
"train_batch_size": 8,
|
16003 |
"trial_name": null,
|
16004 |
"trial_params": null
|