Training in progress, step 11000, checkpoint
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d97a67c436d0aac8c51ef6e3c27fb63e9c89e825da3da26d0db7316a8b043cb5
 size 1856040378

last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0bfe4534d71a931e71c77546b6b1db5867a3a90fa4307542a6e49a6954fa531e
 size 928000378

last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cfed6543f5d672f101f42c0ba9d247e90cad9ca642dfb74b70cd76cc56d808d0
 size 14244

last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1e74c51e6198addd9d761a50ec2f9cc276c644808893fb0ce5a8bf14675cb23a
 size 1000

last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.
-  "best_model_checkpoint": "model/chessformer-3/checkpoint-
-  "epoch": 0.
+  "best_metric": 1.4820914268493652,
+  "best_model_checkpoint": "model/chessformer-3/checkpoint-11000",
+  "epoch": 0.4890845226979681,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 11000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -35167,6 +35167,3522 @@
       "eval_samples_per_second": 557.83,
       "eval_steps_per_second": 69.749,
       "step": 10000
35170 |
+
},
|
35171 |
+
{
|
35172 |
+
"epoch": 0.4447112178204615,
|
35173 |
+
"grad_norm": 0.07122799009084702,
|
35174 |
+
"learning_rate": 0.0006260291775788085,
|
35175 |
+
"loss": 1.5277,
|
35176 |
+
"step": 10002
|
35177 |
+
},
|
35178 |
+
{
|
35179 |
+
"epoch": 0.4448001422791339,
|
35180 |
+
"grad_norm": 0.06492965668439865,
|
35181 |
+
"learning_rate": 0.0006258877037424691,
|
35182 |
+
"loss": 1.5261,
|
35183 |
+
"step": 10004
|
35184 |
+
},
|
35185 |
+
{
|
35186 |
+
"epoch": 0.44488906673780626,
|
35187 |
+
"grad_norm": 0.06462486833333969,
|
35188 |
+
"learning_rate": 0.0006257462191446941,
|
35189 |
+
"loss": 1.5268,
|
35190 |
+
"step": 10006
|
35191 |
+
},
|
35192 |
+
{
|
35193 |
+
"epoch": 0.4449779911964786,
|
35194 |
+
"grad_norm": 0.06561804562807083,
|
35195 |
+
"learning_rate": 0.0006256047237975782,
|
35196 |
+
"loss": 1.5237,
|
35197 |
+
"step": 10008
|
35198 |
+
},
|
35199 |
+
{
|
35200 |
+
"epoch": 0.44506691565515094,
|
35201 |
+
"grad_norm": 0.06555454432964325,
|
35202 |
+
"learning_rate": 0.0006254632177132169,
|
35203 |
+
"loss": 1.5244,
|
35204 |
+
"step": 10010
|
35205 |
+
},
|
35206 |
+
{
|
35207 |
+
"epoch": 0.4451558401138233,
|
35208 |
+
"grad_norm": 0.06676924228668213,
|
35209 |
+
"learning_rate": 0.0006253217009037068,
|
35210 |
+
"loss": 1.5251,
|
35211 |
+
"step": 10012
|
35212 |
+
},
|
35213 |
+
{
|
35214 |
+
"epoch": 0.4452447645724957,
|
35215 |
+
"grad_norm": 0.06575804203748703,
|
35216 |
+
"learning_rate": 0.0006251801733811455,
|
35217 |
+
"loss": 1.5222,
|
35218 |
+
"step": 10014
|
35219 |
+
},
|
35220 |
+
{
|
35221 |
+
"epoch": 0.44533368903116805,
|
35222 |
+
"grad_norm": 0.06349354237318039,
|
35223 |
+
"learning_rate": 0.0006250386351576314,
|
35224 |
+
"loss": 1.5211,
|
35225 |
+
"step": 10016
|
35226 |
+
},
|
35227 |
+
{
|
35228 |
+
"epoch": 0.44542261348984036,
|
35229 |
+
"grad_norm": 0.06419903039932251,
|
35230 |
+
"learning_rate": 0.0006248970862452637,
|
35231 |
+
"loss": 1.5216,
|
35232 |
+
"step": 10018
|
35233 |
+
},
|
35234 |
+
{
|
35235 |
+
"epoch": 0.4455115379485127,
|
35236 |
+
"grad_norm": 0.06334590911865234,
|
35237 |
+
"learning_rate": 0.0006247555266561425,
|
35238 |
+
"loss": 1.5178,
|
35239 |
+
"step": 10020
|
35240 |
+
},
|
35241 |
+
{
|
35242 |
+
"epoch": 0.4456004624071851,
|
35243 |
+
"grad_norm": 0.06277309358119965,
|
35244 |
+
"learning_rate": 0.0006246139564023693,
|
35245 |
+
"loss": 1.5201,
|
35246 |
+
"step": 10022
|
35247 |
+
},
|
35248 |
+
{
|
35249 |
+
"epoch": 0.44568938686585746,
|
35250 |
+
"grad_norm": 0.06414992362260818,
|
35251 |
+
"learning_rate": 0.0006244723754960459,
|
35252 |
+
"loss": 1.5213,
|
35253 |
+
"step": 10024
|
35254 |
+
},
|
35255 |
+
{
|
35256 |
+
"epoch": 0.44577831132452983,
|
35257 |
+
"grad_norm": 0.06549572199583054,
|
35258 |
+
"learning_rate": 0.0006243307839492752,
|
35259 |
+
"loss": 1.5174,
|
35260 |
+
"step": 10026
|
35261 |
+
},
|
35262 |
+
{
|
35263 |
+
"epoch": 0.44586723578320214,
|
35264 |
+
"grad_norm": 0.0640779510140419,
|
35265 |
+
"learning_rate": 0.0006241891817741613,
|
35266 |
+
"loss": 1.5167,
|
35267 |
+
"step": 10028
|
35268 |
+
},
|
35269 |
+
{
|
35270 |
+
"epoch": 0.4459561602418745,
|
35271 |
+
"grad_norm": 0.06481896340847015,
|
35272 |
+
"learning_rate": 0.0006240475689828086,
|
35273 |
+
"loss": 1.5147,
|
35274 |
+
"step": 10030
|
35275 |
+
},
|
35276 |
+
{
|
35277 |
+
"epoch": 0.4460450847005469,
|
35278 |
+
"grad_norm": 0.06547030061483383,
|
35279 |
+
"learning_rate": 0.0006239059455873235,
|
35280 |
+
"loss": 1.5214,
|
35281 |
+
"step": 10032
|
35282 |
+
},
|
35283 |
+
{
|
35284 |
+
"epoch": 0.44613400915921925,
|
35285 |
+
"grad_norm": 0.061458081007003784,
|
35286 |
+
"learning_rate": 0.0006237643115998119,
|
35287 |
+
"loss": 1.5146,
|
35288 |
+
"step": 10034
|
35289 |
+
},
|
35290 |
+
{
|
35291 |
+
"epoch": 0.4462229336178916,
|
35292 |
+
"grad_norm": 0.06149187311530113,
|
35293 |
+
"learning_rate": 0.0006236226670323816,
|
35294 |
+
"loss": 1.5123,
|
35295 |
+
"step": 10036
|
35296 |
+
},
|
35297 |
+
{
|
35298 |
+
"epoch": 0.446311858076564,
|
35299 |
+
"grad_norm": 0.06510701775550842,
|
35300 |
+
"learning_rate": 0.0006234810118971408,
|
35301 |
+
"loss": 1.5149,
|
35302 |
+
"step": 10038
|
35303 |
+
},
|
35304 |
+
{
|
35305 |
+
"epoch": 0.4464007825352363,
|
35306 |
+
"grad_norm": 0.061700914055109024,
|
35307 |
+
"learning_rate": 0.0006233393462061989,
|
35308 |
+
"loss": 1.5157,
|
35309 |
+
"step": 10040
|
35310 |
+
},
|
35311 |
+
{
|
35312 |
+
"epoch": 0.44648970699390866,
|
35313 |
+
"grad_norm": 0.06388545781373978,
|
35314 |
+
"learning_rate": 0.0006231976699716664,
|
35315 |
+
"loss": 1.5221,
|
35316 |
+
"step": 10042
|
35317 |
+
},
|
35318 |
+
{
|
35319 |
+
"epoch": 0.44657863145258103,
|
35320 |
+
"grad_norm": 0.06275127828121185,
|
35321 |
+
"learning_rate": 0.0006230559832056539,
|
35322 |
+
"loss": 1.5054,
|
35323 |
+
"step": 10044
|
35324 |
+
},
|
35325 |
+
{
|
35326 |
+
"epoch": 0.4466675559112534,
|
35327 |
+
"grad_norm": 0.06381896138191223,
|
35328 |
+
"learning_rate": 0.0006229142859202739,
|
35329 |
+
"loss": 1.5148,
|
35330 |
+
"step": 10046
|
35331 |
+
},
|
35332 |
+
{
|
35333 |
+
"epoch": 0.44675648036992577,
|
35334 |
+
"grad_norm": 0.06251713633537292,
|
35335 |
+
"learning_rate": 0.0006227725781276389,
|
35336 |
+
"loss": 1.5142,
|
35337 |
+
"step": 10048
|
35338 |
+
},
|
35339 |
+
{
|
35340 |
+
"epoch": 0.4468454048285981,
|
35341 |
+
"grad_norm": 0.06425853818655014,
|
35342 |
+
"learning_rate": 0.000622630859839863,
|
35343 |
+
"loss": 1.5165,
|
35344 |
+
"step": 10050
|
35345 |
+
},
|
35346 |
+
{
|
35347 |
+
"epoch": 0.44693432928727045,
|
35348 |
+
"grad_norm": 0.06350740045309067,
|
35349 |
+
"learning_rate": 0.0006224891310690606,
|
35350 |
+
"loss": 1.5139,
|
35351 |
+
"step": 10052
|
35352 |
+
},
|
35353 |
+
{
|
35354 |
+
"epoch": 0.4470232537459428,
|
35355 |
+
"grad_norm": 0.06478399783372879,
|
35356 |
+
"learning_rate": 0.0006223473918273477,
|
35357 |
+
"loss": 1.5146,
|
35358 |
+
"step": 10054
|
35359 |
+
},
|
35360 |
+
{
|
35361 |
+
"epoch": 0.4471121782046152,
|
35362 |
+
"grad_norm": 0.06344152241945267,
|
35363 |
+
"learning_rate": 0.0006222056421268405,
|
35364 |
+
"loss": 1.5167,
|
35365 |
+
"step": 10056
|
35366 |
+
},
|
35367 |
+
{
|
35368 |
+
"epoch": 0.44720110266328755,
|
35369 |
+
"grad_norm": 0.06286389380693436,
|
35370 |
+
"learning_rate": 0.0006220638819796565,
|
35371 |
+
"loss": 1.5089,
|
35372 |
+
"step": 10058
|
35373 |
+
},
|
35374 |
+
{
|
35375 |
+
"epoch": 0.4472900271219599,
|
35376 |
+
"grad_norm": 0.06424534320831299,
|
35377 |
+
"learning_rate": 0.0006219221113979138,
|
35378 |
+
"loss": 1.5131,
|
35379 |
+
"step": 10060
|
35380 |
+
},
|
35381 |
+
{
|
35382 |
+
"epoch": 0.44737895158063223,
|
35383 |
+
"grad_norm": 0.061146993190050125,
|
35384 |
+
"learning_rate": 0.0006217803303937319,
|
35385 |
+
"loss": 1.5113,
|
35386 |
+
"step": 10062
|
35387 |
+
},
|
35388 |
+
{
|
35389 |
+
"epoch": 0.4474678760393046,
|
35390 |
+
"grad_norm": 0.06360126286745071,
|
35391 |
+
"learning_rate": 0.0006216385389792306,
|
35392 |
+
"loss": 1.5176,
|
35393 |
+
"step": 10064
|
35394 |
+
},
|
35395 |
+
{
|
35396 |
+
"epoch": 0.44755680049797697,
|
35397 |
+
"grad_norm": 0.06215475872159004,
|
35398 |
+
"learning_rate": 0.0006214967371665309,
|
35399 |
+
"loss": 1.5132,
|
35400 |
+
"step": 10066
|
35401 |
+
},
|
35402 |
+
{
|
35403 |
+
"epoch": 0.44764572495664934,
|
35404 |
+
"grad_norm": 0.06413223594427109,
|
35405 |
+
"learning_rate": 0.0006213549249677548,
|
35406 |
+
"loss": 1.5158,
|
35407 |
+
"step": 10068
|
35408 |
+
},
|
35409 |
+
{
|
35410 |
+
"epoch": 0.4477346494153217,
|
35411 |
+
"grad_norm": 0.06347139924764633,
|
35412 |
+
"learning_rate": 0.000621213102395025,
|
35413 |
+
"loss": 1.5089,
|
35414 |
+
"step": 10070
|
35415 |
+
},
|
35416 |
+
{
|
35417 |
+
"epoch": 0.447823573873994,
|
35418 |
+
"grad_norm": 0.06259430944919586,
|
35419 |
+
"learning_rate": 0.0006210712694604647,
|
35420 |
+
"loss": 1.5121,
|
35421 |
+
"step": 10072
|
35422 |
+
},
|
35423 |
+
{
|
35424 |
+
"epoch": 0.4479124983326664,
|
35425 |
+
"grad_norm": 0.06153608858585358,
|
35426 |
+
"learning_rate": 0.0006209294261761989,
|
35427 |
+
"loss": 1.5149,
|
35428 |
+
"step": 10074
|
35429 |
+
},
|
35430 |
+
{
|
35431 |
+
"epoch": 0.44800142279133875,
|
35432 |
+
"grad_norm": 0.06388126313686371,
|
35433 |
+
"learning_rate": 0.000620787572554353,
|
35434 |
+
"loss": 1.5123,
|
35435 |
+
"step": 10076
|
35436 |
+
},
|
35437 |
+
{
|
35438 |
+
"epoch": 0.4480903472500111,
|
35439 |
+
"grad_norm": 0.06113690882921219,
|
35440 |
+
"learning_rate": 0.0006206457086070531,
|
35441 |
+
"loss": 1.5124,
|
35442 |
+
"step": 10078
|
35443 |
+
},
|
35444 |
+
{
|
35445 |
+
"epoch": 0.4481792717086835,
|
35446 |
+
"grad_norm": 0.06311555206775665,
|
35447 |
+
"learning_rate": 0.000620503834346426,
|
35448 |
+
"loss": 1.5159,
|
35449 |
+
"step": 10080
|
35450 |
+
},
|
35451 |
+
{
|
35452 |
+
"epoch": 0.4482681961673558,
|
35453 |
+
"grad_norm": 0.06283438205718994,
|
35454 |
+
"learning_rate": 0.0006203619497846005,
|
35455 |
+
"loss": 1.5146,
|
35456 |
+
"step": 10082
|
35457 |
+
},
|
35458 |
+
{
|
35459 |
+
"epoch": 0.44835712062602817,
|
35460 |
+
"grad_norm": 0.06206725165247917,
|
35461 |
+
"learning_rate": 0.0006202200549337048,
|
35462 |
+
"loss": 1.5096,
|
35463 |
+
"step": 10084
|
35464 |
+
},
|
35465 |
+
{
|
35466 |
+
"epoch": 0.44844604508470054,
|
35467 |
+
"grad_norm": 0.062284741550683975,
|
35468 |
+
"learning_rate": 0.0006200781498058695,
|
35469 |
+
"loss": 1.5088,
|
35470 |
+
"step": 10086
|
35471 |
+
},
|
35472 |
+
{
|
35473 |
+
"epoch": 0.4485349695433729,
|
35474 |
+
"grad_norm": 0.06355787068605423,
|
35475 |
+
"learning_rate": 0.0006199362344132243,
|
35476 |
+
"loss": 1.5075,
|
35477 |
+
"step": 10088
|
35478 |
+
},
|
35479 |
+
{
|
35480 |
+
"epoch": 0.4486238940020453,
|
35481 |
+
"grad_norm": 0.06461120396852493,
|
35482 |
+
"learning_rate": 0.0006197943087679013,
|
35483 |
+
"loss": 1.5123,
|
35484 |
+
"step": 10090
|
35485 |
+
},
|
35486 |
+
{
|
35487 |
+
"epoch": 0.44871281846071764,
|
35488 |
+
"grad_norm": 0.06290825456380844,
|
35489 |
+
"learning_rate": 0.0006196523728820329,
|
35490 |
+
"loss": 1.5114,
|
35491 |
+
"step": 10092
|
35492 |
+
},
|
35493 |
+
{
|
35494 |
+
"epoch": 0.44880174291938996,
|
35495 |
+
"grad_norm": 0.06232891604304314,
|
35496 |
+
"learning_rate": 0.0006195104267677525,
|
35497 |
+
"loss": 1.5073,
|
35498 |
+
"step": 10094
|
35499 |
+
},
|
35500 |
+
{
|
35501 |
+
"epoch": 0.4488906673780623,
|
35502 |
+
"grad_norm": 0.06516814976930618,
|
35503 |
+
"learning_rate": 0.0006193684704371941,
|
35504 |
+
"loss": 1.519,
|
35505 |
+
"step": 10096
|
35506 |
+
},
|
35507 |
+
{
|
35508 |
+
"epoch": 0.4489795918367347,
|
35509 |
+
"grad_norm": 0.06370670348405838,
|
35510 |
+
"learning_rate": 0.0006192265039024928,
|
35511 |
+
"loss": 1.5129,
|
35512 |
+
"step": 10098
|
35513 |
+
},
|
35514 |
+
{
|
35515 |
+
"epoch": 0.44906851629540706,
|
35516 |
+
"grad_norm": 0.0628281906247139,
|
35517 |
+
"learning_rate": 0.0006190845271757846,
|
35518 |
+
"loss": 1.5132,
|
35519 |
+
"step": 10100
|
35520 |
+
},
|
35521 |
+
{
|
35522 |
+
"epoch": 0.44915744075407943,
|
35523 |
+
"grad_norm": 0.06411248445510864,
|
35524 |
+
"learning_rate": 0.0006189425402692061,
|
35525 |
+
"loss": 1.5139,
|
35526 |
+
"step": 10102
|
35527 |
+
},
|
35528 |
+
{
|
35529 |
+
"epoch": 0.44924636521275174,
|
35530 |
+
"grad_norm": 0.06423705816268921,
|
35531 |
+
"learning_rate": 0.0006188005431948953,
|
35532 |
+
"loss": 1.5087,
|
35533 |
+
"step": 10104
|
35534 |
+
},
|
35535 |
+
{
|
35536 |
+
"epoch": 0.4493352896714241,
|
35537 |
+
"grad_norm": 0.06506875157356262,
|
35538 |
+
"learning_rate": 0.0006186585359649903,
|
35539 |
+
"loss": 1.5149,
|
35540 |
+
"step": 10106
|
35541 |
+
},
|
35542 |
+
{
|
35543 |
+
"epoch": 0.4494242141300965,
|
35544 |
+
"grad_norm": 0.06523150950670242,
|
35545 |
+
"learning_rate": 0.0006185165185916308,
|
35546 |
+
"loss": 1.5154,
|
35547 |
+
"step": 10108
|
35548 |
+
},
|
35549 |
+
{
|
35550 |
+
"epoch": 0.44951313858876885,
|
35551 |
+
"grad_norm": 0.06264711171388626,
|
35552 |
+
"learning_rate": 0.0006183744910869571,
|
35553 |
+
"loss": 1.511,
|
35554 |
+
"step": 10110
|
35555 |
+
},
|
35556 |
+
{
|
35557 |
+
"epoch": 0.4496020630474412,
|
35558 |
+
"grad_norm": 0.06609227508306503,
|
35559 |
+
"learning_rate": 0.0006182324534631102,
|
35560 |
+
"loss": 1.5134,
|
35561 |
+
"step": 10112
|
35562 |
+
},
|
35563 |
+
{
|
35564 |
+
"epoch": 0.4496909875061136,
|
35565 |
+
"grad_norm": 0.06463871151208878,
|
35566 |
+
"learning_rate": 0.0006180904057322321,
|
35567 |
+
"loss": 1.5089,
|
35568 |
+
"step": 10114
|
35569 |
+
},
|
35570 |
+
{
|
35571 |
+
"epoch": 0.4497799119647859,
|
35572 |
+
"grad_norm": 0.06578138470649719,
|
35573 |
+
"learning_rate": 0.0006179483479064657,
|
35574 |
+
"loss": 1.5116,
|
35575 |
+
"step": 10116
|
35576 |
+
},
|
35577 |
+
{
|
35578 |
+
"epoch": 0.44986883642345826,
|
35579 |
+
"grad_norm": 0.0638619214296341,
|
35580 |
+
"learning_rate": 0.0006178062799979548,
|
35581 |
+
"loss": 1.5139,
|
35582 |
+
"step": 10118
|
35583 |
+
},
|
35584 |
+
{
|
35585 |
+
"epoch": 0.44995776088213063,
|
35586 |
+
"grad_norm": 0.06302259117364883,
|
35587 |
+
"learning_rate": 0.0006176642020188439,
|
35588 |
+
"loss": 1.5093,
|
35589 |
+
"step": 10120
|
35590 |
+
},
|
35591 |
+
{
|
35592 |
+
"epoch": 0.450046685340803,
|
35593 |
+
"grad_norm": 0.06259556859731674,
|
35594 |
+
"learning_rate": 0.0006175221139812784,
|
35595 |
+
"loss": 1.51,
|
35596 |
+
"step": 10122
|
35597 |
+
},
|
35598 |
+
{
|
35599 |
+
"epoch": 0.45013560979947537,
|
35600 |
+
"grad_norm": 0.06390849500894547,
|
35601 |
+
"learning_rate": 0.0006173800158974048,
|
35602 |
+
"loss": 1.5142,
|
35603 |
+
"step": 10124
|
35604 |
+
},
|
35605 |
+
{
|
35606 |
+
"epoch": 0.4502245342581477,
|
35607 |
+
"grad_norm": 0.06265709549188614,
|
35608 |
+
"learning_rate": 0.0006172379077793702,
|
35609 |
+
"loss": 1.5049,
|
35610 |
+
"step": 10126
|
35611 |
+
},
|
35612 |
+
{
|
35613 |
+
"epoch": 0.45031345871682005,
|
35614 |
+
"grad_norm": 0.061674658209085464,
|
35615 |
+
"learning_rate": 0.0006170957896393225,
|
35616 |
+
"loss": 1.5133,
|
35617 |
+
"step": 10128
|
35618 |
+
},
|
35619 |
+
{
|
35620 |
+
"epoch": 0.4504023831754924,
|
35621 |
+
"grad_norm": 0.06264129281044006,
|
35622 |
+
"learning_rate": 0.0006169536614894107,
|
35623 |
+
"loss": 1.5137,
|
35624 |
+
"step": 10130
|
35625 |
+
},
|
35626 |
+
{
|
35627 |
+
"epoch": 0.4504913076341648,
|
35628 |
+
"grad_norm": 0.06164534017443657,
|
35629 |
+
"learning_rate": 0.0006168115233417846,
|
35630 |
+
"loss": 1.5115,
|
35631 |
+
"step": 10132
|
35632 |
+
},
|
35633 |
+
{
|
35634 |
+
"epoch": 0.45058023209283715,
|
35635 |
+
"grad_norm": 0.06206073611974716,
|
35636 |
+
"learning_rate": 0.0006166693752085946,
|
35637 |
+
"loss": 1.5174,
|
35638 |
+
"step": 10134
|
35639 |
+
},
|
35640 |
+
{
|
35641 |
+
"epoch": 0.4506691565515095,
|
35642 |
+
"grad_norm": 0.06333418935537338,
|
35643 |
+
"learning_rate": 0.0006165272171019923,
|
35644 |
+
"loss": 1.5157,
|
35645 |
+
"step": 10136
|
35646 |
+
},
|
35647 |
+
{
|
35648 |
+
"epoch": 0.45075808101018183,
|
35649 |
+
"grad_norm": 0.06554005295038223,
|
35650 |
+
"learning_rate": 0.0006163850490341298,
|
35651 |
+
"loss": 1.5043,
|
35652 |
+
"step": 10138
|
35653 |
+
},
|
35654 |
+
{
|
35655 |
+
"epoch": 0.4508470054688542,
|
35656 |
+
"grad_norm": 0.06292907893657684,
|
35657 |
+
"learning_rate": 0.0006162428710171608,
|
35658 |
+
"loss": 1.5117,
|
35659 |
+
"step": 10140
|
35660 |
+
},
|
35661 |
+
{
|
35662 |
+
"epoch": 0.45093592992752657,
|
35663 |
+
"grad_norm": 0.06288234144449234,
|
35664 |
+
"learning_rate": 0.0006161006830632386,
|
35665 |
+
"loss": 1.5138,
|
35666 |
+
"step": 10142
|
35667 |
+
},
|
35668 |
+
{
|
35669 |
+
"epoch": 0.45102485438619894,
|
35670 |
+
"grad_norm": 0.06246441975235939,
|
35671 |
+
"learning_rate": 0.0006159584851845184,
|
35672 |
+
"loss": 1.5104,
|
35673 |
+
"step": 10144
|
35674 |
+
},
|
35675 |
+
{
|
35676 |
+
"epoch": 0.4511137788448713,
|
35677 |
+
"grad_norm": 0.062472037971019745,
|
35678 |
+
"learning_rate": 0.0006158162773931559,
|
35679 |
+
"loss": 1.5149,
|
35680 |
+
"step": 10146
|
35681 |
+
},
|
35682 |
+
{
|
35683 |
+
"epoch": 0.4512027033035436,
|
35684 |
+
"grad_norm": 0.06365121155977249,
|
35685 |
+
"learning_rate": 0.0006156740597013079,
|
35686 |
+
"loss": 1.5114,
|
35687 |
+
"step": 10148
|
35688 |
+
},
|
35689 |
+
{
|
35690 |
+
"epoch": 0.451291627762216,
|
35691 |
+
"grad_norm": 0.06463265419006348,
|
35692 |
+
"learning_rate": 0.0006155318321211312,
|
35693 |
+
"loss": 1.5189,
|
35694 |
+
"step": 10150
|
35695 |
+
},
|
35696 |
+
{
|
35697 |
+
"epoch": 0.45138055222088835,
|
35698 |
+
"grad_norm": 0.06455686688423157,
|
35699 |
+
"learning_rate": 0.0006153895946647845,
|
35700 |
+
"loss": 1.5085,
|
35701 |
+
"step": 10152
|
35702 |
+
},
|
35703 |
+
{
|
35704 |
+
"epoch": 0.4514694766795607,
|
35705 |
+
"grad_norm": 0.06434636563062668,
|
35706 |
+
"learning_rate": 0.0006152473473444265,
|
35707 |
+
"loss": 1.5093,
|
35708 |
+
"step": 10154
|
35709 |
+
},
|
35710 |
+
{
|
35711 |
+
"epoch": 0.4515584011382331,
|
35712 |
+
"grad_norm": 0.06507983803749084,
|
35713 |
+
"learning_rate": 0.0006151050901722177,
|
35714 |
+
"loss": 1.5084,
|
35715 |
+
"step": 10156
|
35716 |
+
},
|
35717 |
+
{
|
35718 |
+
"epoch": 0.4516473255969054,
|
35719 |
+
"grad_norm": 0.0638333186507225,
|
35720 |
+
"learning_rate": 0.0006149628231603184,
|
35721 |
+
"loss": 1.5092,
|
35722 |
+
"step": 10158
|
35723 |
+
},
|
35724 |
+
{
|
35725 |
+
"epoch": 0.45173625005557777,
|
35726 |
+
"grad_norm": 0.0641988068819046,
|
35727 |
+
"learning_rate": 0.0006148205463208902,
|
35728 |
+
"loss": 1.5138,
|
35729 |
+
"step": 10160
|
35730 |
+
},
|
35731 |
+
{
|
35732 |
+
"epoch": 0.45182517451425014,
|
35733 |
+
"grad_norm": 0.06309515982866287,
|
35734 |
+
"learning_rate": 0.000614678259666096,
|
35735 |
+
"loss": 1.5057,
|
35736 |
+
"step": 10162
|
35737 |
+
},
|
35738 |
+
{
|
35739 |
+
"epoch": 0.4519140989729225,
|
35740 |
+
"grad_norm": 0.06427089869976044,
|
35741 |
+
"learning_rate": 0.0006145359632080987,
|
35742 |
+
"loss": 1.5137,
|
35743 |
+
"step": 10164
|
35744 |
+
},
|
35745 |
+
{
|
35746 |
+
"epoch": 0.4520030234315949,
|
35747 |
+
"grad_norm": 0.06765829026699066,
|
35748 |
+
"learning_rate": 0.0006143936569590624,
|
35749 |
+
"loss": 1.5093,
|
35750 |
+
"step": 10166
|
35751 |
+
},
|
35752 |
+
{
|
35753 |
+
"epoch": 0.45209194789026724,
|
35754 |
+
"grad_norm": 0.0644116923213005,
|
35755 |
+
"learning_rate": 0.0006142513409311522,
|
35756 |
+
"loss": 1.5123,
|
35757 |
+
"step": 10168
|
35758 |
+
},
|
35759 |
+
{
|
35760 |
+
"epoch": 0.45218087234893956,
|
35761 |
+
"grad_norm": 0.06532014906406403,
|
35762 |
+
"learning_rate": 0.000614109015136534,
|
35763 |
+
"loss": 1.5055,
|
35764 |
+
"step": 10170
|
35765 |
+
},
|
35766 |
+
{
|
35767 |
+
"epoch": 0.4522697968076119,
|
35768 |
+
"grad_norm": 0.06273654848337173,
|
35769 |
+
"learning_rate": 0.0006139666795873743,
|
35770 |
+
"loss": 1.5117,
|
35771 |
+
"step": 10172
|
35772 |
+
},
|
35773 |
+
{
|
35774 |
+
"epoch": 0.4523587212662843,
|
35775 |
+
"grad_norm": 0.0660717710852623,
|
35776 |
+
"learning_rate": 0.0006138243342958405,
|
35777 |
+
"loss": 1.5065,
|
35778 |
+
"step": 10174
|
35779 |
+
},
|
35780 |
+
{
|
35781 |
+
"epoch": 0.45244764572495666,
|
35782 |
+
"grad_norm": 0.06457041203975677,
|
35783 |
+
"learning_rate": 0.0006136819792741011,
|
35784 |
+
"loss": 1.5117,
|
35785 |
+
"step": 10176
|
35786 |
+
},
|
35787 |
+
{
|
35788 |
+
"epoch": 0.452536570183629,
|
35789 |
+
"grad_norm": 0.06340939551591873,
|
35790 |
+
"learning_rate": 0.000613539614534325,
|
35791 |
+
"loss": 1.507,
|
35792 |
+
"step": 10178
|
35793 |
+
},
|
35794 |
+
{
|
35795 |
+
"epoch": 0.45262549464230134,
|
35796 |
+
"grad_norm": 0.06222638487815857,
|
35797 |
+
"learning_rate": 0.0006133972400886825,
|
35798 |
+
"loss": 1.5065,
|
35799 |
+
"step": 10180
|
35800 |
+
},
|
35801 |
+
{
|
35802 |
+
"epoch": 0.4527144191009737,
|
35803 |
+
"grad_norm": 0.06319104135036469,
|
35804 |
+
"learning_rate": 0.0006132548559493441,
|
35805 |
+
"loss": 1.5134,
|
35806 |
+
"step": 10182
|
35807 |
+
},
|
35808 |
+
{
|
35809 |
+
"epoch": 0.4528033435596461,
|
35810 |
+
"grad_norm": 0.06425220519304276,
|
35811 |
+
"learning_rate": 0.0006131124621284815,
|
35812 |
+
"loss": 1.509,
|
35813 |
+
"step": 10184
|
35814 |
+
},
|
35815 |
+
{
|
35816 |
+
"epoch": 0.45289226801831844,
|
35817 |
+
"grad_norm": 0.06511306017637253,
|
35818 |
+
"learning_rate": 0.0006129700586382671,
|
35819 |
+
"loss": 1.5106,
|
35820 |
+
"step": 10186
|
35821 |
+
},
|
35822 |
+
{
|
35823 |
+
"epoch": 0.4529811924769908,
|
35824 |
+
"grad_norm": 0.06421872228384018,
|
35825 |
+
"learning_rate": 0.0006128276454908742,
|
35826 |
+
"loss": 1.5101,
|
35827 |
+
"step": 10188
|
35828 |
+
},
|
35829 |
+
{
|
35830 |
+
"epoch": 0.4530701169356632,
|
35831 |
+
"grad_norm": 0.06392832100391388,
|
35832 |
+
"learning_rate": 0.0006126852226984771,
|
35833 |
+
"loss": 1.5032,
|
35834 |
+
"step": 10190
|
35835 |
+
},
|
35836 |
+
{
|
35837 |
+
"epoch": 0.4531590413943355,
|
35838 |
+
"grad_norm": 0.06504455208778381,
|
35839 |
+
"learning_rate": 0.0006125427902732506,
|
35840 |
+
"loss": 1.5128,
|
35841 |
+
"step": 10192
|
35842 |
+
},
|
35843 |
+
{
|
35844 |
+
"epoch": 0.45324796585300786,
|
35845 |
+
"grad_norm": 0.06360786408185959,
|
35846 |
+
"learning_rate": 0.0006124003482273704,
|
35847 |
+
"loss": 1.5038,
|
35848 |
+
"step": 10194
|
35849 |
+
},
|
35850 |
+
{
|
35851 |
+
"epoch": 0.45333689031168023,
|
35852 |
+
"grad_norm": 0.06323164701461792,
|
35853 |
+
"learning_rate": 0.000612257896573013,
|
35854 |
+
"loss": 1.5068,
|
35855 |
+
"step": 10196
|
35856 |
+
},
|
35857 |
+
{
|
35858 |
+
"epoch": 0.4534258147703526,
|
35859 |
+
"grad_norm": 0.06563688069581985,
|
35860 |
+
"learning_rate": 0.000612115435322356,
|
35861 |
+
"loss": 1.5096,
|
35862 |
+
"step": 10198
|
35863 |
+
},
|
35864 |
+
{
|
35865 |
+
"epoch": 0.45351473922902497,
|
35866 |
+
"grad_norm": 0.06295774132013321,
|
35867 |
+
"learning_rate": 0.0006119729644875774,
|
35868 |
+
"loss": 1.5103,
|
35869 |
+
"step": 10200
|
35870 |
+
},
|
35871 |
+
{
|
35872 |
+
"epoch": 0.4536036636876973,
|
35873 |
+
"grad_norm": 0.06223352998495102,
|
35874 |
+
"learning_rate": 0.0006118304840808565,
|
35875 |
+
"loss": 1.5064,
|
35876 |
+
"step": 10202
|
35877 |
+
},
|
35878 |
+
{
|
35879 |
+
"epoch": 0.45369258814636965,
|
35880 |
+
"grad_norm": 0.06273999065160751,
|
35881 |
+
"learning_rate": 0.0006116879941143728,
|
35882 |
+
"loss": 1.5096,
|
35883 |
+
"step": 10204
|
35884 |
+
},
|
35885 |
+
{
|
35886 |
+
"epoch": 0.453781512605042,
|
35887 |
+
"grad_norm": 0.06447035819292068,
|
35888 |
+
"learning_rate": 0.0006115454946003074,
|
35889 |
+
"loss": 1.5094,
|
35890 |
+
"step": 10206
|
35891 |
+
},
|
35892 |
+
{
|
35893 |
+
"epoch": 0.4538704370637144,
|
35894 |
+
"grad_norm": 0.06401224434375763,
|
35895 |
+
"learning_rate": 0.0006114029855508413,
|
35896 |
+
"loss": 1.5158,
|
35897 |
+
"step": 10208
|
35898 |
+
},
|
35899 |
+
{
|
35900 |
+
"epoch": 0.45395936152238675,
|
35901 |
+
"grad_norm": 0.06190183758735657,
|
35902 |
+
"learning_rate": 0.0006112604669781572,
|
35903 |
+
"loss": 1.5101,
|
35904 |
+
"step": 10210
|
35905 |
+
},
|
35906 |
+
{
|
35907 |
+
"epoch": 0.45404828598105906,
|
35908 |
+
"grad_norm": 0.06359678506851196,
|
35909 |
+
"learning_rate": 0.0006111179388944381,
|
35910 |
+
"loss": 1.5114,
|
35911 |
+
"step": 10212
|
35912 |
+
},
|
35913 |
+
{
|
35914 |
+
"epoch": 0.45413721043973143,
|
35915 |
+
"grad_norm": 0.06362069398164749,
|
35916 |
+
"learning_rate": 0.0006109754013118678,
|
35917 |
+
"loss": 1.5103,
|
35918 |
+
"step": 10214
|
35919 |
+
},
|
35920 |
+
{
|
35921 |
+
"epoch": 0.4542261348984038,
|
35922 |
+
"grad_norm": 0.06391779333353043,
|
35923 |
+
"learning_rate": 0.0006108328542426312,
|
35924 |
+
"loss": 1.5183,
|
35925 |
+
"step": 10216
|
35926 |
+
},
|
35927 |
+
{
|
35928 |
+
"epoch": 0.45431505935707617,
|
35929 |
+
"grad_norm": 0.06379327178001404,
|
35930 |
+
"learning_rate": 0.0006106902976989139,
|
35931 |
+
"loss": 1.5107,
|
35932 |
+
"step": 10218
|
35933 |
+
},
|
35934 |
+
{
|
35935 |
+
"epoch": 0.45440398381574854,
|
35936 |
+
"grad_norm": 0.06601396203041077,
|
35937 |
+
"learning_rate": 0.0006105477316929021,
|
35938 |
+
"loss": 1.5121,
|
35939 |
+
"step": 10220
|
35940 |
+
},
|
35941 |
+
{
|
35942 |
+
"epoch": 0.4544929082744209,
|
35943 |
+
"grad_norm": 0.0628276839852333,
|
35944 |
+
"learning_rate": 0.0006104051562367829,
|
35945 |
+
"loss": 1.5103,
|
35946 |
+
"step": 10222
|
35947 |
+
},
|
35948 |
+
{
|
35949 |
+
"epoch": 0.4545818327330932,
|
35950 |
+
"grad_norm": 0.06758300215005875,
|
35951 |
+
"learning_rate": 0.0006102625713427446,
|
35952 |
+
"loss": 1.5088,
|
35953 |
+
"step": 10224
|
35954 |
+
},
|
35955 |
+
{
|
35956 |
+
"epoch": 0.4546707571917656,
|
35957 |
+
"grad_norm": 0.06313972175121307,
|
35958 |
+
"learning_rate": 0.0006101199770229758,
|
35959 |
+
"loss": 1.5101,
|
35960 |
+
"step": 10226
|
35961 |
+
},
|
35962 |
+
{
|
35963 |
+
"epoch": 0.45475968165043795,
|
35964 |
+
"grad_norm": 0.06574665755033493,
|
35965 |
+
"learning_rate": 0.0006099773732896658,
|
35966 |
+
"loss": 1.5103,
|
35967 |
+
"step": 10228
|
35968 |
+
},
|
35969 |
+
{
|
35970 |
+
"epoch": 0.4548486061091103,
|
35971 |
+
"grad_norm": 0.06595081835985184,
|
35972 |
+
"learning_rate": 0.0006098347601550055,
|
35973 |
+
"loss": 1.5099,
|
35974 |
+
"step": 10230
|
35975 |
+
},
|
35976 |
+
{
|
35977 |
+
"epoch": 0.4549375305677827,
|
35978 |
+
"grad_norm": 0.06619875133037567,
|
35979 |
+
"learning_rate": 0.0006096921376311857,
|
35980 |
+
"loss": 1.5086,
|
35981 |
+
"step": 10232
|
35982 |
+
},
|
35983 |
+
{
|
35984 |
+
"epoch": 0.455026455026455,
|
35985 |
+
"grad_norm": 0.06375854462385178,
|
35986 |
+
"learning_rate": 0.0006095495057303988,
|
35987 |
+
"loss": 1.5088,
|
35988 |
+
"step": 10234
|
35989 |
+
},
|
35990 |
+
{
|
35991 |
+
"epoch": 0.45511537948512737,
|
35992 |
+
"grad_norm": 0.06172388046979904,
|
35993 |
+
"learning_rate": 0.0006094068644648373,
|
35994 |
+
"loss": 1.5109,
|
35995 |
+
"step": 10236
|
35996 |
+
},
|
35997 |
+
{
|
35998 |
+
"epoch": 0.45520430394379974,
|
35999 |
+
"grad_norm": 0.06474345177412033,
|
36000 |
+
"learning_rate": 0.0006092642138466948,
|
36001 |
+
"loss": 1.5136,
|
36002 |
+
"step": 10238
|
36003 |
+
},
|
36004 |
+
{
|
36005 |
+
"epoch": 0.4552932284024721,
|
36006 |
+
"grad_norm": 0.06749559938907623,
|
36007 |
+
"learning_rate": 0.0006091215538881658,
|
36008 |
+
"loss": 1.5045,
|
36009 |
+
"step": 10240
|
36010 |
+
},
|
36011 |
+
{
|
36012 |
+
"epoch": 0.4553821528611445,
|
36013 |
+
"grad_norm": 0.062397945672273636,
|
36014 |
+
"learning_rate": 0.0006089788846014457,
|
36015 |
+
"loss": 1.5162,
|
36016 |
+
"step": 10242
|
36017 |
+
},
|
36018 |
+
{
|
36019 |
+
"epoch": 0.45547107731981684,
|
36020 |
+
"grad_norm": 0.06550050526857376,
|
36021 |
+
"learning_rate": 0.0006088362059987301,
|
36022 |
+
"loss": 1.5102,
|
36023 |
+
"step": 10244
|
36024 |
+
},
|
36025 |
+
{
|
36026 |
+
"epoch": 0.45556000177848915,
|
36027 |
+
"grad_norm": 0.0633438304066658,
|
36028 |
+
"learning_rate": 0.0006086935180922159,
|
36029 |
+
"loss": 1.506,
|
36030 |
+
"step": 10246
|
36031 |
+
},
|
36032 |
+
{
|
36033 |
+
"epoch": 0.4556489262371615,
|
36034 |
+
"grad_norm": 0.06535664945840836,
|
36035 |
+
"learning_rate": 0.000608550820894101,
|
36036 |
+
"loss": 1.5092,
|
36037 |
+
"step": 10248
|
36038 |
+
},
|
36039 |
+
{
|
36040 |
+
"epoch": 0.4557378506958339,
|
36041 |
+
"grad_norm": 0.0659709945321083,
|
36042 |
+
"learning_rate": 0.0006084081144165835,
|
36043 |
+
"loss": 1.518,
|
36044 |
+
"step": 10250
|
36045 |
+
},
|
36046 |
+
{
|
36047 |
+
"epoch": 0.45582677515450626,
|
36048 |
+
"grad_norm": 0.06421976536512375,
|
36049 |
+
"learning_rate": 0.0006082653986718626,
|
36050 |
+
"loss": 1.5138,
|
36051 |
+
"step": 10252
|
36052 |
+
},
|
36053 |
+
{
|
36054 |
+
"epoch": 0.4559156996131786,
|
36055 |
+
"grad_norm": 0.06513391435146332,
|
36056 |
+
"learning_rate": 0.0006081226736721383,
|
36057 |
+
"loss": 1.5079,
|
36058 |
+
"step": 10254
|
36059 |
+
},
|
36060 |
+
{
|
36061 |
+
"epoch": 0.45600462407185094,
|
36062 |
+
"grad_norm": 0.06458871066570282,
|
36063 |
+
"learning_rate": 0.0006079799394296115,
|
36064 |
+
"loss": 1.5101,
|
36065 |
+
"step": 10256
|
36066 |
+
},
|
36067 |
+
{
|
36068 |
+
"epoch": 0.4560935485305233,
|
36069 |
+
"grad_norm": 0.06421530246734619,
|
36070 |
+
"learning_rate": 0.0006078371959564833,
|
36071 |
+
"loss": 1.5049,
|
36072 |
+
"step": 10258
|
36073 |
+
},
|
36074 |
+
{
|
36075 |
+
"epoch": 0.4561824729891957,
|
36076 |
+
"grad_norm": 0.0646352618932724,
|
36077 |
+
"learning_rate": 0.0006076944432649567,
|
36078 |
+
"loss": 1.504,
|
36079 |
+
"step": 10260
|
36080 |
+
},
|
36081 |
+
{
|
36082 |
+
"epoch": 0.45627139744786804,
|
36083 |
+
"grad_norm": 0.06522826105356216,
|
36084 |
+
"learning_rate": 0.0006075516813672342,
|
36085 |
+
"loss": 1.5081,
|
36086 |
+
"step": 10262
|
36087 |
+
},
|
36088 |
+
{
|
36089 |
+
"epoch": 0.4563603219065404,
|
36090 |
+
"grad_norm": 0.06355036050081253,
|
36091 |
+
"learning_rate": 0.0006074089102755204,
|
36092 |
+
"loss": 1.5131,
|
36093 |
+
"step": 10264
|
36094 |
+
},
|
36095 |
+
{
|
36096 |
+
"epoch": 0.4564492463652127,
|
36097 |
+
"grad_norm": 0.06306944042444229,
|
36098 |
+
"learning_rate": 0.0006072661300020193,
|
36099 |
+
"loss": 1.5033,
|
36100 |
+
"step": 10266
|
36101 |
+
},
|
36102 |
+
{
|
36103 |
+
"epoch": 0.4565381708238851,
|
36104 |
+
"grad_norm": 0.0642002746462822,
|
36105 |
+
"learning_rate": 0.0006071233405589368,
|
36106 |
+
"loss": 1.5074,
|
36107 |
+
"step": 10268
|
36108 |
+
},
|
36109 |
+
{
|
36110 |
+
"epoch": 0.45662709528255746,
|
36111 |
+
"grad_norm": 0.06410963833332062,
|
36112 |
+
"learning_rate": 0.0006069805419584791,
|
36113 |
+
"loss": 1.5062,
|
36114 |
+
"step": 10270
|
36115 |
+
},
|
36116 |
+
{
|
36117 |
+
"epoch": 0.45671601974122983,
|
36118 |
+
"grad_norm": 0.06610678881406784,
|
36119 |
+
"learning_rate": 0.0006068377342128532,
|
36120 |
+
"loss": 1.5125,
|
36121 |
+
"step": 10272
|
36122 |
+
},
|
36123 |
+
{
|
36124 |
+
"epoch": 0.4568049441999022,
|
36125 |
+
"grad_norm": 0.06421998143196106,
|
36126 |
+
"learning_rate": 0.000606694917334267,
|
36127 |
+
"loss": 1.5103,
|
36128 |
+
"step": 10274
|
36129 |
+
},
|
36130 |
+
{
|
36131 |
+
"epoch": 0.45689386865857456,
|
36132 |
+
"grad_norm": 0.0632401779294014,
|
36133 |
+
"learning_rate": 0.000606552091334929,
|
36134 |
+
"loss": 1.5144,
|
36135 |
+
"step": 10276
|
36136 |
+
},
|
36137 |
+
{
|
36138 |
+
"epoch": 0.4569827931172469,
|
36139 |
+
"grad_norm": 0.06443547457456589,
|
36140 |
+
"learning_rate": 0.0006064092562270487,
|
36141 |
+
"loss": 1.5061,
|
36142 |
+
"step": 10278
|
36143 |
+
},
|
36144 |
+
{
|
36145 |
+
"epoch": 0.45707171757591925,
|
36146 |
+
"grad_norm": 0.06429964303970337,
|
36147 |
+
"learning_rate": 0.0006062664120228363,
|
36148 |
+
"loss": 1.5122,
|
36149 |
+
"step": 10280
|
36150 |
+
},
|
36151 |
+
{
|
36152 |
+
"epoch": 0.4571606420345916,
|
36153 |
+
"grad_norm": 0.06369158625602722,
|
36154 |
+
"learning_rate": 0.0006061235587345025,
|
36155 |
+
"loss": 1.5081,
|
36156 |
+
"step": 10282
|
36157 |
+
},
|
36158 |
+
{
|
36159 |
+
"epoch": 0.457249566493264,
|
36160 |
+
"grad_norm": 0.062136199325323105,
|
36161 |
+
"learning_rate": 0.0006059806963742595,
|
36162 |
+
"loss": 1.5106,
|
36163 |
+
"step": 10284
|
36164 |
+
},
|
36165 |
+
{
|
36166 |
+
"epoch": 0.45733849095193635,
|
36167 |
+
"grad_norm": 0.06280120462179184,
|
36168 |
+
"learning_rate": 0.0006058378249543193,
|
36169 |
+
"loss": 1.5096,
|
36170 |
+
"step": 10286
|
36171 |
+
},
|
36172 |
+
{
|
36173 |
+
"epoch": 0.45742741541060866,
|
36174 |
+
"grad_norm": 0.06295254826545715,
|
36175 |
+
"learning_rate": 0.0006056949444868956,
|
36176 |
+
"loss": 1.5038,
|
36177 |
+
"step": 10288
|
36178 |
+
},
|
36179 |
+
{
|
36180 |
+
"epoch": 0.45751633986928103,
|
36181 |
+
"grad_norm": 0.0640212669968605,
|
36182 |
+
"learning_rate": 0.0006055520549842022,
|
36183 |
+
"loss": 1.5059,
|
36184 |
+
"step": 10290
|
36185 |
+
},
|
36186 |
+
{
|
36187 |
+
"epoch": 0.4576052643279534,
|
36188 |
+
"grad_norm": 0.06613568961620331,
|
36189 |
+
"learning_rate": 0.000605409156458454,
|
36190 |
+
"loss": 1.5122,
|
36191 |
+
"step": 10292
|
36192 |
+
},
|
36193 |
+
{
|
36194 |
+
"epoch": 0.45769418878662577,
|
36195 |
+
"grad_norm": 0.06338582932949066,
|
36196 |
+
"learning_rate": 0.0006052662489218665,
|
36197 |
+
"loss": 1.5069,
|
36198 |
+
"step": 10294
|
36199 |
+
},
|
36200 |
+
{
|
36201 |
+
"epoch": 0.45778311324529813,
|
36202 |
+
"grad_norm": 0.06454145908355713,
|
36203 |
+
"learning_rate": 0.0006051233323866563,
|
36204 |
+
"loss": 1.506,
|
36205 |
+
"step": 10296
|
36206 |
+
},
|
36207 |
+
{
|
36208 |
+
"epoch": 0.4578720377039705,
|
36209 |
+
"grad_norm": 0.06602118164300919,
|
36210 |
+
"learning_rate": 0.0006049804068650403,
|
36211 |
+
"loss": 1.5075,
|
36212 |
+
"step": 10298
|
36213 |
+
},
|
36214 |
+
{
|
36215 |
+
"epoch": 0.4579609621626428,
|
36216 |
+
"grad_norm": 0.06418369710445404,
|
36217 |
+
"learning_rate": 0.0006048374723692365,
|
36218 |
+
"loss": 1.508,
|
36219 |
+
"step": 10300
|
36220 |
+
},
|
36221 |
+
{
|
36222 |
+
"epoch": 0.4580498866213152,
|
36223 |
+
"grad_norm": 0.063139408826828,
|
36224 |
+
"learning_rate": 0.0006046945289114634,
|
36225 |
+
"loss": 1.5068,
|
36226 |
+
"step": 10302
|
36227 |
+
},
|
36228 |
+
{
|
36229 |
+
"epoch": 0.45813881107998755,
|
36230 |
+
"grad_norm": 0.06343439221382141,
|
36231 |
+
"learning_rate": 0.0006045515765039408,
|
36232 |
+
"loss": 1.5078,
|
36233 |
+
"step": 10304
|
36234 |
+
},
|
36235 |
+
{
|
36236 |
+
"epoch": 0.4582277355386599,
|
36237 |
+
"grad_norm": 0.06396979093551636,
|
36238 |
+
"learning_rate": 0.0006044086151588886,
|
36239 |
+
"loss": 1.5098,
|
36240 |
+
"step": 10306
|
36241 |
+
},
|
36242 |
+
{
|
36243 |
+
"epoch": 0.4583166599973323,
|
36244 |
+
"grad_norm": 0.06393872201442719,
|
36245 |
+
"learning_rate": 0.0006042656448885279,
|
36246 |
+
"loss": 1.507,
|
36247 |
+
"step": 10308
|
36248 |
+
},
|
36249 |
+
{
|
36250 |
+
"epoch": 0.4584055844560046,
|
36251 |
+
"grad_norm": 0.06459838151931763,
|
36252 |
+
"learning_rate": 0.0006041226657050804,
|
36253 |
+
"loss": 1.5107,
|
36254 |
+
"step": 10310
|
36255 |
+
},
|
36256 |
+
{
|
36257 |
+
"epoch": 0.45849450891467697,
|
36258 |
+
"grad_norm": 0.06489048898220062,
|
36259 |
+
"learning_rate": 0.0006039796776207686,
|
36260 |
+
"loss": 1.5094,
|
36261 |
+
"step": 10312
|
36262 |
+
},
|
36263 |
+
{
|
36264 |
+
"epoch": 0.45858343337334934,
|
36265 |
+
"grad_norm": 0.06409396976232529,
|
36266 |
+
"learning_rate": 0.0006038366806478157,
|
36267 |
+
"loss": 1.5047,
|
36268 |
+
"step": 10314
|
36269 |
+
},
|
36270 |
+
{
|
36271 |
+
"epoch": 0.4586723578320217,
|
36272 |
+
"grad_norm": 0.06495364010334015,
|
36273 |
+
"learning_rate": 0.0006036936747984456,
|
36274 |
+
"loss": 1.4996,
|
36275 |
+
"step": 10316
|
36276 |
+
},
|
36277 |
+
{
|
36278 |
+
"epoch": 0.4587612822906941,
|
36279 |
+
"grad_norm": 0.06571628898382187,
|
36280 |
+
"learning_rate": 0.0006035506600848835,
|
36281 |
+
"loss": 1.5113,
|
36282 |
+
"step": 10318
|
36283 |
+
},
|
36284 |
+
{
|
36285 |
+
"epoch": 0.45885020674936644,
|
36286 |
+
"grad_norm": 0.06504350155591965,
|
36287 |
+
"learning_rate": 0.0006034076365193545,
|
36288 |
+
"loss": 1.4964,
|
36289 |
+
"step": 10320
|
36290 |
+
},
|
36291 |
+
{
|
36292 |
+
"epoch": 0.45893913120803875,
|
36293 |
+
"grad_norm": 0.06698577105998993,
|
36294 |
+
"learning_rate": 0.0006032646041140849,
|
36295 |
+
"loss": 1.5051,
|
36296 |
+
"step": 10322
|
36297 |
+
},
|
36298 |
+
{
|
36299 |
+
"epoch": 0.4590280556667111,
|
36300 |
+
"grad_norm": 0.06189613789319992,
|
36301 |
+
"learning_rate": 0.0006031215628813021,
|
36302 |
+
"loss": 1.5097,
|
36303 |
+
"step": 10324
|
36304 |
+
},
|
36305 |
+
{
|
36306 |
+
"epoch": 0.4591169801253835,
|
36307 |
+
"grad_norm": 0.06384344398975372,
|
36308 |
+
"learning_rate": 0.0006029785128332336,
|
36309 |
+
"loss": 1.512,
|
36310 |
+
"step": 10326
|
36311 |
+
},
|
36312 |
+
{
|
36313 |
+
"epoch": 0.45920590458405586,
|
36314 |
+
"grad_norm": 0.06703834980726242,
|
36315 |
+
"learning_rate": 0.0006028354539821079,
|
36316 |
+
"loss": 1.5041,
|
36317 |
+
"step": 10328
|
36318 |
+
},
|
36319 |
+
{
|
36320 |
+
"epoch": 0.4592948290427282,
|
36321 |
+
"grad_norm": 0.06500376015901566,
|
36322 |
+
"learning_rate": 0.0006026923863401545,
|
36323 |
+
"loss": 1.5061,
|
36324 |
+
"step": 10330
|
36325 |
+
},
|
36326 |
+
{
|
36327 |
+
"epoch": 0.45938375350140054,
|
36328 |
+
"grad_norm": 0.06535261869430542,
|
36329 |
+
"learning_rate": 0.0006025493099196033,
|
36330 |
+
"loss": 1.5052,
|
36331 |
+
"step": 10332
|
36332 |
+
},
[diff lines 36333–36913 continue the same pattern: one training-log entry every 2 steps from step 10334 through step 10498, with "loss" holding near 1.50 (≈1.497–1.516), "grad_norm" in ≈0.061–0.068, and "learning_rate" decaying from 0.0006024062247326854 to 0.0005906445743415845]
36914 | +    {
36915 | +      "epoch": 0.4668534080298786,
36916 | +      "grad_norm": 0.06403973698616028,
36917 | +      "learning_rate": 0.0005905008042546878,
36918 | +      "loss": 1.5053,
36919 | +      "step": 10500
36920 | +    },
36921 | +    {
36922 | +      "epoch": 0.4668534080298786,
36923 | +      "eval_loss": 1.4856921434402466,
36924 | +      "eval_runtime": 12.4102,
36925 | +      "eval_samples_per_second": 556.799,
36926 | +      "eval_steps_per_second": 69.62,
36927 | +      "step": 10500
36928 | +    },
[diff lines 36929–38342 continue the same pattern: one training-log entry every 2 steps from step 10502 through step 10904, with "loss" near 1.50 (≈1.494–1.512), "grad_norm" in ≈0.061–0.070, and "learning_rate" decaying from 0.0005903570264313837 to 0.0005613175534440875]
38343 | +    {
38344 | +      "epoch": 0.48490507314036724,
38345 | +      "grad_norm": 0.06417332589626312,
38346 | +      "learning_rate": 0.0005611724656986741,
38347 |
+
"loss": 1.4987,
|
38348 |
+
"step": 10906
|
38349 |
+
},
|
38350 |
+
{
|
38351 |
+
"epoch": 0.4849939975990396,
|
38352 |
+
"grad_norm": 0.06176379695534706,
|
38353 |
+
"learning_rate": 0.0005610273727239688,
|
38354 |
+
"loss": 1.5024,
|
38355 |
+
"step": 10908
|
38356 |
+
},
|
38357 |
+
{
|
38358 |
+
"epoch": 0.485082922057712,
|
38359 |
+
"grad_norm": 0.06424736231565475,
|
38360 |
+
"learning_rate": 0.0005608822745323748,
|
38361 |
+
"loss": 1.5012,
|
38362 |
+
"step": 10910
|
38363 |
+
},
|
38364 |
+
{
|
38365 |
+
"epoch": 0.48517184651638434,
|
38366 |
+
"grad_norm": 0.06530416756868362,
|
38367 |
+
"learning_rate": 0.0005607371711362956,
|
38368 |
+
"loss": 1.4997,
|
38369 |
+
"step": 10912
|
38370 |
+
},
|
38371 |
+
{
|
38372 |
+
"epoch": 0.4852607709750567,
|
38373 |
+
"grad_norm": 0.06718974560499191,
|
38374 |
+
"learning_rate": 0.0005605920625481353,
|
38375 |
+
"loss": 1.5028,
|
38376 |
+
"step": 10914
|
38377 |
+
},
|
38378 |
+
{
|
38379 |
+
"epoch": 0.485349695433729,
|
38380 |
+
"grad_norm": 0.06613506376743317,
|
38381 |
+
"learning_rate": 0.0005604469487802987,
|
38382 |
+
"loss": 1.4958,
|
38383 |
+
"step": 10916
|
38384 |
+
},
|
38385 |
+
{
|
38386 |
+
"epoch": 0.4854386198924014,
|
38387 |
+
"grad_norm": 0.06338746100664139,
|
38388 |
+
"learning_rate": 0.0005603018298451903,
|
38389 |
+
"loss": 1.5018,
|
38390 |
+
"step": 10918
|
38391 |
+
},
|
38392 |
+
{
|
38393 |
+
"epoch": 0.48552754435107376,
|
38394 |
+
"grad_norm": 0.06582844257354736,
|
38395 |
+
"learning_rate": 0.0005601567057552158,
|
38396 |
+
"loss": 1.4997,
|
38397 |
+
"step": 10920
|
38398 |
+
},
|
38399 |
+
{
|
38400 |
+
"epoch": 0.4856164688097461,
|
38401 |
+
"grad_norm": 0.06286223977804184,
|
38402 |
+
"learning_rate": 0.000560011576522781,
|
38403 |
+
"loss": 1.4998,
|
38404 |
+
"step": 10922
|
38405 |
+
},
|
38406 |
+
{
|
38407 |
+
"epoch": 0.4857053932684185,
|
38408 |
+
"grad_norm": 0.06453262269496918,
|
38409 |
+
"learning_rate": 0.0005598664421602921,
|
38410 |
+
"loss": 1.4966,
|
38411 |
+
"step": 10924
|
38412 |
+
},
|
38413 |
+
{
|
38414 |
+
"epoch": 0.48579431772709086,
|
38415 |
+
"grad_norm": 0.06320101022720337,
|
38416 |
+
"learning_rate": 0.0005597213026801561,
|
38417 |
+
"loss": 1.5054,
|
38418 |
+
"step": 10926
|
38419 |
+
},
|
38420 |
+
{
|
38421 |
+
"epoch": 0.4858832421857632,
|
38422 |
+
"grad_norm": 0.06558766961097717,
|
38423 |
+
"learning_rate": 0.0005595761580947798,
|
38424 |
+
"loss": 1.5011,
|
38425 |
+
"step": 10928
|
38426 |
+
},
|
38427 |
+
{
|
38428 |
+
"epoch": 0.48597216664443554,
|
38429 |
+
"grad_norm": 0.06365552544593811,
|
38430 |
+
"learning_rate": 0.000559431008416571,
|
38431 |
+
"loss": 1.5008,
|
38432 |
+
"step": 10930
|
38433 |
+
},
|
38434 |
+
{
|
38435 |
+
"epoch": 0.4860610911031079,
|
38436 |
+
"grad_norm": 0.06486231088638306,
|
38437 |
+
"learning_rate": 0.0005592858536579377,
|
38438 |
+
"loss": 1.5008,
|
38439 |
+
"step": 10932
|
38440 |
+
},
|
38441 |
+
{
|
38442 |
+
"epoch": 0.4861500155617803,
|
38443 |
+
"grad_norm": 0.063105009496212,
|
38444 |
+
"learning_rate": 0.0005591406938312885,
|
38445 |
+
"loss": 1.5011,
|
38446 |
+
"step": 10934
|
38447 |
+
},
|
38448 |
+
{
|
38449 |
+
"epoch": 0.48623894002045265,
|
38450 |
+
"grad_norm": 0.06280262023210526,
|
38451 |
+
"learning_rate": 0.000558995528949032,
|
38452 |
+
"loss": 1.4973,
|
38453 |
+
"step": 10936
|
38454 |
+
},
|
38455 |
+
{
|
38456 |
+
"epoch": 0.48632786447912496,
|
38457 |
+
"grad_norm": 0.0647190734744072,
|
38458 |
+
"learning_rate": 0.0005588503590235777,
|
38459 |
+
"loss": 1.4982,
|
38460 |
+
"step": 10938
|
38461 |
+
},
|
38462 |
+
{
|
38463 |
+
"epoch": 0.48641678893779733,
|
38464 |
+
"grad_norm": 0.06445516645908356,
|
38465 |
+
"learning_rate": 0.0005587051840673355,
|
38466 |
+
"loss": 1.5,
|
38467 |
+
"step": 10940
|
38468 |
+
},
|
38469 |
+
{
|
38470 |
+
"epoch": 0.4865057133964697,
|
38471 |
+
"grad_norm": 0.06466984003782272,
|
38472 |
+
"learning_rate": 0.0005585600040927154,
|
38473 |
+
"loss": 1.4962,
|
38474 |
+
"step": 10942
|
38475 |
+
},
|
38476 |
+
{
|
38477 |
+
"epoch": 0.48659463785514206,
|
38478 |
+
"grad_norm": 0.06264392286539078,
|
38479 |
+
"learning_rate": 0.0005584148191121279,
|
38480 |
+
"loss": 1.4962,
|
38481 |
+
"step": 10944
|
38482 |
+
},
|
38483 |
+
{
|
38484 |
+
"epoch": 0.48668356231381443,
|
38485 |
+
"grad_norm": 0.06432507932186127,
|
38486 |
+
"learning_rate": 0.0005582696291379843,
|
38487 |
+
"loss": 1.4995,
|
38488 |
+
"step": 10946
|
38489 |
+
},
|
38490 |
+
{
|
38491 |
+
"epoch": 0.48677248677248675,
|
38492 |
+
"grad_norm": 0.0634993389248848,
|
38493 |
+
"learning_rate": 0.0005581244341826963,
|
38494 |
+
"loss": 1.5032,
|
38495 |
+
"step": 10948
|
38496 |
+
},
|
38497 |
+
{
|
38498 |
+
"epoch": 0.4868614112311591,
|
38499 |
+
"grad_norm": 0.06333111226558685,
|
38500 |
+
"learning_rate": 0.0005579792342586753,
|
38501 |
+
"loss": 1.5059,
|
38502 |
+
"step": 10950
|
38503 |
+
},
|
38504 |
+
{
|
38505 |
+
"epoch": 0.4869503356898315,
|
38506 |
+
"grad_norm": 0.06281735002994537,
|
38507 |
+
"learning_rate": 0.0005578340293783339,
|
38508 |
+
"loss": 1.5028,
|
38509 |
+
"step": 10952
|
38510 |
+
},
|
38511 |
+
{
|
38512 |
+
"epoch": 0.48703926014850385,
|
38513 |
+
"grad_norm": 0.06381440162658691,
|
38514 |
+
"learning_rate": 0.0005576888195540848,
|
38515 |
+
"loss": 1.5039,
|
38516 |
+
"step": 10954
|
38517 |
+
},
|
38518 |
+
{
|
38519 |
+
"epoch": 0.4871281846071762,
|
38520 |
+
"grad_norm": 0.06332245469093323,
|
38521 |
+
"learning_rate": 0.000557543604798341,
|
38522 |
+
"loss": 1.5009,
|
38523 |
+
"step": 10956
|
38524 |
+
},
|
38525 |
+
{
|
38526 |
+
"epoch": 0.4872171090658486,
|
38527 |
+
"grad_norm": 0.06438940018415451,
|
38528 |
+
"learning_rate": 0.0005573983851235165,
|
38529 |
+
"loss": 1.5028,
|
38530 |
+
"step": 10958
|
38531 |
+
},
|
38532 |
+
{
|
38533 |
+
"epoch": 0.4873060335245209,
|
38534 |
+
"grad_norm": 0.06432240456342697,
|
38535 |
+
"learning_rate": 0.000557253160542025,
|
38536 |
+
"loss": 1.4928,
|
38537 |
+
"step": 10960
|
38538 |
+
},
|
38539 |
+
{
|
38540 |
+
"epoch": 0.48739495798319327,
|
38541 |
+
"grad_norm": 0.06432882696390152,
|
38542 |
+
"learning_rate": 0.0005571079310662811,
|
38543 |
+
"loss": 1.5016,
|
38544 |
+
"step": 10962
|
38545 |
+
},
|
38546 |
+
{
|
38547 |
+
"epoch": 0.48748388244186563,
|
38548 |
+
"grad_norm": 0.06522729992866516,
|
38549 |
+
"learning_rate": 0.0005569626967086995,
|
38550 |
+
"loss": 1.5001,
|
38551 |
+
"step": 10964
|
38552 |
+
},
|
38553 |
+
{
|
38554 |
+
"epoch": 0.487572806900538,
|
38555 |
+
"grad_norm": 0.0628109946846962,
|
38556 |
+
"learning_rate": 0.0005568174574816957,
|
38557 |
+
"loss": 1.505,
|
38558 |
+
"step": 10966
|
38559 |
+
},
|
38560 |
+
{
|
38561 |
+
"epoch": 0.48766173135921037,
|
38562 |
+
"grad_norm": 0.06767455488443375,
|
38563 |
+
"learning_rate": 0.0005566722133976851,
|
38564 |
+
"loss": 1.5021,
|
38565 |
+
"step": 10968
|
38566 |
+
},
|
38567 |
+
{
|
38568 |
+
"epoch": 0.4877506558178827,
|
38569 |
+
"grad_norm": 0.06257057934999466,
|
38570 |
+
"learning_rate": 0.000556526964469084,
|
38571 |
+
"loss": 1.4986,
|
38572 |
+
"step": 10970
|
38573 |
+
},
|
38574 |
+
{
|
38575 |
+
"epoch": 0.48783958027655505,
|
38576 |
+
"grad_norm": 0.06524206697940826,
|
38577 |
+
"learning_rate": 0.0005563817107083088,
|
38578 |
+
"loss": 1.4929,
|
38579 |
+
"step": 10972
|
38580 |
+
},
|
38581 |
+
{
|
38582 |
+
"epoch": 0.4879285047352274,
|
38583 |
+
"grad_norm": 0.06345758587121964,
|
38584 |
+
"learning_rate": 0.0005562364521277766,
|
38585 |
+
"loss": 1.4987,
|
38586 |
+
"step": 10974
|
38587 |
+
},
|
38588 |
+
{
|
38589 |
+
"epoch": 0.4880174291938998,
|
38590 |
+
"grad_norm": 0.0644221380352974,
|
38591 |
+
"learning_rate": 0.0005560911887399047,
|
38592 |
+
"loss": 1.4985,
|
38593 |
+
"step": 10976
|
38594 |
+
},
|
38595 |
+
{
|
38596 |
+
"epoch": 0.48810635365257216,
|
38597 |
+
"grad_norm": 0.06262285262346268,
|
38598 |
+
"learning_rate": 0.0005559459205571106,
|
38599 |
+
"loss": 1.5021,
|
38600 |
+
"step": 10978
|
38601 |
+
},
|
38602 |
+
{
|
38603 |
+
"epoch": 0.4881952781112445,
|
38604 |
+
"grad_norm": 0.06352121382951736,
|
38605 |
+
"learning_rate": 0.0005558006475918128,
|
38606 |
+
"loss": 1.5005,
|
38607 |
+
"step": 10980
|
38608 |
+
},
|
38609 |
+
{
|
38610 |
+
"epoch": 0.48828420256991684,
|
38611 |
+
"grad_norm": 0.06309682130813599,
|
38612 |
+
"learning_rate": 0.0005556553698564297,
|
38613 |
+
"loss": 1.5002,
|
38614 |
+
"step": 10982
|
38615 |
+
},
|
38616 |
+
{
|
38617 |
+
"epoch": 0.4883731270285892,
|
38618 |
+
"grad_norm": 0.061160314828157425,
|
38619 |
+
"learning_rate": 0.0005555100873633804,
|
38620 |
+
"loss": 1.5016,
|
38621 |
+
"step": 10984
|
38622 |
+
},
|
38623 |
+
{
|
38624 |
+
"epoch": 0.4884620514872616,
|
38625 |
+
"grad_norm": 0.06492006033658981,
|
38626 |
+
"learning_rate": 0.0005553648001250842,
|
38627 |
+
"loss": 1.5017,
|
38628 |
+
"step": 10986
|
38629 |
+
},
|
38630 |
+
{
|
38631 |
+
"epoch": 0.48855097594593394,
|
38632 |
+
"grad_norm": 0.06279385089874268,
|
38633 |
+
"learning_rate": 0.0005552195081539608,
|
38634 |
+
"loss": 1.5062,
|
38635 |
+
"step": 10988
|
38636 |
+
},
|
38637 |
+
{
|
38638 |
+
"epoch": 0.4886399004046063,
|
38639 |
+
"grad_norm": 0.06334224343299866,
|
38640 |
+
"learning_rate": 0.0005550742114624305,
|
38641 |
+
"loss": 1.5027,
|
38642 |
+
"step": 10990
|
38643 |
+
},
|
38644 |
+
{
|
38645 |
+
"epoch": 0.4887288248632786,
|
38646 |
+
"grad_norm": 0.06543200463056564,
|
38647 |
+
"learning_rate": 0.000554928910062914,
|
38648 |
+
"loss": 1.5028,
|
38649 |
+
"step": 10992
|
38650 |
+
},
|
38651 |
+
{
|
38652 |
+
"epoch": 0.488817749321951,
|
38653 |
+
"grad_norm": 0.06258574873209,
|
38654 |
+
"learning_rate": 0.0005547836039678321,
|
38655 |
+
"loss": 1.5023,
|
38656 |
+
"step": 10994
|
38657 |
+
},
|
38658 |
+
{
|
38659 |
+
"epoch": 0.48890667378062336,
|
38660 |
+
"grad_norm": 0.06658318638801575,
|
38661 |
+
"learning_rate": 0.0005546382931896065,
|
38662 |
+
"loss": 1.4961,
|
38663 |
+
"step": 10996
|
38664 |
+
},
|
38665 |
+
{
|
38666 |
+
"epoch": 0.4889955982392957,
|
38667 |
+
"grad_norm": 0.062175750732421875,
|
38668 |
+
"learning_rate": 0.0005544929777406586,
|
38669 |
+
"loss": 1.5022,
|
38670 |
+
"step": 10998
|
38671 |
+
},
|
38672 |
+
{
|
38673 |
+
"epoch": 0.4890845226979681,
|
38674 |
+
"grad_norm": 0.06402657181024551,
|
38675 |
+
"learning_rate": 0.0005543476576334109,
|
38676 |
+
"loss": 1.4992,
|
38677 |
+
"step": 11000
|
38678 |
+
},
|
38679 |
+
{
|
38680 |
+
"epoch": 0.4890845226979681,
|
38681 |
+
"eval_loss": 1.4820914268493652,
|
38682 |
+
"eval_runtime": 13.0255,
|
38683 |
+
"eval_samples_per_second": 530.5,
|
38684 |
+
"eval_steps_per_second": 66.332,
|
38685 |
+
"step": 11000
|
38686 |
}
|
38687 |
],
|
38688 |
"logging_steps": 2,
|
...
       "attributes": {}
     }
   },
+  "total_flos": 2.353704355233792e+19,
   "train_batch_size": 768,
   "trial_name": null,
   "trial_params": null
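The hunk above appends the train-loss entries logged every 2 steps (per "logging_steps") through step 11000, plus the evaluation block recorded at that step. Unlike the binary files in this commit, trainer_state.json is plain JSON rather than an LFS pointer, so the log can be inspected directly. The snippet below is a minimal sketch, not part of the checkpoint; the relative path assumes a local clone of this repo.

import json

# Load the checkpoint's training state (plain JSON, no LFS pull required).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss" instead.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

last_train, last_eval = train_logs[-1], eval_logs[-1]
print(f"train loss {last_train['loss']} at step {last_train['step']} "
      f"(lr {last_train['learning_rate']}, logged every {state['logging_steps']} steps)")
print(f"eval loss {last_eval['eval_loss']} at step {last_eval['step']}")

Run against this checkpoint, it should report the step-11000 train loss of 1.4992 and eval loss of 1.4820914268493652 shown in the diff above.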
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c16b168a1b45c2dd9bad38e7f3121cc40352741df7801b098a9bb22dcd0ff219
 size 5240
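training_args.bin is stored as a git-lfs pointer: a spec version line, a sha256 oid of the blob, and the blob's size in bytes. The sketch below is a hypothetical convenience, not part of the repo; assuming a local clone with LFS files pulled, it recomputes the oid and size and checks them against the pointer fields in the diff above.

import hashlib
import os

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file through sha256; this is how git-lfs derives its oid."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

# Path assumes a local clone of this repo with `git lfs pull` already run.
path = "last-checkpoint/training_args.bin"
assert lfs_oid(path) == "c16b168a1b45c2dd9bad38e7f3121cc40352741df7801b098a9bb22dcd0ff219"
assert os.path.getsize(path) == 5240
print("blob matches its LFS pointer")

The same check applies to any of the LFS-tracked binaries in this checkpoint: a mismatch in either the oid or the size means the local blob does not correspond to the pointer committed here.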