gabrielaltay
commited on
Training in progress, step 9288, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 439648328
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0015c35a2eddaec815db15463fdc85043d66de0a9c2962eda74d561b81414b6f
|
3 |
size 439648328
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 879415866
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eabcb53a0ad24de4121adf17313188b0c1f3343621a0e7c0c20bdb33797bd83f
|
3 |
size 879415866
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc1f62733ff0d8690c2a8797d153e8ec399303cbc92322dd118c7b88915a04be
|
3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da252e7b08747075dc2ad5c781c5deb49e64dd87681bbc7b4aaa89a30b6f9d5f
|
3 |
size 14512
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a78424df3b6c3f885135b5bf0deeb48c09ab08dbdc18e3e5b017f82ee6747c9
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -14455,6 +14455,1812 @@
|
|
14455 |
"learning_rate": 9.98157843707582e-06,
|
14456 |
"loss": 5.2778,
|
14457 |
"step": 8256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14458 |
}
|
14459 |
],
|
14460 |
"logging_steps": 4,
|
@@ -14462,7 +16268,7 @@
|
|
14462 |
"num_input_tokens_seen": 0,
|
14463 |
"num_train_epochs": 1,
|
14464 |
"save_steps": 1032,
|
14465 |
-
"total_flos":
|
14466 |
"train_batch_size": 8,
|
14467 |
"trial_name": null,
|
14468 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.900523560209424,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 9288,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
14455 |
"learning_rate": 9.98157843707582e-06,
|
14456 |
"loss": 5.2778,
|
14457 |
"step": 8256
|
14458 |
+
},
|
14459 |
+
{
|
14460 |
+
"epoch": 0.8,
|
14461 |
+
"grad_norm": 1.036868691444397,
|
14462 |
+
"learning_rate": 9.962187318208263e-06,
|
14463 |
+
"loss": 5.31,
|
14464 |
+
"step": 8260
|
14465 |
+
},
|
14466 |
+
{
|
14467 |
+
"epoch": 0.8,
|
14468 |
+
"grad_norm": 1.0464235544204712,
|
14469 |
+
"learning_rate": 9.942796199340703e-06,
|
14470 |
+
"loss": 5.3512,
|
14471 |
+
"step": 8264
|
14472 |
+
},
|
14473 |
+
{
|
14474 |
+
"epoch": 0.8,
|
14475 |
+
"grad_norm": 1.001470685005188,
|
14476 |
+
"learning_rate": 9.923405080473144e-06,
|
14477 |
+
"loss": 5.2698,
|
14478 |
+
"step": 8268
|
14479 |
+
},
|
14480 |
+
{
|
14481 |
+
"epoch": 0.8,
|
14482 |
+
"grad_norm": 1.0910736322402954,
|
14483 |
+
"learning_rate": 9.904013961605584e-06,
|
14484 |
+
"loss": 5.363,
|
14485 |
+
"step": 8272
|
14486 |
+
},
|
14487 |
+
{
|
14488 |
+
"epoch": 0.8,
|
14489 |
+
"grad_norm": 1.087928056716919,
|
14490 |
+
"learning_rate": 9.884622842738026e-06,
|
14491 |
+
"loss": 5.3454,
|
14492 |
+
"step": 8276
|
14493 |
+
},
|
14494 |
+
{
|
14495 |
+
"epoch": 0.8,
|
14496 |
+
"grad_norm": 1.055014967918396,
|
14497 |
+
"learning_rate": 9.865231723870467e-06,
|
14498 |
+
"loss": 5.3134,
|
14499 |
+
"step": 8280
|
14500 |
+
},
|
14501 |
+
{
|
14502 |
+
"epoch": 0.8,
|
14503 |
+
"grad_norm": 1.1186180114746094,
|
14504 |
+
"learning_rate": 9.84584060500291e-06,
|
14505 |
+
"loss": 5.3016,
|
14506 |
+
"step": 8284
|
14507 |
+
},
|
14508 |
+
{
|
14509 |
+
"epoch": 0.8,
|
14510 |
+
"grad_norm": 1.0159074068069458,
|
14511 |
+
"learning_rate": 9.82644948613535e-06,
|
14512 |
+
"loss": 5.3974,
|
14513 |
+
"step": 8288
|
14514 |
+
},
|
14515 |
+
{
|
14516 |
+
"epoch": 0.8,
|
14517 |
+
"grad_norm": 1.1419733762741089,
|
14518 |
+
"learning_rate": 9.807058367267792e-06,
|
14519 |
+
"loss": 5.333,
|
14520 |
+
"step": 8292
|
14521 |
+
},
|
14522 |
+
{
|
14523 |
+
"epoch": 0.8,
|
14524 |
+
"grad_norm": 1.078598976135254,
|
14525 |
+
"learning_rate": 9.787667248400232e-06,
|
14526 |
+
"loss": 5.2157,
|
14527 |
+
"step": 8296
|
14528 |
+
},
|
14529 |
+
{
|
14530 |
+
"epoch": 0.8,
|
14531 |
+
"grad_norm": 1.0978525876998901,
|
14532 |
+
"learning_rate": 9.768276129532675e-06,
|
14533 |
+
"loss": 5.3084,
|
14534 |
+
"step": 8300
|
14535 |
+
},
|
14536 |
+
{
|
14537 |
+
"epoch": 0.81,
|
14538 |
+
"grad_norm": 1.0307817459106445,
|
14539 |
+
"learning_rate": 9.748885010665115e-06,
|
14540 |
+
"loss": 5.2962,
|
14541 |
+
"step": 8304
|
14542 |
+
},
|
14543 |
+
{
|
14544 |
+
"epoch": 0.81,
|
14545 |
+
"grad_norm": 0.9767160415649414,
|
14546 |
+
"learning_rate": 9.729493891797557e-06,
|
14547 |
+
"loss": 5.3765,
|
14548 |
+
"step": 8308
|
14549 |
+
},
|
14550 |
+
{
|
14551 |
+
"epoch": 0.81,
|
14552 |
+
"grad_norm": 1.048744797706604,
|
14553 |
+
"learning_rate": 9.710102772929998e-06,
|
14554 |
+
"loss": 5.3491,
|
14555 |
+
"step": 8312
|
14556 |
+
},
|
14557 |
+
{
|
14558 |
+
"epoch": 0.81,
|
14559 |
+
"grad_norm": 1.1452877521514893,
|
14560 |
+
"learning_rate": 9.69071165406244e-06,
|
14561 |
+
"loss": 5.3348,
|
14562 |
+
"step": 8316
|
14563 |
+
},
|
14564 |
+
{
|
14565 |
+
"epoch": 0.81,
|
14566 |
+
"grad_norm": 1.0742149353027344,
|
14567 |
+
"learning_rate": 9.671320535194882e-06,
|
14568 |
+
"loss": 5.4119,
|
14569 |
+
"step": 8320
|
14570 |
+
},
|
14571 |
+
{
|
14572 |
+
"epoch": 0.81,
|
14573 |
+
"grad_norm": 1.016554594039917,
|
14574 |
+
"learning_rate": 9.651929416327323e-06,
|
14575 |
+
"loss": 5.2832,
|
14576 |
+
"step": 8324
|
14577 |
+
},
|
14578 |
+
{
|
14579 |
+
"epoch": 0.81,
|
14580 |
+
"grad_norm": 1.1286094188690186,
|
14581 |
+
"learning_rate": 9.632538297459765e-06,
|
14582 |
+
"loss": 5.3436,
|
14583 |
+
"step": 8328
|
14584 |
+
},
|
14585 |
+
{
|
14586 |
+
"epoch": 0.81,
|
14587 |
+
"grad_norm": 1.0719375610351562,
|
14588 |
+
"learning_rate": 9.613147178592206e-06,
|
14589 |
+
"loss": 5.2561,
|
14590 |
+
"step": 8332
|
14591 |
+
},
|
14592 |
+
{
|
14593 |
+
"epoch": 0.81,
|
14594 |
+
"grad_norm": 1.0239946842193604,
|
14595 |
+
"learning_rate": 9.593756059724646e-06,
|
14596 |
+
"loss": 5.3411,
|
14597 |
+
"step": 8336
|
14598 |
+
},
|
14599 |
+
{
|
14600 |
+
"epoch": 0.81,
|
14601 |
+
"grad_norm": 1.16642165184021,
|
14602 |
+
"learning_rate": 9.574364940857087e-06,
|
14603 |
+
"loss": 5.2173,
|
14604 |
+
"step": 8340
|
14605 |
+
},
|
14606 |
+
{
|
14607 |
+
"epoch": 0.81,
|
14608 |
+
"grad_norm": 1.056943655014038,
|
14609 |
+
"learning_rate": 9.554973821989529e-06,
|
14610 |
+
"loss": 5.3138,
|
14611 |
+
"step": 8344
|
14612 |
+
},
|
14613 |
+
{
|
14614 |
+
"epoch": 0.81,
|
14615 |
+
"grad_norm": 1.0310717821121216,
|
14616 |
+
"learning_rate": 9.53558270312197e-06,
|
14617 |
+
"loss": 5.2147,
|
14618 |
+
"step": 8348
|
14619 |
+
},
|
14620 |
+
{
|
14621 |
+
"epoch": 0.81,
|
14622 |
+
"grad_norm": 1.0939549207687378,
|
14623 |
+
"learning_rate": 9.516191584254412e-06,
|
14624 |
+
"loss": 5.3849,
|
14625 |
+
"step": 8352
|
14626 |
+
},
|
14627 |
+
{
|
14628 |
+
"epoch": 0.81,
|
14629 |
+
"grad_norm": 1.0846009254455566,
|
14630 |
+
"learning_rate": 9.496800465386852e-06,
|
14631 |
+
"loss": 5.2793,
|
14632 |
+
"step": 8356
|
14633 |
+
},
|
14634 |
+
{
|
14635 |
+
"epoch": 0.81,
|
14636 |
+
"grad_norm": 1.0984148979187012,
|
14637 |
+
"learning_rate": 9.477409346519295e-06,
|
14638 |
+
"loss": 5.3142,
|
14639 |
+
"step": 8360
|
14640 |
+
},
|
14641 |
+
{
|
14642 |
+
"epoch": 0.81,
|
14643 |
+
"grad_norm": 1.035758376121521,
|
14644 |
+
"learning_rate": 9.458018227651735e-06,
|
14645 |
+
"loss": 5.264,
|
14646 |
+
"step": 8364
|
14647 |
+
},
|
14648 |
+
{
|
14649 |
+
"epoch": 0.81,
|
14650 |
+
"grad_norm": 1.0837132930755615,
|
14651 |
+
"learning_rate": 9.438627108784177e-06,
|
14652 |
+
"loss": 5.432,
|
14653 |
+
"step": 8368
|
14654 |
+
},
|
14655 |
+
{
|
14656 |
+
"epoch": 0.81,
|
14657 |
+
"grad_norm": 1.0333995819091797,
|
14658 |
+
"learning_rate": 9.419235989916618e-06,
|
14659 |
+
"loss": 5.2622,
|
14660 |
+
"step": 8372
|
14661 |
+
},
|
14662 |
+
{
|
14663 |
+
"epoch": 0.81,
|
14664 |
+
"grad_norm": 1.054474949836731,
|
14665 |
+
"learning_rate": 9.39984487104906e-06,
|
14666 |
+
"loss": 5.2721,
|
14667 |
+
"step": 8376
|
14668 |
+
},
|
14669 |
+
{
|
14670 |
+
"epoch": 0.81,
|
14671 |
+
"grad_norm": 1.0750809907913208,
|
14672 |
+
"learning_rate": 9.380453752181502e-06,
|
14673 |
+
"loss": 5.2433,
|
14674 |
+
"step": 8380
|
14675 |
+
},
|
14676 |
+
{
|
14677 |
+
"epoch": 0.81,
|
14678 |
+
"grad_norm": 1.054914116859436,
|
14679 |
+
"learning_rate": 9.361062633313943e-06,
|
14680 |
+
"loss": 5.3407,
|
14681 |
+
"step": 8384
|
14682 |
+
},
|
14683 |
+
{
|
14684 |
+
"epoch": 0.81,
|
14685 |
+
"grad_norm": 1.1101247072219849,
|
14686 |
+
"learning_rate": 9.341671514446385e-06,
|
14687 |
+
"loss": 5.2774,
|
14688 |
+
"step": 8388
|
14689 |
+
},
|
14690 |
+
{
|
14691 |
+
"epoch": 0.81,
|
14692 |
+
"grad_norm": 1.1054069995880127,
|
14693 |
+
"learning_rate": 9.322280395578826e-06,
|
14694 |
+
"loss": 5.3978,
|
14695 |
+
"step": 8392
|
14696 |
+
},
|
14697 |
+
{
|
14698 |
+
"epoch": 0.81,
|
14699 |
+
"grad_norm": 1.0813637971878052,
|
14700 |
+
"learning_rate": 9.302889276711268e-06,
|
14701 |
+
"loss": 5.3735,
|
14702 |
+
"step": 8396
|
14703 |
+
},
|
14704 |
+
{
|
14705 |
+
"epoch": 0.81,
|
14706 |
+
"grad_norm": 1.151734709739685,
|
14707 |
+
"learning_rate": 9.283498157843708e-06,
|
14708 |
+
"loss": 5.4109,
|
14709 |
+
"step": 8400
|
14710 |
+
},
|
14711 |
+
{
|
14712 |
+
"epoch": 0.81,
|
14713 |
+
"grad_norm": 1.0774791240692139,
|
14714 |
+
"learning_rate": 9.264107038976149e-06,
|
14715 |
+
"loss": 5.2963,
|
14716 |
+
"step": 8404
|
14717 |
+
},
|
14718 |
+
{
|
14719 |
+
"epoch": 0.82,
|
14720 |
+
"grad_norm": 1.0468578338623047,
|
14721 |
+
"learning_rate": 9.24471592010859e-06,
|
14722 |
+
"loss": 5.334,
|
14723 |
+
"step": 8408
|
14724 |
+
},
|
14725 |
+
{
|
14726 |
+
"epoch": 0.82,
|
14727 |
+
"grad_norm": 1.0247350931167603,
|
14728 |
+
"learning_rate": 9.225324801241032e-06,
|
14729 |
+
"loss": 5.3307,
|
14730 |
+
"step": 8412
|
14731 |
+
},
|
14732 |
+
{
|
14733 |
+
"epoch": 0.82,
|
14734 |
+
"grad_norm": 1.021700143814087,
|
14735 |
+
"learning_rate": 9.205933682373472e-06,
|
14736 |
+
"loss": 5.3315,
|
14737 |
+
"step": 8416
|
14738 |
+
},
|
14739 |
+
{
|
14740 |
+
"epoch": 0.82,
|
14741 |
+
"grad_norm": 1.0852890014648438,
|
14742 |
+
"learning_rate": 9.186542563505915e-06,
|
14743 |
+
"loss": 5.23,
|
14744 |
+
"step": 8420
|
14745 |
+
},
|
14746 |
+
{
|
14747 |
+
"epoch": 0.82,
|
14748 |
+
"grad_norm": 1.0435699224472046,
|
14749 |
+
"learning_rate": 9.167151444638355e-06,
|
14750 |
+
"loss": 5.3353,
|
14751 |
+
"step": 8424
|
14752 |
+
},
|
14753 |
+
{
|
14754 |
+
"epoch": 0.82,
|
14755 |
+
"grad_norm": 1.0124831199645996,
|
14756 |
+
"learning_rate": 9.147760325770797e-06,
|
14757 |
+
"loss": 5.3257,
|
14758 |
+
"step": 8428
|
14759 |
+
},
|
14760 |
+
{
|
14761 |
+
"epoch": 0.82,
|
14762 |
+
"grad_norm": 1.0061957836151123,
|
14763 |
+
"learning_rate": 9.128369206903238e-06,
|
14764 |
+
"loss": 5.3437,
|
14765 |
+
"step": 8432
|
14766 |
+
},
|
14767 |
+
{
|
14768 |
+
"epoch": 0.82,
|
14769 |
+
"grad_norm": 1.0675718784332275,
|
14770 |
+
"learning_rate": 9.10897808803568e-06,
|
14771 |
+
"loss": 5.3543,
|
14772 |
+
"step": 8436
|
14773 |
+
},
|
14774 |
+
{
|
14775 |
+
"epoch": 0.82,
|
14776 |
+
"grad_norm": 1.0940582752227783,
|
14777 |
+
"learning_rate": 9.089586969168122e-06,
|
14778 |
+
"loss": 5.3205,
|
14779 |
+
"step": 8440
|
14780 |
+
},
|
14781 |
+
{
|
14782 |
+
"epoch": 0.82,
|
14783 |
+
"grad_norm": 1.0253922939300537,
|
14784 |
+
"learning_rate": 9.070195850300563e-06,
|
14785 |
+
"loss": 5.2836,
|
14786 |
+
"step": 8444
|
14787 |
+
},
|
14788 |
+
{
|
14789 |
+
"epoch": 0.82,
|
14790 |
+
"grad_norm": 1.0632801055908203,
|
14791 |
+
"learning_rate": 9.050804731433005e-06,
|
14792 |
+
"loss": 5.3634,
|
14793 |
+
"step": 8448
|
14794 |
+
},
|
14795 |
+
{
|
14796 |
+
"epoch": 0.82,
|
14797 |
+
"grad_norm": 1.151405692100525,
|
14798 |
+
"learning_rate": 9.031413612565446e-06,
|
14799 |
+
"loss": 5.266,
|
14800 |
+
"step": 8452
|
14801 |
+
},
|
14802 |
+
{
|
14803 |
+
"epoch": 0.82,
|
14804 |
+
"grad_norm": 1.1215803623199463,
|
14805 |
+
"learning_rate": 9.012022493697888e-06,
|
14806 |
+
"loss": 5.3605,
|
14807 |
+
"step": 8456
|
14808 |
+
},
|
14809 |
+
{
|
14810 |
+
"epoch": 0.82,
|
14811 |
+
"grad_norm": 1.0406687259674072,
|
14812 |
+
"learning_rate": 8.992631374830328e-06,
|
14813 |
+
"loss": 5.3037,
|
14814 |
+
"step": 8460
|
14815 |
+
},
|
14816 |
+
{
|
14817 |
+
"epoch": 0.82,
|
14818 |
+
"grad_norm": 1.0305143594741821,
|
14819 |
+
"learning_rate": 8.97324025596277e-06,
|
14820 |
+
"loss": 5.2419,
|
14821 |
+
"step": 8464
|
14822 |
+
},
|
14823 |
+
{
|
14824 |
+
"epoch": 0.82,
|
14825 |
+
"grad_norm": 1.0159248113632202,
|
14826 |
+
"learning_rate": 8.953849137095211e-06,
|
14827 |
+
"loss": 5.2576,
|
14828 |
+
"step": 8468
|
14829 |
+
},
|
14830 |
+
{
|
14831 |
+
"epoch": 0.82,
|
14832 |
+
"grad_norm": 1.0444971323013306,
|
14833 |
+
"learning_rate": 8.934458018227652e-06,
|
14834 |
+
"loss": 5.2461,
|
14835 |
+
"step": 8472
|
14836 |
+
},
|
14837 |
+
{
|
14838 |
+
"epoch": 0.82,
|
14839 |
+
"grad_norm": 1.062738299369812,
|
14840 |
+
"learning_rate": 8.915066899360092e-06,
|
14841 |
+
"loss": 5.2934,
|
14842 |
+
"step": 8476
|
14843 |
+
},
|
14844 |
+
{
|
14845 |
+
"epoch": 0.82,
|
14846 |
+
"grad_norm": 1.0445396900177002,
|
14847 |
+
"learning_rate": 8.895675780492534e-06,
|
14848 |
+
"loss": 5.3011,
|
14849 |
+
"step": 8480
|
14850 |
+
},
|
14851 |
+
{
|
14852 |
+
"epoch": 0.82,
|
14853 |
+
"grad_norm": 1.1299471855163574,
|
14854 |
+
"learning_rate": 8.876284661624975e-06,
|
14855 |
+
"loss": 5.1977,
|
14856 |
+
"step": 8484
|
14857 |
+
},
|
14858 |
+
{
|
14859 |
+
"epoch": 0.82,
|
14860 |
+
"grad_norm": 1.0241918563842773,
|
14861 |
+
"learning_rate": 8.856893542757417e-06,
|
14862 |
+
"loss": 5.2821,
|
14863 |
+
"step": 8488
|
14864 |
+
},
|
14865 |
+
{
|
14866 |
+
"epoch": 0.82,
|
14867 |
+
"grad_norm": 1.0978903770446777,
|
14868 |
+
"learning_rate": 8.837502423889858e-06,
|
14869 |
+
"loss": 5.3411,
|
14870 |
+
"step": 8492
|
14871 |
+
},
|
14872 |
+
{
|
14873 |
+
"epoch": 0.82,
|
14874 |
+
"grad_norm": 1.0630090236663818,
|
14875 |
+
"learning_rate": 8.8181113050223e-06,
|
14876 |
+
"loss": 5.2487,
|
14877 |
+
"step": 8496
|
14878 |
+
},
|
14879 |
+
{
|
14880 |
+
"epoch": 0.82,
|
14881 |
+
"grad_norm": 1.0468335151672363,
|
14882 |
+
"learning_rate": 8.798720186154742e-06,
|
14883 |
+
"loss": 5.2238,
|
14884 |
+
"step": 8500
|
14885 |
+
},
|
14886 |
+
{
|
14887 |
+
"epoch": 0.82,
|
14888 |
+
"grad_norm": 1.0701797008514404,
|
14889 |
+
"learning_rate": 8.779329067287183e-06,
|
14890 |
+
"loss": 5.2461,
|
14891 |
+
"step": 8504
|
14892 |
+
},
|
14893 |
+
{
|
14894 |
+
"epoch": 0.82,
|
14895 |
+
"grad_norm": 1.0644505023956299,
|
14896 |
+
"learning_rate": 8.759937948419625e-06,
|
14897 |
+
"loss": 5.2462,
|
14898 |
+
"step": 8508
|
14899 |
+
},
|
14900 |
+
{
|
14901 |
+
"epoch": 0.83,
|
14902 |
+
"grad_norm": 1.0857114791870117,
|
14903 |
+
"learning_rate": 8.740546829552066e-06,
|
14904 |
+
"loss": 5.3715,
|
14905 |
+
"step": 8512
|
14906 |
+
},
|
14907 |
+
{
|
14908 |
+
"epoch": 0.83,
|
14909 |
+
"grad_norm": 1.0911511182785034,
|
14910 |
+
"learning_rate": 8.721155710684508e-06,
|
14911 |
+
"loss": 5.3341,
|
14912 |
+
"step": 8516
|
14913 |
+
},
|
14914 |
+
{
|
14915 |
+
"epoch": 0.83,
|
14916 |
+
"grad_norm": 1.066658854484558,
|
14917 |
+
"learning_rate": 8.701764591816948e-06,
|
14918 |
+
"loss": 5.2962,
|
14919 |
+
"step": 8520
|
14920 |
+
},
|
14921 |
+
{
|
14922 |
+
"epoch": 0.83,
|
14923 |
+
"grad_norm": 1.0484204292297363,
|
14924 |
+
"learning_rate": 8.68237347294939e-06,
|
14925 |
+
"loss": 5.2937,
|
14926 |
+
"step": 8524
|
14927 |
+
},
|
14928 |
+
{
|
14929 |
+
"epoch": 0.83,
|
14930 |
+
"grad_norm": 1.1011348962783813,
|
14931 |
+
"learning_rate": 8.662982354081831e-06,
|
14932 |
+
"loss": 5.3372,
|
14933 |
+
"step": 8528
|
14934 |
+
},
|
14935 |
+
{
|
14936 |
+
"epoch": 0.83,
|
14937 |
+
"grad_norm": 1.1982569694519043,
|
14938 |
+
"learning_rate": 8.643591235214273e-06,
|
14939 |
+
"loss": 5.1987,
|
14940 |
+
"step": 8532
|
14941 |
+
},
|
14942 |
+
{
|
14943 |
+
"epoch": 0.83,
|
14944 |
+
"grad_norm": 1.1187928915023804,
|
14945 |
+
"learning_rate": 8.624200116346714e-06,
|
14946 |
+
"loss": 5.3326,
|
14947 |
+
"step": 8536
|
14948 |
+
},
|
14949 |
+
{
|
14950 |
+
"epoch": 0.83,
|
14951 |
+
"grad_norm": 1.0191898345947266,
|
14952 |
+
"learning_rate": 8.604808997479154e-06,
|
14953 |
+
"loss": 5.384,
|
14954 |
+
"step": 8540
|
14955 |
+
},
|
14956 |
+
{
|
14957 |
+
"epoch": 0.83,
|
14958 |
+
"grad_norm": 1.1834492683410645,
|
14959 |
+
"learning_rate": 8.585417878611597e-06,
|
14960 |
+
"loss": 5.3774,
|
14961 |
+
"step": 8544
|
14962 |
+
},
|
14963 |
+
{
|
14964 |
+
"epoch": 0.83,
|
14965 |
+
"grad_norm": 1.0459861755371094,
|
14966 |
+
"learning_rate": 8.566026759744037e-06,
|
14967 |
+
"loss": 5.3309,
|
14968 |
+
"step": 8548
|
14969 |
+
},
|
14970 |
+
{
|
14971 |
+
"epoch": 0.83,
|
14972 |
+
"grad_norm": 1.019656777381897,
|
14973 |
+
"learning_rate": 8.546635640876478e-06,
|
14974 |
+
"loss": 5.3024,
|
14975 |
+
"step": 8552
|
14976 |
+
},
|
14977 |
+
{
|
14978 |
+
"epoch": 0.83,
|
14979 |
+
"grad_norm": 1.1104713678359985,
|
14980 |
+
"learning_rate": 8.52724452200892e-06,
|
14981 |
+
"loss": 5.2309,
|
14982 |
+
"step": 8556
|
14983 |
+
},
|
14984 |
+
{
|
14985 |
+
"epoch": 0.83,
|
14986 |
+
"grad_norm": 1.0480828285217285,
|
14987 |
+
"learning_rate": 8.507853403141362e-06,
|
14988 |
+
"loss": 5.256,
|
14989 |
+
"step": 8560
|
14990 |
+
},
|
14991 |
+
{
|
14992 |
+
"epoch": 0.83,
|
14993 |
+
"grad_norm": 1.0698785781860352,
|
14994 |
+
"learning_rate": 8.488462284273803e-06,
|
14995 |
+
"loss": 5.2322,
|
14996 |
+
"step": 8564
|
14997 |
+
},
|
14998 |
+
{
|
14999 |
+
"epoch": 0.83,
|
15000 |
+
"grad_norm": 1.0998084545135498,
|
15001 |
+
"learning_rate": 8.469071165406245e-06,
|
15002 |
+
"loss": 5.2919,
|
15003 |
+
"step": 8568
|
15004 |
+
},
|
15005 |
+
{
|
15006 |
+
"epoch": 0.83,
|
15007 |
+
"grad_norm": 1.0241094827651978,
|
15008 |
+
"learning_rate": 8.449680046538685e-06,
|
15009 |
+
"loss": 5.3195,
|
15010 |
+
"step": 8572
|
15011 |
+
},
|
15012 |
+
{
|
15013 |
+
"epoch": 0.83,
|
15014 |
+
"grad_norm": 1.1492643356323242,
|
15015 |
+
"learning_rate": 8.430288927671128e-06,
|
15016 |
+
"loss": 5.2311,
|
15017 |
+
"step": 8576
|
15018 |
+
},
|
15019 |
+
{
|
15020 |
+
"epoch": 0.83,
|
15021 |
+
"grad_norm": 1.1048632860183716,
|
15022 |
+
"learning_rate": 8.410897808803568e-06,
|
15023 |
+
"loss": 5.3227,
|
15024 |
+
"step": 8580
|
15025 |
+
},
|
15026 |
+
{
|
15027 |
+
"epoch": 0.83,
|
15028 |
+
"grad_norm": 1.0702450275421143,
|
15029 |
+
"learning_rate": 8.39150668993601e-06,
|
15030 |
+
"loss": 5.3173,
|
15031 |
+
"step": 8584
|
15032 |
+
},
|
15033 |
+
{
|
15034 |
+
"epoch": 0.83,
|
15035 |
+
"grad_norm": 1.0409200191497803,
|
15036 |
+
"learning_rate": 8.372115571068451e-06,
|
15037 |
+
"loss": 5.3185,
|
15038 |
+
"step": 8588
|
15039 |
+
},
|
15040 |
+
{
|
15041 |
+
"epoch": 0.83,
|
15042 |
+
"grad_norm": 1.046713948249817,
|
15043 |
+
"learning_rate": 8.352724452200893e-06,
|
15044 |
+
"loss": 5.3996,
|
15045 |
+
"step": 8592
|
15046 |
+
},
|
15047 |
+
{
|
15048 |
+
"epoch": 0.83,
|
15049 |
+
"grad_norm": 1.039919376373291,
|
15050 |
+
"learning_rate": 8.333333333333334e-06,
|
15051 |
+
"loss": 5.2483,
|
15052 |
+
"step": 8596
|
15053 |
+
},
|
15054 |
+
{
|
15055 |
+
"epoch": 0.83,
|
15056 |
+
"grad_norm": 1.0520331859588623,
|
15057 |
+
"learning_rate": 8.313942214465776e-06,
|
15058 |
+
"loss": 5.3082,
|
15059 |
+
"step": 8600
|
15060 |
+
},
|
15061 |
+
{
|
15062 |
+
"epoch": 0.83,
|
15063 |
+
"grad_norm": 1.0794312953948975,
|
15064 |
+
"learning_rate": 8.294551095598217e-06,
|
15065 |
+
"loss": 5.382,
|
15066 |
+
"step": 8604
|
15067 |
+
},
|
15068 |
+
{
|
15069 |
+
"epoch": 0.83,
|
15070 |
+
"grad_norm": 1.0222445726394653,
|
15071 |
+
"learning_rate": 8.275159976730657e-06,
|
15072 |
+
"loss": 5.2186,
|
15073 |
+
"step": 8608
|
15074 |
+
},
|
15075 |
+
{
|
15076 |
+
"epoch": 0.83,
|
15077 |
+
"grad_norm": 1.043550729751587,
|
15078 |
+
"learning_rate": 8.2557688578631e-06,
|
15079 |
+
"loss": 5.2447,
|
15080 |
+
"step": 8612
|
15081 |
+
},
|
15082 |
+
{
|
15083 |
+
"epoch": 0.84,
|
15084 |
+
"grad_norm": 1.065027117729187,
|
15085 |
+
"learning_rate": 8.23637773899554e-06,
|
15086 |
+
"loss": 5.3149,
|
15087 |
+
"step": 8616
|
15088 |
+
},
|
15089 |
+
{
|
15090 |
+
"epoch": 0.84,
|
15091 |
+
"grad_norm": 1.089449405670166,
|
15092 |
+
"learning_rate": 8.216986620127982e-06,
|
15093 |
+
"loss": 5.231,
|
15094 |
+
"step": 8620
|
15095 |
+
},
|
15096 |
+
{
|
15097 |
+
"epoch": 0.84,
|
15098 |
+
"grad_norm": 1.0330005884170532,
|
15099 |
+
"learning_rate": 8.197595501260423e-06,
|
15100 |
+
"loss": 5.3099,
|
15101 |
+
"step": 8624
|
15102 |
+
},
|
15103 |
+
{
|
15104 |
+
"epoch": 0.84,
|
15105 |
+
"grad_norm": 1.088131070137024,
|
15106 |
+
"learning_rate": 8.178204382392865e-06,
|
15107 |
+
"loss": 5.3941,
|
15108 |
+
"step": 8628
|
15109 |
+
},
|
15110 |
+
{
|
15111 |
+
"epoch": 0.84,
|
15112 |
+
"grad_norm": 1.0130773782730103,
|
15113 |
+
"learning_rate": 8.158813263525305e-06,
|
15114 |
+
"loss": 5.2442,
|
15115 |
+
"step": 8632
|
15116 |
+
},
|
15117 |
+
{
|
15118 |
+
"epoch": 0.84,
|
15119 |
+
"grad_norm": 1.035882592201233,
|
15120 |
+
"learning_rate": 8.139422144657748e-06,
|
15121 |
+
"loss": 5.21,
|
15122 |
+
"step": 8636
|
15123 |
+
},
|
15124 |
+
{
|
15125 |
+
"epoch": 0.84,
|
15126 |
+
"grad_norm": 1.0550565719604492,
|
15127 |
+
"learning_rate": 8.120031025790188e-06,
|
15128 |
+
"loss": 5.2888,
|
15129 |
+
"step": 8640
|
15130 |
+
},
|
15131 |
+
{
|
15132 |
+
"epoch": 0.84,
|
15133 |
+
"grad_norm": 1.1436634063720703,
|
15134 |
+
"learning_rate": 8.10063990692263e-06,
|
15135 |
+
"loss": 5.3587,
|
15136 |
+
"step": 8644
|
15137 |
+
},
|
15138 |
+
{
|
15139 |
+
"epoch": 0.84,
|
15140 |
+
"grad_norm": 1.1211497783660889,
|
15141 |
+
"learning_rate": 8.081248788055071e-06,
|
15142 |
+
"loss": 5.3188,
|
15143 |
+
"step": 8648
|
15144 |
+
},
|
15145 |
+
{
|
15146 |
+
"epoch": 0.84,
|
15147 |
+
"grad_norm": 1.1205918788909912,
|
15148 |
+
"learning_rate": 8.061857669187513e-06,
|
15149 |
+
"loss": 5.2688,
|
15150 |
+
"step": 8652
|
15151 |
+
},
|
15152 |
+
{
|
15153 |
+
"epoch": 0.84,
|
15154 |
+
"grad_norm": 1.0907244682312012,
|
15155 |
+
"learning_rate": 8.042466550319954e-06,
|
15156 |
+
"loss": 5.2518,
|
15157 |
+
"step": 8656
|
15158 |
+
},
|
15159 |
+
{
|
15160 |
+
"epoch": 0.84,
|
15161 |
+
"grad_norm": 1.0856692790985107,
|
15162 |
+
"learning_rate": 8.023075431452396e-06,
|
15163 |
+
"loss": 5.3384,
|
15164 |
+
"step": 8660
|
15165 |
+
},
|
15166 |
+
{
|
15167 |
+
"epoch": 0.84,
|
15168 |
+
"grad_norm": 1.0303173065185547,
|
15169 |
+
"learning_rate": 8.003684312584836e-06,
|
15170 |
+
"loss": 5.2906,
|
15171 |
+
"step": 8664
|
15172 |
+
},
|
15173 |
+
{
|
15174 |
+
"epoch": 0.84,
|
15175 |
+
"grad_norm": 1.0908282995224,
|
15176 |
+
"learning_rate": 7.984293193717279e-06,
|
15177 |
+
"loss": 5.3331,
|
15178 |
+
"step": 8668
|
15179 |
+
},
|
15180 |
+
{
|
15181 |
+
"epoch": 0.84,
|
15182 |
+
"grad_norm": 1.088040828704834,
|
15183 |
+
"learning_rate": 7.96490207484972e-06,
|
15184 |
+
"loss": 5.3521,
|
15185 |
+
"step": 8672
|
15186 |
+
},
|
15187 |
+
{
|
15188 |
+
"epoch": 0.84,
|
15189 |
+
"grad_norm": 1.0086363554000854,
|
15190 |
+
"learning_rate": 7.94551095598216e-06,
|
15191 |
+
"loss": 5.3294,
|
15192 |
+
"step": 8676
|
15193 |
+
},
|
15194 |
+
{
|
15195 |
+
"epoch": 0.84,
|
15196 |
+
"grad_norm": 1.1544169187545776,
|
15197 |
+
"learning_rate": 7.926119837114602e-06,
|
15198 |
+
"loss": 5.3184,
|
15199 |
+
"step": 8680
|
15200 |
+
},
|
15201 |
+
{
|
15202 |
+
"epoch": 0.84,
|
15203 |
+
"grad_norm": 1.0530931949615479,
|
15204 |
+
"learning_rate": 7.906728718247042e-06,
|
15205 |
+
"loss": 5.2866,
|
15206 |
+
"step": 8684
|
15207 |
+
},
|
15208 |
+
{
|
15209 |
+
"epoch": 0.84,
|
15210 |
+
"grad_norm": 1.080121397972107,
|
15211 |
+
"learning_rate": 7.887337599379485e-06,
|
15212 |
+
"loss": 5.2793,
|
15213 |
+
"step": 8688
|
15214 |
+
},
|
15215 |
+
{
|
15216 |
+
"epoch": 0.84,
|
15217 |
+
"grad_norm": 1.0175666809082031,
|
15218 |
+
"learning_rate": 7.867946480511925e-06,
|
15219 |
+
"loss": 5.3282,
|
15220 |
+
"step": 8692
|
15221 |
+
},
|
15222 |
+
{
|
15223 |
+
"epoch": 0.84,
|
15224 |
+
"grad_norm": 1.0283890962600708,
|
15225 |
+
"learning_rate": 7.848555361644367e-06,
|
15226 |
+
"loss": 5.3439,
|
15227 |
+
"step": 8696
|
15228 |
+
},
|
15229 |
+
{
|
15230 |
+
"epoch": 0.84,
|
15231 |
+
"grad_norm": 1.0480095148086548,
|
15232 |
+
"learning_rate": 7.829164242776808e-06,
|
15233 |
+
"loss": 5.3042,
|
15234 |
+
"step": 8700
|
15235 |
+
},
|
15236 |
+
{
|
15237 |
+
"epoch": 0.84,
|
15238 |
+
"grad_norm": 0.9947773814201355,
|
15239 |
+
"learning_rate": 7.80977312390925e-06,
|
15240 |
+
"loss": 5.251,
|
15241 |
+
"step": 8704
|
15242 |
+
},
|
15243 |
+
{
|
15244 |
+
"epoch": 0.84,
|
15245 |
+
"grad_norm": 1.033823847770691,
|
15246 |
+
"learning_rate": 7.79038200504169e-06,
|
15247 |
+
"loss": 5.4016,
|
15248 |
+
"step": 8708
|
15249 |
+
},
|
15250 |
+
{
|
15251 |
+
"epoch": 0.84,
|
15252 |
+
"grad_norm": 1.129824161529541,
|
15253 |
+
"learning_rate": 7.770990886174133e-06,
|
15254 |
+
"loss": 5.2176,
|
15255 |
+
"step": 8712
|
15256 |
+
},
|
15257 |
+
{
|
15258 |
+
"epoch": 0.85,
|
15259 |
+
"grad_norm": 1.0202304124832153,
|
15260 |
+
"learning_rate": 7.751599767306574e-06,
|
15261 |
+
"loss": 5.3219,
|
15262 |
+
"step": 8716
|
15263 |
+
},
|
15264 |
+
{
|
15265 |
+
"epoch": 0.85,
|
15266 |
+
"grad_norm": 1.0748639106750488,
|
15267 |
+
"learning_rate": 7.732208648439016e-06,
|
15268 |
+
"loss": 5.2945,
|
15269 |
+
"step": 8720
|
15270 |
+
},
|
15271 |
+
{
|
15272 |
+
"epoch": 0.85,
|
15273 |
+
"grad_norm": 1.0026463270187378,
|
15274 |
+
"learning_rate": 7.712817529571456e-06,
|
15275 |
+
"loss": 5.3474,
|
15276 |
+
"step": 8724
|
15277 |
+
},
|
15278 |
+
{
|
15279 |
+
"epoch": 0.85,
|
15280 |
+
"grad_norm": 0.98891282081604,
|
15281 |
+
"learning_rate": 7.693426410703899e-06,
|
15282 |
+
"loss": 5.2612,
|
15283 |
+
"step": 8728
|
15284 |
+
},
|
15285 |
+
{
|
15286 |
+
"epoch": 0.85,
|
15287 |
+
"grad_norm": 1.079750418663025,
|
15288 |
+
"learning_rate": 7.674035291836339e-06,
|
15289 |
+
"loss": 5.2398,
|
15290 |
+
"step": 8732
|
15291 |
+
},
|
15292 |
+
{
|
15293 |
+
"epoch": 0.85,
|
15294 |
+
"grad_norm": 1.057255744934082,
|
15295 |
+
"learning_rate": 7.654644172968781e-06,
|
15296 |
+
"loss": 5.4364,
|
15297 |
+
"step": 8736
|
15298 |
+
},
|
15299 |
+
{
|
15300 |
+
"epoch": 0.85,
|
15301 |
+
"grad_norm": 1.0570470094680786,
|
15302 |
+
"learning_rate": 7.635253054101222e-06,
|
15303 |
+
"loss": 5.2695,
|
15304 |
+
"step": 8740
|
15305 |
+
},
|
15306 |
+
{
|
15307 |
+
"epoch": 0.85,
|
15308 |
+
"grad_norm": 1.0664699077606201,
|
15309 |
+
"learning_rate": 7.615861935233663e-06,
|
15310 |
+
"loss": 5.3378,
|
15311 |
+
"step": 8744
|
15312 |
+
},
|
15313 |
+
{
|
15314 |
+
"epoch": 0.85,
|
15315 |
+
"grad_norm": 1.0890289545059204,
|
15316 |
+
"learning_rate": 7.5964708163661055e-06,
|
15317 |
+
"loss": 5.375,
|
15318 |
+
"step": 8748
|
15319 |
+
},
|
15320 |
+
{
|
15321 |
+
"epoch": 0.85,
|
15322 |
+
"grad_norm": 1.0607807636260986,
|
15323 |
+
"learning_rate": 7.577079697498546e-06,
|
15324 |
+
"loss": 5.3258,
|
15325 |
+
"step": 8752
|
15326 |
+
},
|
15327 |
+
{
|
15328 |
+
"epoch": 0.85,
|
15329 |
+
"grad_norm": 1.0911225080490112,
|
15330 |
+
"learning_rate": 7.557688578630988e-06,
|
15331 |
+
"loss": 5.242,
|
15332 |
+
"step": 8756
|
15333 |
+
},
|
15334 |
+
{
|
15335 |
+
"epoch": 0.85,
|
15336 |
+
"grad_norm": 1.1005817651748657,
|
15337 |
+
"learning_rate": 7.538297459763429e-06,
|
15338 |
+
"loss": 5.2734,
|
15339 |
+
"step": 8760
|
15340 |
+
},
|
15341 |
+
{
|
15342 |
+
"epoch": 0.85,
|
15343 |
+
"grad_norm": 1.0650907754898071,
|
15344 |
+
"learning_rate": 7.51890634089587e-06,
|
15345 |
+
"loss": 5.2939,
|
15346 |
+
"step": 8764
|
15347 |
+
},
|
15348 |
+
{
|
15349 |
+
"epoch": 0.85,
|
15350 |
+
"grad_norm": 1.0343334674835205,
|
15351 |
+
"learning_rate": 7.499515222028311e-06,
|
15352 |
+
"loss": 5.2601,
|
15353 |
+
"step": 8768
|
15354 |
+
},
|
15355 |
+
{
|
15356 |
+
"epoch": 0.85,
|
15357 |
+
"grad_norm": 1.092239260673523,
|
15358 |
+
"learning_rate": 7.480124103160753e-06,
|
15359 |
+
"loss": 5.266,
|
15360 |
+
"step": 8772
|
15361 |
+
},
|
15362 |
+
{
|
15363 |
+
"epoch": 0.85,
|
15364 |
+
"grad_norm": 1.140648603439331,
|
15365 |
+
"learning_rate": 7.4607329842931935e-06,
|
15366 |
+
"loss": 5.2353,
|
15367 |
+
"step": 8776
|
15368 |
+
},
|
15369 |
+
{
|
15370 |
+
"epoch": 0.85,
|
15371 |
+
"grad_norm": 1.0743423700332642,
|
15372 |
+
"learning_rate": 7.441341865425636e-06,
|
15373 |
+
"loss": 5.389,
|
15374 |
+
"step": 8780
|
15375 |
+
},
|
15376 |
+
{
|
15377 |
+
"epoch": 0.85,
|
15378 |
+
"grad_norm": 1.0870285034179688,
|
15379 |
+
"learning_rate": 7.421950746558076e-06,
|
15380 |
+
"loss": 5.3212,
|
15381 |
+
"step": 8784
|
15382 |
+
},
|
15383 |
+
{
|
15384 |
+
"epoch": 0.85,
|
15385 |
+
"grad_norm": 1.0318245887756348,
|
15386 |
+
"learning_rate": 7.4025596276905185e-06,
|
15387 |
+
"loss": 5.2854,
|
15388 |
+
"step": 8788
|
15389 |
+
},
|
15390 |
+
{
|
15391 |
+
"epoch": 0.85,
|
15392 |
+
"grad_norm": 1.0597593784332275,
|
15393 |
+
"learning_rate": 7.383168508822959e-06,
|
15394 |
+
"loss": 5.4204,
|
15395 |
+
"step": 8792
|
15396 |
+
},
|
15397 |
+
{
|
15398 |
+
"epoch": 0.85,
|
15399 |
+
"grad_norm": 1.0621132850646973,
|
15400 |
+
"learning_rate": 7.3637773899554e-06,
|
15401 |
+
"loss": 5.2903,
|
15402 |
+
"step": 8796
|
15403 |
+
},
|
15404 |
+
{
|
15405 |
+
"epoch": 0.85,
|
15406 |
+
"grad_norm": 1.057024598121643,
|
15407 |
+
"learning_rate": 7.344386271087843e-06,
|
15408 |
+
"loss": 5.3975,
|
15409 |
+
"step": 8800
|
15410 |
+
},
|
15411 |
+
{
|
15412 |
+
"epoch": 0.85,
|
15413 |
+
"grad_norm": 1.1537240743637085,
|
15414 |
+
"learning_rate": 7.324995152220283e-06,
|
15415 |
+
"loss": 5.3118,
|
15416 |
+
"step": 8804
|
15417 |
+
},
|
15418 |
+
{
|
15419 |
+
"epoch": 0.85,
|
15420 |
+
"grad_norm": 1.161657691001892,
|
15421 |
+
"learning_rate": 7.305604033352725e-06,
|
15422 |
+
"loss": 5.3516,
|
15423 |
+
"step": 8808
|
15424 |
+
},
|
15425 |
+
{
|
15426 |
+
"epoch": 0.85,
|
15427 |
+
"grad_norm": 1.0824769735336304,
|
15428 |
+
"learning_rate": 7.286212914485166e-06,
|
15429 |
+
"loss": 5.3601,
|
15430 |
+
"step": 8812
|
15431 |
+
},
|
15432 |
+
{
|
15433 |
+
"epoch": 0.85,
|
15434 |
+
"grad_norm": 1.0471476316452026,
|
15435 |
+
"learning_rate": 7.266821795617608e-06,
|
15436 |
+
"loss": 5.2858,
|
15437 |
+
"step": 8816
|
15438 |
+
},
|
15439 |
+
{
|
15440 |
+
"epoch": 0.86,
|
15441 |
+
"grad_norm": 1.0438990592956543,
|
15442 |
+
"learning_rate": 7.247430676750049e-06,
|
15443 |
+
"loss": 5.2688,
|
15444 |
+
"step": 8820
|
15445 |
+
},
|
15446 |
+
{
|
15447 |
+
"epoch": 0.86,
|
15448 |
+
"grad_norm": 1.0383694171905518,
|
15449 |
+
"learning_rate": 7.228039557882491e-06,
|
15450 |
+
"loss": 5.3404,
|
15451 |
+
"step": 8824
|
15452 |
+
},
|
15453 |
+
{
|
15454 |
+
"epoch": 0.86,
|
15455 |
+
"grad_norm": 1.039699673652649,
|
15456 |
+
"learning_rate": 7.2086484390149315e-06,
|
15457 |
+
"loss": 5.2846,
|
15458 |
+
"step": 8828
|
15459 |
+
},
|
15460 |
+
{
|
15461 |
+
"epoch": 0.86,
|
15462 |
+
"grad_norm": 1.1256271600723267,
|
15463 |
+
"learning_rate": 7.189257320147373e-06,
|
15464 |
+
"loss": 5.327,
|
15465 |
+
"step": 8832
|
15466 |
+
},
|
15467 |
+
{
|
15468 |
+
"epoch": 0.86,
|
15469 |
+
"grad_norm": 0.9789720177650452,
|
15470 |
+
"learning_rate": 7.169866201279813e-06,
|
15471 |
+
"loss": 5.3369,
|
15472 |
+
"step": 8836
|
15473 |
+
},
|
15474 |
+
{
|
15475 |
+
"epoch": 0.86,
|
15476 |
+
"grad_norm": 1.0387988090515137,
|
15477 |
+
"learning_rate": 7.150475082412256e-06,
|
15478 |
+
"loss": 5.3402,
|
15479 |
+
"step": 8840
|
15480 |
+
},
|
15481 |
+
{
|
15482 |
+
"epoch": 0.86,
|
15483 |
+
"grad_norm": 1.0737075805664062,
|
15484 |
+
"learning_rate": 7.131083963544696e-06,
|
15485 |
+
"loss": 5.2837,
|
15486 |
+
"step": 8844
|
15487 |
+
},
|
15488 |
+
{
|
15489 |
+
"epoch": 0.86,
|
15490 |
+
"grad_norm": 1.1215327978134155,
|
15491 |
+
"learning_rate": 7.111692844677138e-06,
|
15492 |
+
"loss": 5.2237,
|
15493 |
+
"step": 8848
|
15494 |
+
},
|
15495 |
+
{
|
15496 |
+
"epoch": 0.86,
|
15497 |
+
"grad_norm": 1.0533177852630615,
|
15498 |
+
"learning_rate": 7.092301725809579e-06,
|
15499 |
+
"loss": 5.3783,
|
15500 |
+
"step": 8852
|
15501 |
+
},
|
15502 |
+
{
|
15503 |
+
"epoch": 0.86,
|
15504 |
+
"grad_norm": 1.1658439636230469,
|
15505 |
+
"learning_rate": 7.072910606942021e-06,
|
15506 |
+
"loss": 5.3175,
|
15507 |
+
"step": 8856
|
15508 |
+
},
|
15509 |
+
{
|
15510 |
+
"epoch": 0.86,
|
15511 |
+
"grad_norm": 1.0966906547546387,
|
15512 |
+
"learning_rate": 7.0535194880744625e-06,
|
15513 |
+
"loss": 5.4216,
|
15514 |
+
"step": 8860
|
15515 |
+
},
|
15516 |
+
{
|
15517 |
+
"epoch": 0.86,
|
15518 |
+
"grad_norm": 1.0952768325805664,
|
15519 |
+
"learning_rate": 7.034128369206904e-06,
|
15520 |
+
"loss": 5.2748,
|
15521 |
+
"step": 8864
|
15522 |
+
},
|
15523 |
+
{
|
15524 |
+
"epoch": 0.86,
|
15525 |
+
"grad_norm": 1.096529483795166,
|
15526 |
+
"learning_rate": 7.014737250339345e-06,
|
15527 |
+
"loss": 5.1666,
|
15528 |
+
"step": 8868
|
15529 |
+
},
|
15530 |
+
{
|
15531 |
+
"epoch": 0.86,
|
15532 |
+
"grad_norm": 1.0736936330795288,
|
15533 |
+
"learning_rate": 6.995346131471786e-06,
|
15534 |
+
"loss": 5.24,
|
15535 |
+
"step": 8872
|
15536 |
+
},
|
15537 |
+
{
|
15538 |
+
"epoch": 0.86,
|
15539 |
+
"grad_norm": 1.0133376121520996,
|
15540 |
+
"learning_rate": 6.975955012604228e-06,
|
15541 |
+
"loss": 5.2827,
|
15542 |
+
"step": 8876
|
15543 |
+
},
|
15544 |
+
{
|
15545 |
+
"epoch": 0.86,
|
15546 |
+
"grad_norm": 1.0580708980560303,
|
15547 |
+
"learning_rate": 6.956563893736669e-06,
|
15548 |
+
"loss": 5.3801,
|
15549 |
+
"step": 8880
|
15550 |
+
},
|
15551 |
+
{
|
15552 |
+
"epoch": 0.86,
|
15553 |
+
"grad_norm": 1.1220327615737915,
|
15554 |
+
"learning_rate": 6.937172774869111e-06,
|
15555 |
+
"loss": 5.2949,
|
15556 |
+
"step": 8884
|
15557 |
+
},
|
15558 |
+
{
|
15559 |
+
"epoch": 0.86,
|
15560 |
+
"grad_norm": 1.136806845664978,
|
15561 |
+
"learning_rate": 6.917781656001551e-06,
|
15562 |
+
"loss": 5.347,
|
15563 |
+
"step": 8888
|
15564 |
+
},
|
15565 |
+
{
|
15566 |
+
"epoch": 0.86,
|
15567 |
+
"grad_norm": 1.1147714853286743,
|
15568 |
+
"learning_rate": 6.898390537133994e-06,
|
15569 |
+
"loss": 5.2648,
|
15570 |
+
"step": 8892
|
15571 |
+
},
|
15572 |
+
{
|
15573 |
+
"epoch": 0.86,
|
15574 |
+
"grad_norm": 1.0525692701339722,
|
15575 |
+
"learning_rate": 6.878999418266434e-06,
|
15576 |
+
"loss": 5.2665,
|
15577 |
+
"step": 8896
|
15578 |
+
},
|
15579 |
+
{
|
15580 |
+
"epoch": 0.86,
|
15581 |
+
"grad_norm": 1.0400636196136475,
|
15582 |
+
"learning_rate": 6.8596082993988755e-06,
|
15583 |
+
"loss": 5.3571,
|
15584 |
+
"step": 8900
|
15585 |
+
},
|
15586 |
+
{
|
15587 |
+
"epoch": 0.86,
|
15588 |
+
"grad_norm": 1.0699836015701294,
|
15589 |
+
"learning_rate": 6.840217180531316e-06,
|
15590 |
+
"loss": 5.3079,
|
15591 |
+
"step": 8904
|
15592 |
+
},
|
15593 |
+
{
|
15594 |
+
"epoch": 0.86,
|
15595 |
+
"grad_norm": 1.0171644687652588,
|
15596 |
+
"learning_rate": 6.820826061663758e-06,
|
15597 |
+
"loss": 5.3438,
|
15598 |
+
"step": 8908
|
15599 |
+
},
|
15600 |
+
{
|
15601 |
+
"epoch": 0.86,
|
15602 |
+
"grad_norm": 1.0426756143569946,
|
15603 |
+
"learning_rate": 6.801434942796199e-06,
|
15604 |
+
"loss": 5.417,
|
15605 |
+
"step": 8912
|
15606 |
+
},
|
15607 |
+
{
|
15608 |
+
"epoch": 0.86,
|
15609 |
+
"grad_norm": 1.1138461828231812,
|
15610 |
+
"learning_rate": 6.782043823928641e-06,
|
15611 |
+
"loss": 5.2769,
|
15612 |
+
"step": 8916
|
15613 |
+
},
|
15614 |
+
{
|
15615 |
+
"epoch": 0.86,
|
15616 |
+
"grad_norm": 1.0219694375991821,
|
15617 |
+
"learning_rate": 6.762652705061083e-06,
|
15618 |
+
"loss": 5.2811,
|
15619 |
+
"step": 8920
|
15620 |
+
},
|
15621 |
+
{
|
15622 |
+
"epoch": 0.87,
|
15623 |
+
"grad_norm": 1.1686511039733887,
|
15624 |
+
"learning_rate": 6.743261586193524e-06,
|
15625 |
+
"loss": 5.3218,
|
15626 |
+
"step": 8924
|
15627 |
+
},
|
15628 |
+
{
|
15629 |
+
"epoch": 0.87,
|
15630 |
+
"grad_norm": 1.0407147407531738,
|
15631 |
+
"learning_rate": 6.723870467325965e-06,
|
15632 |
+
"loss": 5.339,
|
15633 |
+
"step": 8928
|
15634 |
+
},
|
15635 |
+
{
|
15636 |
+
"epoch": 0.87,
|
15637 |
+
"grad_norm": 1.1056681871414185,
|
15638 |
+
"learning_rate": 6.704479348458407e-06,
|
15639 |
+
"loss": 5.2758,
|
15640 |
+
"step": 8932
|
15641 |
+
},
|
15642 |
+
{
|
15643 |
+
"epoch": 0.87,
|
15644 |
+
"grad_norm": 1.0969740152359009,
|
15645 |
+
"learning_rate": 6.685088229590848e-06,
|
15646 |
+
"loss": 5.2432,
|
15647 |
+
"step": 8936
|
15648 |
+
},
|
15649 |
+
{
|
15650 |
+
"epoch": 0.87,
|
15651 |
+
"grad_norm": 0.9841113090515137,
|
15652 |
+
"learning_rate": 6.6656971107232885e-06,
|
15653 |
+
"loss": 5.2596,
|
15654 |
+
"step": 8940
|
15655 |
+
},
|
15656 |
+
{
|
15657 |
+
"epoch": 0.87,
|
15658 |
+
"grad_norm": 1.1172292232513428,
|
15659 |
+
"learning_rate": 6.646305991855731e-06,
|
15660 |
+
"loss": 5.2884,
|
15661 |
+
"step": 8944
|
15662 |
+
},
|
15663 |
+
{
|
15664 |
+
"epoch": 0.87,
|
15665 |
+
"grad_norm": 0.9936596155166626,
|
15666 |
+
"learning_rate": 6.626914872988171e-06,
|
15667 |
+
"loss": 5.2968,
|
15668 |
+
"step": 8948
|
15669 |
+
},
|
15670 |
+
{
|
15671 |
+
"epoch": 0.87,
|
15672 |
+
"grad_norm": 1.0389301776885986,
|
15673 |
+
"learning_rate": 6.6075237541206135e-06,
|
15674 |
+
"loss": 5.2827,
|
15675 |
+
"step": 8952
|
15676 |
+
},
|
15677 |
+
{
|
15678 |
+
"epoch": 0.87,
|
15679 |
+
"grad_norm": 1.020494818687439,
|
15680 |
+
"learning_rate": 6.588132635253054e-06,
|
15681 |
+
"loss": 5.3882,
|
15682 |
+
"step": 8956
|
15683 |
+
},
|
15684 |
+
{
|
15685 |
+
"epoch": 0.87,
|
15686 |
+
"grad_norm": 1.0391160249710083,
|
15687 |
+
"learning_rate": 6.568741516385496e-06,
|
15688 |
+
"loss": 5.2172,
|
15689 |
+
"step": 8960
|
15690 |
+
},
|
15691 |
+
{
|
15692 |
+
"epoch": 0.87,
|
15693 |
+
"grad_norm": 1.0213825702667236,
|
15694 |
+
"learning_rate": 6.549350397517937e-06,
|
15695 |
+
"loss": 5.3307,
|
15696 |
+
"step": 8964
|
15697 |
+
},
|
15698 |
+
{
|
15699 |
+
"epoch": 0.87,
|
15700 |
+
"grad_norm": 1.0745649337768555,
|
15701 |
+
"learning_rate": 6.529959278650378e-06,
|
15702 |
+
"loss": 5.2638,
|
15703 |
+
"step": 8968
|
15704 |
+
},
|
15705 |
+
{
|
15706 |
+
"epoch": 0.87,
|
15707 |
+
"grad_norm": 1.0567609071731567,
|
15708 |
+
"learning_rate": 6.510568159782819e-06,
|
15709 |
+
"loss": 5.3164,
|
15710 |
+
"step": 8972
|
15711 |
+
},
|
15712 |
+
{
|
15713 |
+
"epoch": 0.87,
|
15714 |
+
"grad_norm": 1.0450811386108398,
|
15715 |
+
"learning_rate": 6.491177040915261e-06,
|
15716 |
+
"loss": 5.3648,
|
15717 |
+
"step": 8976
|
15718 |
+
},
|
15719 |
+
{
|
15720 |
+
"epoch": 0.87,
|
15721 |
+
"grad_norm": 1.0880790948867798,
|
15722 |
+
"learning_rate": 6.471785922047703e-06,
|
15723 |
+
"loss": 5.3617,
|
15724 |
+
"step": 8980
|
15725 |
+
},
|
15726 |
+
{
|
15727 |
+
"epoch": 0.87,
|
15728 |
+
"grad_norm": 1.0606417655944824,
|
15729 |
+
"learning_rate": 6.452394803180144e-06,
|
15730 |
+
"loss": 5.2201,
|
15731 |
+
"step": 8984
|
15732 |
+
},
|
15733 |
+
{
|
15734 |
+
"epoch": 0.87,
|
15735 |
+
"grad_norm": 1.0124664306640625,
|
15736 |
+
"learning_rate": 6.433003684312586e-06,
|
15737 |
+
"loss": 5.2499,
|
15738 |
+
"step": 8988
|
15739 |
+
},
|
15740 |
+
{
|
15741 |
+
"epoch": 0.87,
|
15742 |
+
"grad_norm": 1.1352604627609253,
|
15743 |
+
"learning_rate": 6.4136125654450265e-06,
|
15744 |
+
"loss": 5.3782,
|
15745 |
+
"step": 8992
|
15746 |
+
},
|
15747 |
+
{
|
15748 |
+
"epoch": 0.87,
|
15749 |
+
"grad_norm": 1.1061619520187378,
|
15750 |
+
"learning_rate": 6.394221446577468e-06,
|
15751 |
+
"loss": 5.3408,
|
15752 |
+
"step": 8996
|
15753 |
+
},
|
15754 |
+
{
|
15755 |
+
"epoch": 0.87,
|
15756 |
+
"grad_norm": 1.1135718822479248,
|
15757 |
+
"learning_rate": 6.374830327709909e-06,
|
15758 |
+
"loss": 5.2323,
|
15759 |
+
"step": 9000
|
15760 |
+
},
|
15761 |
+
{
|
15762 |
+
"epoch": 0.87,
|
15763 |
+
"grad_norm": 1.0155010223388672,
|
15764 |
+
"learning_rate": 6.355439208842351e-06,
|
15765 |
+
"loss": 5.2942,
|
15766 |
+
"step": 9004
|
15767 |
+
},
|
15768 |
+
{
|
15769 |
+
"epoch": 0.87,
|
15770 |
+
"grad_norm": 1.044931173324585,
|
15771 |
+
"learning_rate": 6.336048089974791e-06,
|
15772 |
+
"loss": 5.3267,
|
15773 |
+
"step": 9008
|
15774 |
+
},
|
15775 |
+
{
|
15776 |
+
"epoch": 0.87,
|
15777 |
+
"grad_norm": 1.0643398761749268,
|
15778 |
+
"learning_rate": 6.316656971107233e-06,
|
15779 |
+
"loss": 5.2651,
|
15780 |
+
"step": 9012
|
15781 |
+
},
|
15782 |
+
{
|
15783 |
+
"epoch": 0.87,
|
15784 |
+
"grad_norm": 1.0203381776809692,
|
15785 |
+
"learning_rate": 6.297265852239674e-06,
|
15786 |
+
"loss": 5.3493,
|
15787 |
+
"step": 9016
|
15788 |
+
},
|
15789 |
+
{
|
15790 |
+
"epoch": 0.87,
|
15791 |
+
"grad_norm": 1.0563126802444458,
|
15792 |
+
"learning_rate": 6.277874733372116e-06,
|
15793 |
+
"loss": 5.362,
|
15794 |
+
"step": 9020
|
15795 |
+
},
|
15796 |
+
{
|
15797 |
+
"epoch": 0.87,
|
15798 |
+
"grad_norm": 1.0667084455490112,
|
15799 |
+
"learning_rate": 6.258483614504557e-06,
|
15800 |
+
"loss": 5.2151,
|
15801 |
+
"step": 9024
|
15802 |
+
},
|
15803 |
+
{
|
15804 |
+
"epoch": 0.88,
|
15805 |
+
"grad_norm": 1.0885251760482788,
|
15806 |
+
"learning_rate": 6.239092495636999e-06,
|
15807 |
+
"loss": 5.3404,
|
15808 |
+
"step": 9028
|
15809 |
+
},
|
15810 |
+
{
|
15811 |
+
"epoch": 0.88,
|
15812 |
+
"grad_norm": 1.0210630893707275,
|
15813 |
+
"learning_rate": 6.21970137676944e-06,
|
15814 |
+
"loss": 5.2937,
|
15815 |
+
"step": 9032
|
15816 |
+
},
|
15817 |
+
{
|
15818 |
+
"epoch": 0.88,
|
15819 |
+
"grad_norm": 1.1303844451904297,
|
15820 |
+
"learning_rate": 6.200310257901881e-06,
|
15821 |
+
"loss": 5.3373,
|
15822 |
+
"step": 9036
|
15823 |
+
},
|
15824 |
+
{
|
15825 |
+
"epoch": 0.88,
|
15826 |
+
"grad_norm": 1.1023499965667725,
|
15827 |
+
"learning_rate": 6.180919139034322e-06,
|
15828 |
+
"loss": 5.2946,
|
15829 |
+
"step": 9040
|
15830 |
+
},
|
15831 |
+
{
|
15832 |
+
"epoch": 0.88,
|
15833 |
+
"grad_norm": 1.0469759702682495,
|
15834 |
+
"learning_rate": 6.161528020166764e-06,
|
15835 |
+
"loss": 5.3265,
|
15836 |
+
"step": 9044
|
15837 |
+
},
|
15838 |
+
{
|
15839 |
+
"epoch": 0.88,
|
15840 |
+
"grad_norm": 0.9917576313018799,
|
15841 |
+
"learning_rate": 6.142136901299205e-06,
|
15842 |
+
"loss": 5.2984,
|
15843 |
+
"step": 9048
|
15844 |
+
},
|
15845 |
+
{
|
15846 |
+
"epoch": 0.88,
|
15847 |
+
"grad_norm": 1.0630229711532593,
|
15848 |
+
"learning_rate": 6.122745782431646e-06,
|
15849 |
+
"loss": 5.2025,
|
15850 |
+
"step": 9052
|
15851 |
+
},
|
15852 |
+
{
|
15853 |
+
"epoch": 0.88,
|
15854 |
+
"grad_norm": 1.0385984182357788,
|
15855 |
+
"learning_rate": 6.103354663564088e-06,
|
15856 |
+
"loss": 5.2934,
|
15857 |
+
"step": 9056
|
15858 |
+
},
|
15859 |
+
{
|
15860 |
+
"epoch": 0.88,
|
15861 |
+
"grad_norm": 1.0480278730392456,
|
15862 |
+
"learning_rate": 6.083963544696529e-06,
|
15863 |
+
"loss": 5.2933,
|
15864 |
+
"step": 9060
|
15865 |
+
},
|
15866 |
+
{
|
15867 |
+
"epoch": 0.88,
|
15868 |
+
"grad_norm": 1.0395824909210205,
|
15869 |
+
"learning_rate": 6.0645724258289706e-06,
|
15870 |
+
"loss": 5.267,
|
15871 |
+
"step": 9064
|
15872 |
+
},
|
15873 |
+
{
|
15874 |
+
"epoch": 0.88,
|
15875 |
+
"grad_norm": 1.0996421575546265,
|
15876 |
+
"learning_rate": 6.045181306961412e-06,
|
15877 |
+
"loss": 5.3497,
|
15878 |
+
"step": 9068
|
15879 |
+
},
|
15880 |
+
{
|
15881 |
+
"epoch": 0.88,
|
15882 |
+
"grad_norm": 1.0920591354370117,
|
15883 |
+
"learning_rate": 6.025790188093853e-06,
|
15884 |
+
"loss": 5.2752,
|
15885 |
+
"step": 9072
|
15886 |
+
},
|
15887 |
+
{
|
15888 |
+
"epoch": 0.88,
|
15889 |
+
"grad_norm": 1.0730946063995361,
|
15890 |
+
"learning_rate": 6.006399069226295e-06,
|
15891 |
+
"loss": 5.3234,
|
15892 |
+
"step": 9076
|
15893 |
+
},
|
15894 |
+
{
|
15895 |
+
"epoch": 0.88,
|
15896 |
+
"grad_norm": 1.077646017074585,
|
15897 |
+
"learning_rate": 5.987007950358736e-06,
|
15898 |
+
"loss": 5.3009,
|
15899 |
+
"step": 9080
|
15900 |
+
},
|
15901 |
+
{
|
15902 |
+
"epoch": 0.88,
|
15903 |
+
"grad_norm": 1.0901986360549927,
|
15904 |
+
"learning_rate": 5.9676168314911775e-06,
|
15905 |
+
"loss": 5.2653,
|
15906 |
+
"step": 9084
|
15907 |
+
},
|
15908 |
+
{
|
15909 |
+
"epoch": 0.88,
|
15910 |
+
"grad_norm": 1.1307499408721924,
|
15911 |
+
"learning_rate": 5.948225712623619e-06,
|
15912 |
+
"loss": 5.393,
|
15913 |
+
"step": 9088
|
15914 |
+
},
|
15915 |
+
{
|
15916 |
+
"epoch": 0.88,
|
15917 |
+
"grad_norm": 1.1002899408340454,
|
15918 |
+
"learning_rate": 5.92883459375606e-06,
|
15919 |
+
"loss": 5.2785,
|
15920 |
+
"step": 9092
|
15921 |
+
},
|
15922 |
+
{
|
15923 |
+
"epoch": 0.88,
|
15924 |
+
"grad_norm": 1.1348730325698853,
|
15925 |
+
"learning_rate": 5.909443474888502e-06,
|
15926 |
+
"loss": 5.3749,
|
15927 |
+
"step": 9096
|
15928 |
+
},
|
15929 |
+
{
|
15930 |
+
"epoch": 0.88,
|
15931 |
+
"grad_norm": 1.2404577732086182,
|
15932 |
+
"learning_rate": 5.890052356020943e-06,
|
15933 |
+
"loss": 5.2968,
|
15934 |
+
"step": 9100
|
15935 |
+
},
|
15936 |
+
{
|
15937 |
+
"epoch": 0.88,
|
15938 |
+
"grad_norm": 1.1860145330429077,
|
15939 |
+
"learning_rate": 5.8706612371533835e-06,
|
15940 |
+
"loss": 5.2951,
|
15941 |
+
"step": 9104
|
15942 |
+
},
|
15943 |
+
{
|
15944 |
+
"epoch": 0.88,
|
15945 |
+
"grad_norm": 1.0747588872909546,
|
15946 |
+
"learning_rate": 5.851270118285825e-06,
|
15947 |
+
"loss": 5.2725,
|
15948 |
+
"step": 9108
|
15949 |
+
},
|
15950 |
+
{
|
15951 |
+
"epoch": 0.88,
|
15952 |
+
"grad_norm": 1.0642809867858887,
|
15953 |
+
"learning_rate": 5.831878999418266e-06,
|
15954 |
+
"loss": 5.3347,
|
15955 |
+
"step": 9112
|
15956 |
+
},
|
15957 |
+
{
|
15958 |
+
"epoch": 0.88,
|
15959 |
+
"grad_norm": 1.0361077785491943,
|
15960 |
+
"learning_rate": 5.812487880550708e-06,
|
15961 |
+
"loss": 5.2899,
|
15962 |
+
"step": 9116
|
15963 |
+
},
|
15964 |
+
{
|
15965 |
+
"epoch": 0.88,
|
15966 |
+
"grad_norm": 1.027950406074524,
|
15967 |
+
"learning_rate": 5.793096761683149e-06,
|
15968 |
+
"loss": 5.3016,
|
15969 |
+
"step": 9120
|
15970 |
+
},
|
15971 |
+
{
|
15972 |
+
"epoch": 0.88,
|
15973 |
+
"grad_norm": 1.0341978073120117,
|
15974 |
+
"learning_rate": 5.7737056428155905e-06,
|
15975 |
+
"loss": 5.3066,
|
15976 |
+
"step": 9124
|
15977 |
+
},
|
15978 |
+
{
|
15979 |
+
"epoch": 0.89,
|
15980 |
+
"grad_norm": 1.0980345010757446,
|
15981 |
+
"learning_rate": 5.754314523948033e-06,
|
15982 |
+
"loss": 5.2685,
|
15983 |
+
"step": 9128
|
15984 |
+
},
|
15985 |
+
{
|
15986 |
+
"epoch": 0.89,
|
15987 |
+
"grad_norm": 1.0036580562591553,
|
15988 |
+
"learning_rate": 5.734923405080474e-06,
|
15989 |
+
"loss": 5.2972,
|
15990 |
+
"step": 9132
|
15991 |
+
},
|
15992 |
+
{
|
15993 |
+
"epoch": 0.89,
|
15994 |
+
"grad_norm": 1.0951625108718872,
|
15995 |
+
"learning_rate": 5.715532286212915e-06,
|
15996 |
+
"loss": 5.2363,
|
15997 |
+
"step": 9136
|
15998 |
+
},
|
15999 |
+
{
|
16000 |
+
"epoch": 0.89,
|
16001 |
+
"grad_norm": 1.010358214378357,
|
16002 |
+
"learning_rate": 5.696141167345356e-06,
|
16003 |
+
"loss": 5.2715,
|
16004 |
+
"step": 9140
|
16005 |
+
},
|
16006 |
+
{
|
16007 |
+
"epoch": 0.89,
|
16008 |
+
"grad_norm": 1.0721516609191895,
|
16009 |
+
"learning_rate": 5.676750048477797e-06,
|
16010 |
+
"loss": 5.3006,
|
16011 |
+
"step": 9144
|
16012 |
+
},
|
16013 |
+
{
|
16014 |
+
"epoch": 0.89,
|
16015 |
+
"grad_norm": 0.9922645688056946,
|
16016 |
+
"learning_rate": 5.657358929610239e-06,
|
16017 |
+
"loss": 5.2985,
|
16018 |
+
"step": 9148
|
16019 |
+
},
|
16020 |
+
{
|
16021 |
+
"epoch": 0.89,
|
16022 |
+
"grad_norm": 1.0958447456359863,
|
16023 |
+
"learning_rate": 5.63796781074268e-06,
|
16024 |
+
"loss": 5.2388,
|
16025 |
+
"step": 9152
|
16026 |
+
},
|
16027 |
+
{
|
16028 |
+
"epoch": 0.89,
|
16029 |
+
"grad_norm": 0.9977266788482666,
|
16030 |
+
"learning_rate": 5.6185766918751215e-06,
|
16031 |
+
"loss": 5.4273,
|
16032 |
+
"step": 9156
|
16033 |
+
},
|
16034 |
+
{
|
16035 |
+
"epoch": 0.89,
|
16036 |
+
"grad_norm": 1.1025915145874023,
|
16037 |
+
"learning_rate": 5.599185573007563e-06,
|
16038 |
+
"loss": 5.3768,
|
16039 |
+
"step": 9160
|
16040 |
+
},
|
16041 |
+
{
|
16042 |
+
"epoch": 0.89,
|
16043 |
+
"grad_norm": 1.0521866083145142,
|
16044 |
+
"learning_rate": 5.579794454140004e-06,
|
16045 |
+
"loss": 5.2555,
|
16046 |
+
"step": 9164
|
16047 |
+
},
|
16048 |
+
{
|
16049 |
+
"epoch": 0.89,
|
16050 |
+
"grad_norm": 1.0546320676803589,
|
16051 |
+
"learning_rate": 5.560403335272446e-06,
|
16052 |
+
"loss": 5.259,
|
16053 |
+
"step": 9168
|
16054 |
+
},
|
16055 |
+
{
|
16056 |
+
"epoch": 0.89,
|
16057 |
+
"grad_norm": 1.084153413772583,
|
16058 |
+
"learning_rate": 5.541012216404887e-06,
|
16059 |
+
"loss": 5.2733,
|
16060 |
+
"step": 9172
|
16061 |
+
},
|
16062 |
+
{
|
16063 |
+
"epoch": 0.89,
|
16064 |
+
"grad_norm": 1.1298420429229736,
|
16065 |
+
"learning_rate": 5.521621097537328e-06,
|
16066 |
+
"loss": 5.2499,
|
16067 |
+
"step": 9176
|
16068 |
+
},
|
16069 |
+
{
|
16070 |
+
"epoch": 0.89,
|
16071 |
+
"grad_norm": 1.0296047925949097,
|
16072 |
+
"learning_rate": 5.502229978669769e-06,
|
16073 |
+
"loss": 5.3293,
|
16074 |
+
"step": 9180
|
16075 |
+
},
|
16076 |
+
{
|
16077 |
+
"epoch": 0.89,
|
16078 |
+
"grad_norm": 1.0231281518936157,
|
16079 |
+
"learning_rate": 5.48283885980221e-06,
|
16080 |
+
"loss": 5.2614,
|
16081 |
+
"step": 9184
|
16082 |
+
},
|
16083 |
+
{
|
16084 |
+
"epoch": 0.89,
|
16085 |
+
"grad_norm": 1.1063759326934814,
|
16086 |
+
"learning_rate": 5.463447740934653e-06,
|
16087 |
+
"loss": 5.3569,
|
16088 |
+
"step": 9188
|
16089 |
+
},
|
16090 |
+
{
|
16091 |
+
"epoch": 0.89,
|
16092 |
+
"grad_norm": 1.0762827396392822,
|
16093 |
+
"learning_rate": 5.444056622067094e-06,
|
16094 |
+
"loss": 5.3734,
|
16095 |
+
"step": 9192
|
16096 |
+
},
|
16097 |
+
{
|
16098 |
+
"epoch": 0.89,
|
16099 |
+
"grad_norm": 1.0667394399642944,
|
16100 |
+
"learning_rate": 5.424665503199535e-06,
|
16101 |
+
"loss": 5.3136,
|
16102 |
+
"step": 9196
|
16103 |
+
},
|
16104 |
+
{
|
16105 |
+
"epoch": 0.89,
|
16106 |
+
"grad_norm": 1.0663567781448364,
|
16107 |
+
"learning_rate": 5.405274384331977e-06,
|
16108 |
+
"loss": 5.2178,
|
16109 |
+
"step": 9200
|
16110 |
+
},
|
16111 |
+
{
|
16112 |
+
"epoch": 0.89,
|
16113 |
+
"grad_norm": 1.1014020442962646,
|
16114 |
+
"learning_rate": 5.385883265464417e-06,
|
16115 |
+
"loss": 5.3121,
|
16116 |
+
"step": 9204
|
16117 |
+
},
|
16118 |
+
{
|
16119 |
+
"epoch": 0.89,
|
16120 |
+
"grad_norm": 1.0944761037826538,
|
16121 |
+
"learning_rate": 5.366492146596859e-06,
|
16122 |
+
"loss": 5.3321,
|
16123 |
+
"step": 9208
|
16124 |
+
},
|
16125 |
+
{
|
16126 |
+
"epoch": 0.89,
|
16127 |
+
"grad_norm": 1.0576825141906738,
|
16128 |
+
"learning_rate": 5.3471010277293e-06,
|
16129 |
+
"loss": 5.187,
|
16130 |
+
"step": 9212
|
16131 |
+
},
|
16132 |
+
{
|
16133 |
+
"epoch": 0.89,
|
16134 |
+
"grad_norm": 1.102414608001709,
|
16135 |
+
"learning_rate": 5.3277099088617414e-06,
|
16136 |
+
"loss": 5.2198,
|
16137 |
+
"step": 9216
|
16138 |
+
},
|
16139 |
+
{
|
16140 |
+
"epoch": 0.89,
|
16141 |
+
"grad_norm": 1.0515443086624146,
|
16142 |
+
"learning_rate": 5.308318789994183e-06,
|
16143 |
+
"loss": 5.3509,
|
16144 |
+
"step": 9220
|
16145 |
+
},
|
16146 |
+
{
|
16147 |
+
"epoch": 0.89,
|
16148 |
+
"grad_norm": 1.0225639343261719,
|
16149 |
+
"learning_rate": 5.288927671126624e-06,
|
16150 |
+
"loss": 5.3264,
|
16151 |
+
"step": 9224
|
16152 |
+
},
|
16153 |
+
{
|
16154 |
+
"epoch": 0.89,
|
16155 |
+
"grad_norm": 1.0482409000396729,
|
16156 |
+
"learning_rate": 5.269536552259066e-06,
|
16157 |
+
"loss": 5.314,
|
16158 |
+
"step": 9228
|
16159 |
+
},
|
16160 |
+
{
|
16161 |
+
"epoch": 0.9,
|
16162 |
+
"grad_norm": 1.0829366445541382,
|
16163 |
+
"learning_rate": 5.250145433391507e-06,
|
16164 |
+
"loss": 5.3288,
|
16165 |
+
"step": 9232
|
16166 |
+
},
|
16167 |
+
{
|
16168 |
+
"epoch": 0.9,
|
16169 |
+
"grad_norm": 1.018813967704773,
|
16170 |
+
"learning_rate": 5.230754314523948e-06,
|
16171 |
+
"loss": 5.245,
|
16172 |
+
"step": 9236
|
16173 |
+
},
|
16174 |
+
{
|
16175 |
+
"epoch": 0.9,
|
16176 |
+
"grad_norm": 1.1042280197143555,
|
16177 |
+
"learning_rate": 5.21136319565639e-06,
|
16178 |
+
"loss": 5.3871,
|
16179 |
+
"step": 9240
|
16180 |
+
},
|
16181 |
+
{
|
16182 |
+
"epoch": 0.9,
|
16183 |
+
"grad_norm": 1.0457403659820557,
|
16184 |
+
"learning_rate": 5.19197207678883e-06,
|
16185 |
+
"loss": 5.3858,
|
16186 |
+
"step": 9244
|
16187 |
+
},
|
16188 |
+
{
|
16189 |
+
"epoch": 0.9,
|
16190 |
+
"grad_norm": 1.0873547792434692,
|
16191 |
+
"learning_rate": 5.1725809579212725e-06,
|
16192 |
+
"loss": 5.2907,
|
16193 |
+
"step": 9248
|
16194 |
+
},
|
16195 |
+
{
|
16196 |
+
"epoch": 0.9,
|
16197 |
+
"grad_norm": 1.0615798234939575,
|
16198 |
+
"learning_rate": 5.153189839053714e-06,
|
16199 |
+
"loss": 5.2729,
|
16200 |
+
"step": 9252
|
16201 |
+
},
|
16202 |
+
{
|
16203 |
+
"epoch": 0.9,
|
16204 |
+
"grad_norm": 1.1086784601211548,
|
16205 |
+
"learning_rate": 5.133798720186155e-06,
|
16206 |
+
"loss": 5.388,
|
16207 |
+
"step": 9256
|
16208 |
+
},
|
16209 |
+
{
|
16210 |
+
"epoch": 0.9,
|
16211 |
+
"grad_norm": 1.0895764827728271,
|
16212 |
+
"learning_rate": 5.114407601318597e-06,
|
16213 |
+
"loss": 5.2892,
|
16214 |
+
"step": 9260
|
16215 |
+
},
|
16216 |
+
{
|
16217 |
+
"epoch": 0.9,
|
16218 |
+
"grad_norm": 1.0369954109191895,
|
16219 |
+
"learning_rate": 5.095016482451038e-06,
|
16220 |
+
"loss": 5.2989,
|
16221 |
+
"step": 9264
|
16222 |
+
},
|
16223 |
+
{
|
16224 |
+
"epoch": 0.9,
|
16225 |
+
"grad_norm": 1.049892544746399,
|
16226 |
+
"learning_rate": 5.0756253635834794e-06,
|
16227 |
+
"loss": 5.3419,
|
16228 |
+
"step": 9268
|
16229 |
+
},
|
16230 |
+
{
|
16231 |
+
"epoch": 0.9,
|
16232 |
+
"grad_norm": 1.0920242071151733,
|
16233 |
+
"learning_rate": 5.056234244715921e-06,
|
16234 |
+
"loss": 5.3116,
|
16235 |
+
"step": 9272
|
16236 |
+
},
|
16237 |
+
{
|
16238 |
+
"epoch": 0.9,
|
16239 |
+
"grad_norm": 1.0999177694320679,
|
16240 |
+
"learning_rate": 5.036843125848361e-06,
|
16241 |
+
"loss": 5.2779,
|
16242 |
+
"step": 9276
|
16243 |
+
},
|
16244 |
+
{
|
16245 |
+
"epoch": 0.9,
|
16246 |
+
"grad_norm": 1.0239474773406982,
|
16247 |
+
"learning_rate": 5.017452006980803e-06,
|
16248 |
+
"loss": 5.35,
|
16249 |
+
"step": 9280
|
16250 |
+
},
|
16251 |
+
{
|
16252 |
+
"epoch": 0.9,
|
16253 |
+
"grad_norm": 1.0571128129959106,
|
16254 |
+
"learning_rate": 4.998060888113244e-06,
|
16255 |
+
"loss": 5.3139,
|
16256 |
+
"step": 9284
|
16257 |
+
},
|
16258 |
+
{
|
16259 |
+
"epoch": 0.9,
|
16260 |
+
"grad_norm": 1.1082773208618164,
|
16261 |
+
"learning_rate": 4.9786697692456855e-06,
|
16262 |
+
"loss": 5.2779,
|
16263 |
+
"step": 9288
|
16264 |
}
|
16265 |
],
|
16266 |
"logging_steps": 4,
|
|
|
16268 |
"num_input_tokens_seen": 0,
|
16269 |
"num_train_epochs": 1,
|
16270 |
"save_steps": 1032,
|
16271 |
+
"total_flos": 7.822868224986317e+16,
|
16272 |
"train_batch_size": 8,
|
16273 |
"trial_name": null,
|
16274 |
"trial_params": null
|