Training in progress, step 2100, checkpoint
Browse files- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2103 -3
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4903351912
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd1eca6c800ef6825e7bf60a3f606c08a3664621930ae86599328032ddfc0a0a
|
3 |
size 4903351912
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4947570872
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e4678c8c596ac9adbe11b26a3e6cf61fda2a78b8c8aa1d36e117e0b4b9dd9b8
|
3 |
size 4947570872
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4962221464
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be4ffd13d9c3bc7db9d2f6de30ddc485decf506fd2322157d81e0b581d71538a
|
3 |
size 4962221464
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3670322200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b620e3f5d53efc08164bcbe9887fa6c2b5101cde21efdb03ca2b3fcf5b3b67ad
|
3 |
size 3670322200
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2216
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f485548cbbc71688a7015971c1a5ece38765abc0497df7c8a97a11895a5364a5
|
3 |
size 2216
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:459e4f6348b09807a678fef615a08119a6cb3d845b8e137dc15cc9629d38a1d8
|
3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -12607,6 +12607,2106 @@
|
|
12607 |
"learning_rate": 2.6460419706234897e-06,
|
12608 |
"loss": 1.1064,
|
12609 |
"step": 1800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12610 |
}
|
12611 |
],
|
12612 |
"logging_steps": 1,
|
@@ -12626,7 +14726,7 @@
|
|
12626 |
"attributes": {}
|
12627 |
}
|
12628 |
},
|
12629 |
-
"total_flos":
|
12630 |
"train_batch_size": 8,
|
12631 |
"trial_name": null,
|
12632 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9662921348314608,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
12607 |
"learning_rate": 2.6460419706234897e-06,
|
12608 |
"loss": 1.1064,
|
12609 |
"step": 1800
|
12610 |
+
},
|
12611 |
+
{
|
12612 |
+
"epoch": 1.6863295880149813,
|
12613 |
+
"grad_norm": 0.0,
|
12614 |
+
"learning_rate": 2.63066938055484e-06,
|
12615 |
+
"loss": 1.148,
|
12616 |
+
"step": 1801
|
12617 |
+
},
|
12618 |
+
{
|
12619 |
+
"epoch": 1.6872659176029963,
|
12620 |
+
"grad_norm": 0.0,
|
12621 |
+
"learning_rate": 2.6153384312461083e-06,
|
12622 |
+
"loss": 1.1605,
|
12623 |
+
"step": 1802
|
12624 |
+
},
|
12625 |
+
{
|
12626 |
+
"epoch": 1.6882022471910112,
|
12627 |
+
"grad_norm": 0.0,
|
12628 |
+
"learning_rate": 2.600049159451288e-06,
|
12629 |
+
"loss": 1.1428,
|
12630 |
+
"step": 1803
|
12631 |
+
},
|
12632 |
+
{
|
12633 |
+
"epoch": 1.6891385767790261,
|
12634 |
+
"grad_norm": 0.0,
|
12635 |
+
"learning_rate": 2.5848016018244582e-06,
|
12636 |
+
"loss": 1.0861,
|
12637 |
+
"step": 1804
|
12638 |
+
},
|
12639 |
+
{
|
12640 |
+
"epoch": 1.690074906367041,
|
12641 |
+
"grad_norm": 0.0,
|
12642 |
+
"learning_rate": 2.569595794919708e-06,
|
12643 |
+
"loss": 1.1292,
|
12644 |
+
"step": 1805
|
12645 |
+
},
|
12646 |
+
{
|
12647 |
+
"epoch": 1.6910112359550562,
|
12648 |
+
"grad_norm": 0.0,
|
12649 |
+
"learning_rate": 2.5544317751910197e-06,
|
12650 |
+
"loss": 1.1905,
|
12651 |
+
"step": 1806
|
12652 |
+
},
|
12653 |
+
{
|
12654 |
+
"epoch": 1.6919475655430711,
|
12655 |
+
"grad_norm": 0.0,
|
12656 |
+
"learning_rate": 2.5393095789921972e-06,
|
12657 |
+
"loss": 1.1001,
|
12658 |
+
"step": 1807
|
12659 |
+
},
|
12660 |
+
{
|
12661 |
+
"epoch": 1.6928838951310863,
|
12662 |
+
"grad_norm": 0.0,
|
12663 |
+
"learning_rate": 2.52422924257679e-06,
|
12664 |
+
"loss": 1.1106,
|
12665 |
+
"step": 1808
|
12666 |
+
},
|
12667 |
+
{
|
12668 |
+
"epoch": 1.6938202247191012,
|
12669 |
+
"grad_norm": 0.0,
|
12670 |
+
"learning_rate": 2.5091908020979783e-06,
|
12671 |
+
"loss": 1.1609,
|
12672 |
+
"step": 1809
|
12673 |
+
},
|
12674 |
+
{
|
12675 |
+
"epoch": 1.6947565543071161,
|
12676 |
+
"grad_norm": 0.0,
|
12677 |
+
"learning_rate": 2.4941942936085116e-06,
|
12678 |
+
"loss": 1.1805,
|
12679 |
+
"step": 1810
|
12680 |
+
},
|
12681 |
+
{
|
12682 |
+
"epoch": 1.695692883895131,
|
12683 |
+
"grad_norm": 0.0,
|
12684 |
+
"learning_rate": 2.4792397530606137e-06,
|
12685 |
+
"loss": 1.1379,
|
12686 |
+
"step": 1811
|
12687 |
+
},
|
12688 |
+
{
|
12689 |
+
"epoch": 1.696629213483146,
|
12690 |
+
"grad_norm": 0.0,
|
12691 |
+
"learning_rate": 2.4643272163058927e-06,
|
12692 |
+
"loss": 1.1422,
|
12693 |
+
"step": 1812
|
12694 |
+
},
|
12695 |
+
{
|
12696 |
+
"epoch": 1.697565543071161,
|
12697 |
+
"grad_norm": 0.0,
|
12698 |
+
"learning_rate": 2.4494567190952512e-06,
|
12699 |
+
"loss": 1.1212,
|
12700 |
+
"step": 1813
|
12701 |
+
},
|
12702 |
+
{
|
12703 |
+
"epoch": 1.698501872659176,
|
12704 |
+
"grad_norm": 0.0,
|
12705 |
+
"learning_rate": 2.4346282970788228e-06,
|
12706 |
+
"loss": 1.0981,
|
12707 |
+
"step": 1814
|
12708 |
+
},
|
12709 |
+
{
|
12710 |
+
"epoch": 1.699438202247191,
|
12711 |
+
"grad_norm": 0.0,
|
12712 |
+
"learning_rate": 2.4198419858058577e-06,
|
12713 |
+
"loss": 1.1868,
|
12714 |
+
"step": 1815
|
12715 |
+
},
|
12716 |
+
{
|
12717 |
+
"epoch": 1.7003745318352061,
|
12718 |
+
"grad_norm": 0.0,
|
12719 |
+
"learning_rate": 2.4050978207246513e-06,
|
12720 |
+
"loss": 1.0925,
|
12721 |
+
"step": 1816
|
12722 |
+
},
|
12723 |
+
{
|
12724 |
+
"epoch": 1.701310861423221,
|
12725 |
+
"grad_norm": 0.0,
|
12726 |
+
"learning_rate": 2.390395837182471e-06,
|
12727 |
+
"loss": 1.1553,
|
12728 |
+
"step": 1817
|
12729 |
+
},
|
12730 |
+
{
|
12731 |
+
"epoch": 1.702247191011236,
|
12732 |
+
"grad_norm": 0.0,
|
12733 |
+
"learning_rate": 2.3757360704254496e-06,
|
12734 |
+
"loss": 1.122,
|
12735 |
+
"step": 1818
|
12736 |
+
},
|
12737 |
+
{
|
12738 |
+
"epoch": 1.703183520599251,
|
12739 |
+
"grad_norm": 0.0,
|
12740 |
+
"learning_rate": 2.361118555598505e-06,
|
12741 |
+
"loss": 1.1165,
|
12742 |
+
"step": 1819
|
12743 |
+
},
|
12744 |
+
{
|
12745 |
+
"epoch": 1.7041198501872659,
|
12746 |
+
"grad_norm": 0.0,
|
12747 |
+
"learning_rate": 2.3465433277452764e-06,
|
12748 |
+
"loss": 1.1197,
|
12749 |
+
"step": 1820
|
12750 |
+
},
|
12751 |
+
{
|
12752 |
+
"epoch": 1.7050561797752808,
|
12753 |
+
"grad_norm": 0.0,
|
12754 |
+
"learning_rate": 2.332010421808022e-06,
|
12755 |
+
"loss": 1.1236,
|
12756 |
+
"step": 1821
|
12757 |
+
},
|
12758 |
+
{
|
12759 |
+
"epoch": 1.7059925093632957,
|
12760 |
+
"grad_norm": 0.0,
|
12761 |
+
"learning_rate": 2.3175198726275226e-06,
|
12762 |
+
"loss": 1.1179,
|
12763 |
+
"step": 1822
|
12764 |
+
},
|
12765 |
+
{
|
12766 |
+
"epoch": 1.7069288389513109,
|
12767 |
+
"grad_norm": 0.0,
|
12768 |
+
"learning_rate": 2.303071714943039e-06,
|
12769 |
+
"loss": 1.1035,
|
12770 |
+
"step": 1823
|
12771 |
+
},
|
12772 |
+
{
|
12773 |
+
"epoch": 1.7078651685393258,
|
12774 |
+
"grad_norm": 0.0,
|
12775 |
+
"learning_rate": 2.288665983392182e-06,
|
12776 |
+
"loss": 1.103,
|
12777 |
+
"step": 1824
|
12778 |
+
},
|
12779 |
+
{
|
12780 |
+
"epoch": 1.708801498127341,
|
12781 |
+
"grad_norm": 0.0,
|
12782 |
+
"learning_rate": 2.2743027125108583e-06,
|
12783 |
+
"loss": 1.1937,
|
12784 |
+
"step": 1825
|
12785 |
+
},
|
12786 |
+
{
|
12787 |
+
"epoch": 1.7097378277153559,
|
12788 |
+
"grad_norm": 0.0,
|
12789 |
+
"learning_rate": 2.259981936733189e-06,
|
12790 |
+
"loss": 1.091,
|
12791 |
+
"step": 1826
|
12792 |
+
},
|
12793 |
+
{
|
12794 |
+
"epoch": 1.7106741573033708,
|
12795 |
+
"grad_norm": 0.0,
|
12796 |
+
"learning_rate": 2.2457036903914097e-06,
|
12797 |
+
"loss": 1.1459,
|
12798 |
+
"step": 1827
|
12799 |
+
},
|
12800 |
+
{
|
12801 |
+
"epoch": 1.7116104868913857,
|
12802 |
+
"grad_norm": 0.0,
|
12803 |
+
"learning_rate": 2.2314680077157935e-06,
|
12804 |
+
"loss": 1.1313,
|
12805 |
+
"step": 1828
|
12806 |
+
},
|
12807 |
+
{
|
12808 |
+
"epoch": 1.7125468164794007,
|
12809 |
+
"grad_norm": 0.0,
|
12810 |
+
"learning_rate": 2.217274922834587e-06,
|
12811 |
+
"loss": 1.1171,
|
12812 |
+
"step": 1829
|
12813 |
+
},
|
12814 |
+
{
|
12815 |
+
"epoch": 1.7134831460674156,
|
12816 |
+
"grad_norm": 0.0,
|
12817 |
+
"learning_rate": 2.2031244697738983e-06,
|
12818 |
+
"loss": 1.1394,
|
12819 |
+
"step": 1830
|
12820 |
+
},
|
12821 |
+
{
|
12822 |
+
"epoch": 1.7144194756554307,
|
12823 |
+
"grad_norm": 0.0,
|
12824 |
+
"learning_rate": 2.1890166824576386e-06,
|
12825 |
+
"loss": 1.1108,
|
12826 |
+
"step": 1831
|
12827 |
+
},
|
12828 |
+
{
|
12829 |
+
"epoch": 1.7153558052434457,
|
12830 |
+
"grad_norm": 0.0,
|
12831 |
+
"learning_rate": 2.174951594707433e-06,
|
12832 |
+
"loss": 1.1035,
|
12833 |
+
"step": 1832
|
12834 |
+
},
|
12835 |
+
{
|
12836 |
+
"epoch": 1.7162921348314608,
|
12837 |
+
"grad_norm": 0.0,
|
12838 |
+
"learning_rate": 2.1609292402425463e-06,
|
12839 |
+
"loss": 1.1463,
|
12840 |
+
"step": 1833
|
12841 |
+
},
|
12842 |
+
{
|
12843 |
+
"epoch": 1.7172284644194757,
|
12844 |
+
"grad_norm": 0.0,
|
12845 |
+
"learning_rate": 2.1469496526797794e-06,
|
12846 |
+
"loss": 1.1339,
|
12847 |
+
"step": 1834
|
12848 |
+
},
|
12849 |
+
{
|
12850 |
+
"epoch": 1.7181647940074907,
|
12851 |
+
"grad_norm": 0.0,
|
12852 |
+
"learning_rate": 2.1330128655334213e-06,
|
12853 |
+
"loss": 1.1219,
|
12854 |
+
"step": 1835
|
12855 |
+
},
|
12856 |
+
{
|
12857 |
+
"epoch": 1.7191011235955056,
|
12858 |
+
"grad_norm": 0.0,
|
12859 |
+
"learning_rate": 2.1191189122151457e-06,
|
12860 |
+
"loss": 1.1204,
|
12861 |
+
"step": 1836
|
12862 |
+
},
|
12863 |
+
{
|
12864 |
+
"epoch": 1.7200374531835205,
|
12865 |
+
"grad_norm": 0.0,
|
12866 |
+
"learning_rate": 2.10526782603393e-06,
|
12867 |
+
"loss": 1.1782,
|
12868 |
+
"step": 1837
|
12869 |
+
},
|
12870 |
+
{
|
12871 |
+
"epoch": 1.7209737827715355,
|
12872 |
+
"grad_norm": 0.0,
|
12873 |
+
"learning_rate": 2.091459640196001e-06,
|
12874 |
+
"loss": 1.153,
|
12875 |
+
"step": 1838
|
12876 |
+
},
|
12877 |
+
{
|
12878 |
+
"epoch": 1.7219101123595506,
|
12879 |
+
"grad_norm": 0.0,
|
12880 |
+
"learning_rate": 2.077694387804721e-06,
|
12881 |
+
"loss": 1.0803,
|
12882 |
+
"step": 1839
|
12883 |
+
},
|
12884 |
+
{
|
12885 |
+
"epoch": 1.7228464419475655,
|
12886 |
+
"grad_norm": 0.0,
|
12887 |
+
"learning_rate": 2.0639721018605274e-06,
|
12888 |
+
"loss": 1.1306,
|
12889 |
+
"step": 1840
|
12890 |
+
},
|
12891 |
+
{
|
12892 |
+
"epoch": 1.7237827715355807,
|
12893 |
+
"grad_norm": 0.0,
|
12894 |
+
"learning_rate": 2.0502928152608658e-06,
|
12895 |
+
"loss": 1.1312,
|
12896 |
+
"step": 1841
|
12897 |
+
},
|
12898 |
+
{
|
12899 |
+
"epoch": 1.7247191011235956,
|
12900 |
+
"grad_norm": 0.0,
|
12901 |
+
"learning_rate": 2.036656560800072e-06,
|
12902 |
+
"loss": 1.1966,
|
12903 |
+
"step": 1842
|
12904 |
+
},
|
12905 |
+
{
|
12906 |
+
"epoch": 1.7256554307116105,
|
12907 |
+
"grad_norm": 0.0,
|
12908 |
+
"learning_rate": 2.0230633711693405e-06,
|
12909 |
+
"loss": 1.0777,
|
12910 |
+
"step": 1843
|
12911 |
+
},
|
12912 |
+
{
|
12913 |
+
"epoch": 1.7265917602996255,
|
12914 |
+
"grad_norm": 0.0,
|
12915 |
+
"learning_rate": 2.009513278956612e-06,
|
12916 |
+
"loss": 1.1132,
|
12917 |
+
"step": 1844
|
12918 |
+
},
|
12919 |
+
{
|
12920 |
+
"epoch": 1.7275280898876404,
|
12921 |
+
"grad_norm": 0.0,
|
12922 |
+
"learning_rate": 1.996006316646508e-06,
|
12923 |
+
"loss": 1.1333,
|
12924 |
+
"step": 1845
|
12925 |
+
},
|
12926 |
+
{
|
12927 |
+
"epoch": 1.7284644194756553,
|
12928 |
+
"grad_norm": 0.0,
|
12929 |
+
"learning_rate": 1.9825425166202472e-06,
|
12930 |
+
"loss": 1.1191,
|
12931 |
+
"step": 1846
|
12932 |
+
},
|
12933 |
+
{
|
12934 |
+
"epoch": 1.7294007490636703,
|
12935 |
+
"grad_norm": 0.0,
|
12936 |
+
"learning_rate": 1.969121911155585e-06,
|
12937 |
+
"loss": 1.1464,
|
12938 |
+
"step": 1847
|
12939 |
+
},
|
12940 |
+
{
|
12941 |
+
"epoch": 1.7303370786516854,
|
12942 |
+
"grad_norm": 0.0,
|
12943 |
+
"learning_rate": 1.955744532426711e-06,
|
12944 |
+
"loss": 1.1273,
|
12945 |
+
"step": 1848
|
12946 |
+
},
|
12947 |
+
{
|
12948 |
+
"epoch": 1.7312734082397003,
|
12949 |
+
"grad_norm": 0.0,
|
12950 |
+
"learning_rate": 1.942410412504183e-06,
|
12951 |
+
"loss": 1.078,
|
12952 |
+
"step": 1849
|
12953 |
+
},
|
12954 |
+
{
|
12955 |
+
"epoch": 1.7322097378277155,
|
12956 |
+
"grad_norm": 0.0,
|
12957 |
+
"learning_rate": 1.929119583354868e-06,
|
12958 |
+
"loss": 1.111,
|
12959 |
+
"step": 1850
|
12960 |
+
},
|
12961 |
+
{
|
12962 |
+
"epoch": 1.7331460674157304,
|
12963 |
+
"grad_norm": 0.0,
|
12964 |
+
"learning_rate": 1.915872076841827e-06,
|
12965 |
+
"loss": 1.0737,
|
12966 |
+
"step": 1851
|
12967 |
+
},
|
12968 |
+
{
|
12969 |
+
"epoch": 1.7340823970037453,
|
12970 |
+
"grad_norm": 0.0,
|
12971 |
+
"learning_rate": 1.9026679247242796e-06,
|
12972 |
+
"loss": 1.181,
|
12973 |
+
"step": 1852
|
12974 |
+
},
|
12975 |
+
{
|
12976 |
+
"epoch": 1.7350187265917603,
|
12977 |
+
"grad_norm": 0.0,
|
12978 |
+
"learning_rate": 1.8895071586574932e-06,
|
12979 |
+
"loss": 1.1557,
|
12980 |
+
"step": 1853
|
12981 |
+
},
|
12982 |
+
{
|
12983 |
+
"epoch": 1.7359550561797752,
|
12984 |
+
"grad_norm": 0.0,
|
12985 |
+
"learning_rate": 1.8763898101927358e-06,
|
12986 |
+
"loss": 1.1126,
|
12987 |
+
"step": 1854
|
12988 |
+
},
|
12989 |
+
{
|
12990 |
+
"epoch": 1.7368913857677901,
|
12991 |
+
"grad_norm": 0.0,
|
12992 |
+
"learning_rate": 1.8633159107771747e-06,
|
12993 |
+
"loss": 1.1064,
|
12994 |
+
"step": 1855
|
12995 |
+
},
|
12996 |
+
{
|
12997 |
+
"epoch": 1.7378277153558053,
|
12998 |
+
"grad_norm": 0.0,
|
12999 |
+
"learning_rate": 1.850285491753827e-06,
|
13000 |
+
"loss": 1.1032,
|
13001 |
+
"step": 1856
|
13002 |
+
},
|
13003 |
+
{
|
13004 |
+
"epoch": 1.7387640449438202,
|
13005 |
+
"grad_norm": 0.0,
|
13006 |
+
"learning_rate": 1.8372985843614577e-06,
|
13007 |
+
"loss": 1.1716,
|
13008 |
+
"step": 1857
|
13009 |
+
},
|
13010 |
+
{
|
13011 |
+
"epoch": 1.7397003745318353,
|
13012 |
+
"grad_norm": 0.0,
|
13013 |
+
"learning_rate": 1.8243552197345327e-06,
|
13014 |
+
"loss": 1.1517,
|
13015 |
+
"step": 1858
|
13016 |
+
},
|
13017 |
+
{
|
13018 |
+
"epoch": 1.7406367041198503,
|
13019 |
+
"grad_norm": 0.0,
|
13020 |
+
"learning_rate": 1.8114554289031172e-06,
|
13021 |
+
"loss": 1.1146,
|
13022 |
+
"step": 1859
|
13023 |
+
},
|
13024 |
+
{
|
13025 |
+
"epoch": 1.7415730337078652,
|
13026 |
+
"grad_norm": 0.0,
|
13027 |
+
"learning_rate": 1.7985992427928133e-06,
|
13028 |
+
"loss": 1.1564,
|
13029 |
+
"step": 1860
|
13030 |
+
},
|
13031 |
+
{
|
13032 |
+
"epoch": 1.7425093632958801,
|
13033 |
+
"grad_norm": 0.0,
|
13034 |
+
"learning_rate": 1.785786692224698e-06,
|
13035 |
+
"loss": 1.1501,
|
13036 |
+
"step": 1861
|
13037 |
+
},
|
13038 |
+
{
|
13039 |
+
"epoch": 1.743445692883895,
|
13040 |
+
"grad_norm": 0.0,
|
13041 |
+
"learning_rate": 1.7730178079152315e-06,
|
13042 |
+
"loss": 1.1287,
|
13043 |
+
"step": 1862
|
13044 |
+
},
|
13045 |
+
{
|
13046 |
+
"epoch": 1.74438202247191,
|
13047 |
+
"grad_norm": 0.0,
|
13048 |
+
"learning_rate": 1.7602926204761806e-06,
|
13049 |
+
"loss": 1.1267,
|
13050 |
+
"step": 1863
|
13051 |
+
},
|
13052 |
+
{
|
13053 |
+
"epoch": 1.7453183520599251,
|
13054 |
+
"grad_norm": 0.0,
|
13055 |
+
"learning_rate": 1.747611160414573e-06,
|
13056 |
+
"loss": 1.1126,
|
13057 |
+
"step": 1864
|
13058 |
+
},
|
13059 |
+
{
|
13060 |
+
"epoch": 1.74625468164794,
|
13061 |
+
"grad_norm": 0.0,
|
13062 |
+
"learning_rate": 1.7349734581325894e-06,
|
13063 |
+
"loss": 1.1432,
|
13064 |
+
"step": 1865
|
13065 |
+
},
|
13066 |
+
{
|
13067 |
+
"epoch": 1.7471910112359552,
|
13068 |
+
"grad_norm": 0.0,
|
13069 |
+
"learning_rate": 1.7223795439275125e-06,
|
13070 |
+
"loss": 1.1108,
|
13071 |
+
"step": 1866
|
13072 |
+
},
|
13073 |
+
{
|
13074 |
+
"epoch": 1.7481273408239701,
|
13075 |
+
"grad_norm": 0.0,
|
13076 |
+
"learning_rate": 1.7098294479916555e-06,
|
13077 |
+
"loss": 1.1209,
|
13078 |
+
"step": 1867
|
13079 |
+
},
|
13080 |
+
{
|
13081 |
+
"epoch": 1.749063670411985,
|
13082 |
+
"grad_norm": 0.0,
|
13083 |
+
"learning_rate": 1.6973232004122708e-06,
|
13084 |
+
"loss": 1.1759,
|
13085 |
+
"step": 1868
|
13086 |
+
},
|
13087 |
+
{
|
13088 |
+
"epoch": 1.75,
|
13089 |
+
"grad_norm": 0.0,
|
13090 |
+
"learning_rate": 1.6848608311714954e-06,
|
13091 |
+
"loss": 1.1114,
|
13092 |
+
"step": 1869
|
13093 |
+
},
|
13094 |
+
{
|
13095 |
+
"epoch": 1.750936329588015,
|
13096 |
+
"grad_norm": 0.0,
|
13097 |
+
"learning_rate": 1.6724423701462767e-06,
|
13098 |
+
"loss": 1.1744,
|
13099 |
+
"step": 1870
|
13100 |
+
},
|
13101 |
+
{
|
13102 |
+
"epoch": 1.7518726591760299,
|
13103 |
+
"grad_norm": 0.0,
|
13104 |
+
"learning_rate": 1.660067847108293e-06,
|
13105 |
+
"loss": 1.1327,
|
13106 |
+
"step": 1871
|
13107 |
+
},
|
13108 |
+
{
|
13109 |
+
"epoch": 1.7528089887640448,
|
13110 |
+
"grad_norm": 0.0,
|
13111 |
+
"learning_rate": 1.6477372917238877e-06,
|
13112 |
+
"loss": 1.1368,
|
13113 |
+
"step": 1872
|
13114 |
+
},
|
13115 |
+
{
|
13116 |
+
"epoch": 1.75374531835206,
|
13117 |
+
"grad_norm": 0.0,
|
13118 |
+
"learning_rate": 1.6354507335540004e-06,
|
13119 |
+
"loss": 1.1294,
|
13120 |
+
"step": 1873
|
13121 |
+
},
|
13122 |
+
{
|
13123 |
+
"epoch": 1.7546816479400749,
|
13124 |
+
"grad_norm": 0.0,
|
13125 |
+
"learning_rate": 1.6232082020540918e-06,
|
13126 |
+
"loss": 1.1278,
|
13127 |
+
"step": 1874
|
13128 |
+
},
|
13129 |
+
{
|
13130 |
+
"epoch": 1.75561797752809,
|
13131 |
+
"grad_norm": 0.0,
|
13132 |
+
"learning_rate": 1.6110097265740665e-06,
|
13133 |
+
"loss": 1.1718,
|
13134 |
+
"step": 1875
|
13135 |
+
},
|
13136 |
+
{
|
13137 |
+
"epoch": 1.756554307116105,
|
13138 |
+
"grad_norm": 0.0,
|
13139 |
+
"learning_rate": 1.5988553363582225e-06,
|
13140 |
+
"loss": 1.0986,
|
13141 |
+
"step": 1876
|
13142 |
+
},
|
13143 |
+
{
|
13144 |
+
"epoch": 1.7574906367041199,
|
13145 |
+
"grad_norm": 0.0,
|
13146 |
+
"learning_rate": 1.5867450605451673e-06,
|
13147 |
+
"loss": 1.1462,
|
13148 |
+
"step": 1877
|
13149 |
+
},
|
13150 |
+
{
|
13151 |
+
"epoch": 1.7584269662921348,
|
13152 |
+
"grad_norm": 0.0,
|
13153 |
+
"learning_rate": 1.5746789281677366e-06,
|
13154 |
+
"loss": 1.1643,
|
13155 |
+
"step": 1878
|
13156 |
+
},
|
13157 |
+
{
|
13158 |
+
"epoch": 1.7593632958801497,
|
13159 |
+
"grad_norm": 0.0,
|
13160 |
+
"learning_rate": 1.562656968152958e-06,
|
13161 |
+
"loss": 1.1455,
|
13162 |
+
"step": 1879
|
13163 |
+
},
|
13164 |
+
{
|
13165 |
+
"epoch": 1.7602996254681647,
|
13166 |
+
"grad_norm": 0.0,
|
13167 |
+
"learning_rate": 1.550679209321946e-06,
|
13168 |
+
"loss": 1.1248,
|
13169 |
+
"step": 1880
|
13170 |
+
},
|
13171 |
+
{
|
13172 |
+
"epoch": 1.7612359550561798,
|
13173 |
+
"grad_norm": 0.0,
|
13174 |
+
"learning_rate": 1.5387456803898483e-06,
|
13175 |
+
"loss": 1.1048,
|
13176 |
+
"step": 1881
|
13177 |
+
},
|
13178 |
+
{
|
13179 |
+
"epoch": 1.7621722846441947,
|
13180 |
+
"grad_norm": 0.0,
|
13181 |
+
"learning_rate": 1.5268564099657913e-06,
|
13182 |
+
"loss": 1.1194,
|
13183 |
+
"step": 1882
|
13184 |
+
},
|
13185 |
+
{
|
13186 |
+
"epoch": 1.7631086142322099,
|
13187 |
+
"grad_norm": 0.0,
|
13188 |
+
"learning_rate": 1.5150114265527816e-06,
|
13189 |
+
"loss": 1.1161,
|
13190 |
+
"step": 1883
|
13191 |
+
},
|
13192 |
+
{
|
13193 |
+
"epoch": 1.7640449438202248,
|
13194 |
+
"grad_norm": 0.0,
|
13195 |
+
"learning_rate": 1.5032107585476597e-06,
|
13196 |
+
"loss": 1.123,
|
13197 |
+
"step": 1884
|
13198 |
+
},
|
13199 |
+
{
|
13200 |
+
"epoch": 1.7649812734082397,
|
13201 |
+
"grad_norm": 0.0,
|
13202 |
+
"learning_rate": 1.4914544342410286e-06,
|
13203 |
+
"loss": 1.1087,
|
13204 |
+
"step": 1885
|
13205 |
+
},
|
13206 |
+
{
|
13207 |
+
"epoch": 1.7659176029962547,
|
13208 |
+
"grad_norm": 0.0,
|
13209 |
+
"learning_rate": 1.4797424818171747e-06,
|
13210 |
+
"loss": 1.1131,
|
13211 |
+
"step": 1886
|
13212 |
+
},
|
13213 |
+
{
|
13214 |
+
"epoch": 1.7668539325842696,
|
13215 |
+
"grad_norm": 0.0,
|
13216 |
+
"learning_rate": 1.468074929354022e-06,
|
13217 |
+
"loss": 1.0586,
|
13218 |
+
"step": 1887
|
13219 |
+
},
|
13220 |
+
{
|
13221 |
+
"epoch": 1.7677902621722845,
|
13222 |
+
"grad_norm": 0.0,
|
13223 |
+
"learning_rate": 1.4564518048230336e-06,
|
13224 |
+
"loss": 1.1406,
|
13225 |
+
"step": 1888
|
13226 |
+
},
|
13227 |
+
{
|
13228 |
+
"epoch": 1.7687265917602997,
|
13229 |
+
"grad_norm": 0.0,
|
13230 |
+
"learning_rate": 1.44487313608918e-06,
|
13231 |
+
"loss": 1.1916,
|
13232 |
+
"step": 1889
|
13233 |
+
},
|
13234 |
+
{
|
13235 |
+
"epoch": 1.7696629213483146,
|
13236 |
+
"grad_norm": 0.0,
|
13237 |
+
"learning_rate": 1.4333389509108386e-06,
|
13238 |
+
"loss": 1.1122,
|
13239 |
+
"step": 1890
|
13240 |
+
},
|
13241 |
+
{
|
13242 |
+
"epoch": 1.7705992509363297,
|
13243 |
+
"grad_norm": 0.0,
|
13244 |
+
"learning_rate": 1.4218492769397573e-06,
|
13245 |
+
"loss": 1.0686,
|
13246 |
+
"step": 1891
|
13247 |
+
},
|
13248 |
+
{
|
13249 |
+
"epoch": 1.7715355805243447,
|
13250 |
+
"grad_norm": 0.0,
|
13251 |
+
"learning_rate": 1.4104041417209646e-06,
|
13252 |
+
"loss": 1.1759,
|
13253 |
+
"step": 1892
|
13254 |
+
},
|
13255 |
+
{
|
13256 |
+
"epoch": 1.7724719101123596,
|
13257 |
+
"grad_norm": 0.0,
|
13258 |
+
"learning_rate": 1.3990035726927121e-06,
|
13259 |
+
"loss": 1.1286,
|
13260 |
+
"step": 1893
|
13261 |
+
},
|
13262 |
+
{
|
13263 |
+
"epoch": 1.7734082397003745,
|
13264 |
+
"grad_norm": 0.0,
|
13265 |
+
"learning_rate": 1.3876475971864189e-06,
|
13266 |
+
"loss": 1.1434,
|
13267 |
+
"step": 1894
|
13268 |
+
},
|
13269 |
+
{
|
13270 |
+
"epoch": 1.7743445692883895,
|
13271 |
+
"grad_norm": 0.0,
|
13272 |
+
"learning_rate": 1.3763362424265902e-06,
|
13273 |
+
"loss": 1.1311,
|
13274 |
+
"step": 1895
|
13275 |
+
},
|
13276 |
+
{
|
13277 |
+
"epoch": 1.7752808988764044,
|
13278 |
+
"grad_norm": 0.0,
|
13279 |
+
"learning_rate": 1.3650695355307542e-06,
|
13280 |
+
"loss": 1.1556,
|
13281 |
+
"step": 1896
|
13282 |
+
},
|
13283 |
+
{
|
13284 |
+
"epoch": 1.7762172284644193,
|
13285 |
+
"grad_norm": 0.0,
|
13286 |
+
"learning_rate": 1.3538475035094134e-06,
|
13287 |
+
"loss": 1.1689,
|
13288 |
+
"step": 1897
|
13289 |
+
},
|
13290 |
+
{
|
13291 |
+
"epoch": 1.7771535580524345,
|
13292 |
+
"grad_norm": 0.0,
|
13293 |
+
"learning_rate": 1.3426701732659586e-06,
|
13294 |
+
"loss": 1.159,
|
13295 |
+
"step": 1898
|
13296 |
+
},
|
13297 |
+
{
|
13298 |
+
"epoch": 1.7780898876404494,
|
13299 |
+
"grad_norm": 0.0,
|
13300 |
+
"learning_rate": 1.331537571596615e-06,
|
13301 |
+
"loss": 1.1431,
|
13302 |
+
"step": 1899
|
13303 |
+
},
|
13304 |
+
{
|
13305 |
+
"epoch": 1.7790262172284645,
|
13306 |
+
"grad_norm": 0.0,
|
13307 |
+
"learning_rate": 1.320449725190387e-06,
|
13308 |
+
"loss": 1.1011,
|
13309 |
+
"step": 1900
|
13310 |
+
},
|
13311 |
+
{
|
13312 |
+
"epoch": 1.7799625468164795,
|
13313 |
+
"grad_norm": 0.0,
|
13314 |
+
"learning_rate": 1.3094066606289668e-06,
|
13315 |
+
"loss": 1.1556,
|
13316 |
+
"step": 1901
|
13317 |
+
},
|
13318 |
+
{
|
13319 |
+
"epoch": 1.7808988764044944,
|
13320 |
+
"grad_norm": 0.0,
|
13321 |
+
"learning_rate": 1.2984084043866974e-06,
|
13322 |
+
"loss": 1.1456,
|
13323 |
+
"step": 1902
|
13324 |
+
},
|
13325 |
+
{
|
13326 |
+
"epoch": 1.7818352059925093,
|
13327 |
+
"grad_norm": 0.0,
|
13328 |
+
"learning_rate": 1.2874549828305028e-06,
|
13329 |
+
"loss": 1.1227,
|
13330 |
+
"step": 1903
|
13331 |
+
},
|
13332 |
+
{
|
13333 |
+
"epoch": 1.7827715355805243,
|
13334 |
+
"grad_norm": 0.0,
|
13335 |
+
"learning_rate": 1.2765464222198131e-06,
|
13336 |
+
"loss": 1.0714,
|
13337 |
+
"step": 1904
|
13338 |
+
},
|
13339 |
+
{
|
13340 |
+
"epoch": 1.7837078651685392,
|
13341 |
+
"grad_norm": 0.0,
|
13342 |
+
"learning_rate": 1.265682748706516e-06,
|
13343 |
+
"loss": 1.1972,
|
13344 |
+
"step": 1905
|
13345 |
+
},
|
13346 |
+
{
|
13347 |
+
"epoch": 1.7846441947565543,
|
13348 |
+
"grad_norm": 0.0,
|
13349 |
+
"learning_rate": 1.2548639883348868e-06,
|
13350 |
+
"loss": 1.151,
|
13351 |
+
"step": 1906
|
13352 |
+
},
|
13353 |
+
{
|
13354 |
+
"epoch": 1.7855805243445693,
|
13355 |
+
"grad_norm": 0.0,
|
13356 |
+
"learning_rate": 1.2440901670415274e-06,
|
13357 |
+
"loss": 1.168,
|
13358 |
+
"step": 1907
|
13359 |
+
},
|
13360 |
+
{
|
13361 |
+
"epoch": 1.7865168539325844,
|
13362 |
+
"grad_norm": 0.0,
|
13363 |
+
"learning_rate": 1.233361310655301e-06,
|
13364 |
+
"loss": 1.1747,
|
13365 |
+
"step": 1908
|
13366 |
+
},
|
13367 |
+
{
|
13368 |
+
"epoch": 1.7874531835205993,
|
13369 |
+
"grad_norm": 0.0,
|
13370 |
+
"learning_rate": 1.2226774448972733e-06,
|
13371 |
+
"loss": 1.1168,
|
13372 |
+
"step": 1909
|
13373 |
+
},
|
13374 |
+
{
|
13375 |
+
"epoch": 1.7883895131086143,
|
13376 |
+
"grad_norm": 0.0,
|
13377 |
+
"learning_rate": 1.2120385953806625e-06,
|
13378 |
+
"loss": 1.112,
|
13379 |
+
"step": 1910
|
13380 |
+
},
|
13381 |
+
{
|
13382 |
+
"epoch": 1.7893258426966292,
|
13383 |
+
"grad_norm": 0.0,
|
13384 |
+
"learning_rate": 1.2014447876107461e-06,
|
13385 |
+
"loss": 1.1243,
|
13386 |
+
"step": 1911
|
13387 |
+
},
|
13388 |
+
{
|
13389 |
+
"epoch": 1.7902621722846441,
|
13390 |
+
"grad_norm": 0.0,
|
13391 |
+
"learning_rate": 1.1908960469848373e-06,
|
13392 |
+
"loss": 1.148,
|
13393 |
+
"step": 1912
|
13394 |
+
},
|
13395 |
+
{
|
13396 |
+
"epoch": 1.791198501872659,
|
13397 |
+
"grad_norm": 0.0,
|
13398 |
+
"learning_rate": 1.1803923987921983e-06,
|
13399 |
+
"loss": 1.1278,
|
13400 |
+
"step": 1913
|
13401 |
+
},
|
13402 |
+
{
|
13403 |
+
"epoch": 1.7921348314606742,
|
13404 |
+
"grad_norm": 0.0,
|
13405 |
+
"learning_rate": 1.1699338682139815e-06,
|
13406 |
+
"loss": 1.1665,
|
13407 |
+
"step": 1914
|
13408 |
+
},
|
13409 |
+
{
|
13410 |
+
"epoch": 1.7930711610486891,
|
13411 |
+
"grad_norm": 0.0,
|
13412 |
+
"learning_rate": 1.1595204803231952e-06,
|
13413 |
+
"loss": 1.1424,
|
13414 |
+
"step": 1915
|
13415 |
+
},
|
13416 |
+
{
|
13417 |
+
"epoch": 1.7940074906367043,
|
13418 |
+
"grad_norm": 0.0,
|
13419 |
+
"learning_rate": 1.1491522600846006e-06,
|
13420 |
+
"loss": 1.1059,
|
13421 |
+
"step": 1916
|
13422 |
+
},
|
13423 |
+
{
|
13424 |
+
"epoch": 1.7949438202247192,
|
13425 |
+
"grad_norm": 0.0,
|
13426 |
+
"learning_rate": 1.1388292323546946e-06,
|
13427 |
+
"loss": 1.1319,
|
13428 |
+
"step": 1917
|
13429 |
+
},
|
13430 |
+
{
|
13431 |
+
"epoch": 1.7958801498127341,
|
13432 |
+
"grad_norm": 0.0,
|
13433 |
+
"learning_rate": 1.1285514218816206e-06,
|
13434 |
+
"loss": 1.1768,
|
13435 |
+
"step": 1918
|
13436 |
+
},
|
13437 |
+
{
|
13438 |
+
"epoch": 1.796816479400749,
|
13439 |
+
"grad_norm": 0.0,
|
13440 |
+
"learning_rate": 1.118318853305116e-06,
|
13441 |
+
"loss": 1.1037,
|
13442 |
+
"step": 1919
|
13443 |
+
},
|
13444 |
+
{
|
13445 |
+
"epoch": 1.797752808988764,
|
13446 |
+
"grad_norm": 0.0,
|
13447 |
+
"learning_rate": 1.1081315511564684e-06,
|
13448 |
+
"loss": 1.1438,
|
13449 |
+
"step": 1920
|
13450 |
+
},
|
13451 |
+
{
|
13452 |
+
"epoch": 1.798689138576779,
|
13453 |
+
"grad_norm": 0.0,
|
13454 |
+
"learning_rate": 1.097989539858435e-06,
|
13455 |
+
"loss": 1.1215,
|
13456 |
+
"step": 1921
|
13457 |
+
},
|
13458 |
+
{
|
13459 |
+
"epoch": 1.7996254681647939,
|
13460 |
+
"grad_norm": 0.0,
|
13461 |
+
"learning_rate": 1.0878928437251957e-06,
|
13462 |
+
"loss": 1.1214,
|
13463 |
+
"step": 1922
|
13464 |
+
},
|
13465 |
+
{
|
13466 |
+
"epoch": 1.800561797752809,
|
13467 |
+
"grad_norm": 0.0,
|
13468 |
+
"learning_rate": 1.077841486962301e-06,
|
13469 |
+
"loss": 1.1535,
|
13470 |
+
"step": 1923
|
13471 |
+
},
|
13472 |
+
{
|
13473 |
+
"epoch": 1.801498127340824,
|
13474 |
+
"grad_norm": 0.0,
|
13475 |
+
"learning_rate": 1.067835493666598e-06,
|
13476 |
+
"loss": 1.1353,
|
13477 |
+
"step": 1924
|
13478 |
+
},
|
13479 |
+
{
|
13480 |
+
"epoch": 1.802434456928839,
|
13481 |
+
"grad_norm": 0.0,
|
13482 |
+
"learning_rate": 1.0578748878261757e-06,
|
13483 |
+
"loss": 1.1666,
|
13484 |
+
"step": 1925
|
13485 |
+
},
|
13486 |
+
{
|
13487 |
+
"epoch": 1.803370786516854,
|
13488 |
+
"grad_norm": 0.0,
|
13489 |
+
"learning_rate": 1.0479596933203262e-06,
|
13490 |
+
"loss": 1.1271,
|
13491 |
+
"step": 1926
|
13492 |
+
},
|
13493 |
+
{
|
13494 |
+
"epoch": 1.804307116104869,
|
13495 |
+
"grad_norm": 0.0,
|
13496 |
+
"learning_rate": 1.0380899339194639e-06,
|
13497 |
+
"loss": 1.1295,
|
13498 |
+
"step": 1927
|
13499 |
+
},
|
13500 |
+
{
|
13501 |
+
"epoch": 1.8052434456928839,
|
13502 |
+
"grad_norm": 0.0,
|
13503 |
+
"learning_rate": 1.0282656332850792e-06,
|
13504 |
+
"loss": 1.1604,
|
13505 |
+
"step": 1928
|
13506 |
+
},
|
13507 |
+
{
|
13508 |
+
"epoch": 1.8061797752808988,
|
13509 |
+
"grad_norm": 0.0,
|
13510 |
+
"learning_rate": 1.018486814969688e-06,
|
13511 |
+
"loss": 1.1655,
|
13512 |
+
"step": 1929
|
13513 |
+
},
|
13514 |
+
{
|
13515 |
+
"epoch": 1.8071161048689137,
|
13516 |
+
"grad_norm": 0.0,
|
13517 |
+
"learning_rate": 1.0087535024167593e-06,
|
13518 |
+
"loss": 1.1279,
|
13519 |
+
"step": 1930
|
13520 |
+
},
|
13521 |
+
{
|
13522 |
+
"epoch": 1.8080524344569289,
|
13523 |
+
"grad_norm": 0.0,
|
13524 |
+
"learning_rate": 9.990657189606678e-07,
|
13525 |
+
"loss": 1.1054,
|
13526 |
+
"step": 1931
|
13527 |
+
},
|
13528 |
+
{
|
13529 |
+
"epoch": 1.8089887640449438,
|
13530 |
+
"grad_norm": 0.0,
|
13531 |
+
"learning_rate": 9.894234878266461e-07,
|
13532 |
+
"loss": 1.1196,
|
13533 |
+
"step": 1932
|
13534 |
+
},
|
13535 |
+
{
|
13536 |
+
"epoch": 1.809925093632959,
|
13537 |
+
"grad_norm": 0.0,
|
13538 |
+
"learning_rate": 9.798268321307193e-07,
|
13539 |
+
"loss": 1.1167,
|
13540 |
+
"step": 1933
|
13541 |
+
},
|
13542 |
+
{
|
13543 |
+
"epoch": 1.8108614232209739,
|
13544 |
+
"grad_norm": 0.0,
|
13545 |
+
"learning_rate": 9.702757748796431e-07,
|
13546 |
+
"loss": 1.1148,
|
13547 |
+
"step": 1934
|
13548 |
+
},
|
13549 |
+
{
|
13550 |
+
"epoch": 1.8117977528089888,
|
13551 |
+
"grad_norm": 0.0,
|
13552 |
+
"learning_rate": 9.607703389708645e-07,
|
13553 |
+
"loss": 1.1371,
|
13554 |
+
"step": 1935
|
13555 |
+
},
|
13556 |
+
{
|
13557 |
+
"epoch": 1.8127340823970037,
|
13558 |
+
"grad_norm": 0.0,
|
13559 |
+
"learning_rate": 9.513105471924544e-07,
|
13560 |
+
"loss": 1.1407,
|
13561 |
+
"step": 1936
|
13562 |
+
},
|
13563 |
+
{
|
13564 |
+
"epoch": 1.8136704119850187,
|
13565 |
+
"grad_norm": 0.0,
|
13566 |
+
"learning_rate": 9.418964222230609e-07,
|
13567 |
+
"loss": 1.1063,
|
13568 |
+
"step": 1937
|
13569 |
+
},
|
13570 |
+
{
|
13571 |
+
"epoch": 1.8146067415730336,
|
13572 |
+
"grad_norm": 0.0,
|
13573 |
+
"learning_rate": 9.325279866318526e-07,
|
13574 |
+
"loss": 1.1421,
|
13575 |
+
"step": 1938
|
13576 |
+
},
|
13577 |
+
{
|
13578 |
+
"epoch": 1.8155430711610487,
|
13579 |
+
"grad_norm": 0.0,
|
13580 |
+
"learning_rate": 9.232052628784594e-07,
|
13581 |
+
"loss": 1.1468,
|
13582 |
+
"step": 1939
|
13583 |
+
},
|
13584 |
+
{
|
13585 |
+
"epoch": 1.8164794007490637,
|
13586 |
+
"grad_norm": 0.0,
|
13587 |
+
"learning_rate": 9.139282733129229e-07,
|
13588 |
+
"loss": 1.1409,
|
13589 |
+
"step": 1940
|
13590 |
+
},
|
13591 |
+
{
|
13592 |
+
"epoch": 1.8174157303370788,
|
13593 |
+
"grad_norm": 0.0,
|
13594 |
+
"learning_rate": 9.046970401756505e-07,
|
13595 |
+
"loss": 1.098,
|
13596 |
+
"step": 1941
|
13597 |
+
},
|
13598 |
+
{
|
13599 |
+
"epoch": 1.8183520599250937,
|
13600 |
+
"grad_norm": 0.0,
|
13601 |
+
"learning_rate": 8.955115855973418e-07,
|
13602 |
+
"loss": 1.1083,
|
13603 |
+
"step": 1942
|
13604 |
+
},
|
13605 |
+
{
|
13606 |
+
"epoch": 1.8192883895131087,
|
13607 |
+
"grad_norm": 0.0,
|
13608 |
+
"learning_rate": 8.863719315989616e-07,
|
13609 |
+
"loss": 1.1337,
|
13610 |
+
"step": 1943
|
13611 |
+
},
|
13612 |
+
{
|
13613 |
+
"epoch": 1.8202247191011236,
|
13614 |
+
"grad_norm": 0.0,
|
13615 |
+
"learning_rate": 8.772781000916697e-07,
|
13616 |
+
"loss": 1.1484,
|
13617 |
+
"step": 1944
|
13618 |
+
},
|
13619 |
+
{
|
13620 |
+
"epoch": 1.8211610486891385,
|
13621 |
+
"grad_norm": 0.0,
|
13622 |
+
"learning_rate": 8.682301128767623e-07,
|
13623 |
+
"loss": 1.1139,
|
13624 |
+
"step": 1945
|
13625 |
+
},
|
13626 |
+
{
|
13627 |
+
"epoch": 1.8220973782771535,
|
13628 |
+
"grad_norm": 0.0,
|
13629 |
+
"learning_rate": 8.592279916456414e-07,
|
13630 |
+
"loss": 1.0921,
|
13631 |
+
"step": 1946
|
13632 |
+
},
|
13633 |
+
{
|
13634 |
+
"epoch": 1.8230337078651684,
|
13635 |
+
"grad_norm": 0.0,
|
13636 |
+
"learning_rate": 8.502717579797482e-07,
|
13637 |
+
"loss": 1.1355,
|
13638 |
+
"step": 1947
|
13639 |
+
},
|
13640 |
+
{
|
13641 |
+
"epoch": 1.8239700374531835,
|
13642 |
+
"grad_norm": 0.0,
|
13643 |
+
"learning_rate": 8.413614333505115e-07,
|
13644 |
+
"loss": 1.1384,
|
13645 |
+
"step": 1948
|
13646 |
+
},
|
13647 |
+
{
|
13648 |
+
"epoch": 1.8249063670411985,
|
13649 |
+
"grad_norm": 0.0,
|
13650 |
+
"learning_rate": 8.324970391192977e-07,
|
13651 |
+
"loss": 1.1348,
|
13652 |
+
"step": 1949
|
13653 |
+
},
|
13654 |
+
{
|
13655 |
+
"epoch": 1.8258426966292136,
|
13656 |
+
"grad_norm": 0.0,
|
13657 |
+
"learning_rate": 8.236785965373695e-07,
|
13658 |
+
"loss": 1.1656,
|
13659 |
+
"step": 1950
|
13660 |
+
},
|
13661 |
+
{
|
13662 |
+
"epoch": 1.8267790262172285,
|
13663 |
+
"grad_norm": 0.0,
|
13664 |
+
"learning_rate": 8.14906126745818e-07,
|
13665 |
+
"loss": 1.1524,
|
13666 |
+
"step": 1951
|
13667 |
+
},
|
13668 |
+
{
|
13669 |
+
"epoch": 1.8277153558052435,
|
13670 |
+
"grad_norm": 0.0,
|
13671 |
+
"learning_rate": 8.061796507755226e-07,
|
13672 |
+
"loss": 1.1466,
|
13673 |
+
"step": 1952
|
13674 |
+
},
|
13675 |
+
{
|
13676 |
+
"epoch": 1.8286516853932584,
|
13677 |
+
"grad_norm": 0.0,
|
13678 |
+
"learning_rate": 7.974991895470995e-07,
|
13679 |
+
"loss": 1.1212,
|
13680 |
+
"step": 1953
|
13681 |
+
},
|
13682 |
+
{
|
13683 |
+
"epoch": 1.8295880149812733,
|
13684 |
+
"grad_norm": 0.0,
|
13685 |
+
"learning_rate": 7.888647638708536e-07,
|
13686 |
+
"loss": 1.1098,
|
13687 |
+
"step": 1954
|
13688 |
+
},
|
13689 |
+
{
|
13690 |
+
"epoch": 1.8305243445692883,
|
13691 |
+
"grad_norm": 0.0,
|
13692 |
+
"learning_rate": 7.802763944467173e-07,
|
13693 |
+
"loss": 1.1012,
|
13694 |
+
"step": 1955
|
13695 |
+
},
|
13696 |
+
{
|
13697 |
+
"epoch": 1.8314606741573034,
|
13698 |
+
"grad_norm": 0.0,
|
13699 |
+
"learning_rate": 7.717341018642144e-07,
|
13700 |
+
"loss": 1.1494,
|
13701 |
+
"step": 1956
|
13702 |
+
},
|
13703 |
+
{
|
13704 |
+
"epoch": 1.8323970037453183,
|
13705 |
+
"grad_norm": 0.0,
|
13706 |
+
"learning_rate": 7.632379066024076e-07,
|
13707 |
+
"loss": 1.0921,
|
13708 |
+
"step": 1957
|
13709 |
+
},
|
13710 |
+
{
|
13711 |
+
"epoch": 1.8333333333333335,
|
13712 |
+
"grad_norm": 0.0,
|
13713 |
+
"learning_rate": 7.547878290298349e-07,
|
13714 |
+
"loss": 1.1483,
|
13715 |
+
"step": 1958
|
13716 |
+
},
|
13717 |
+
{
|
13718 |
+
"epoch": 1.8342696629213484,
|
13719 |
+
"grad_norm": 0.0,
|
13720 |
+
"learning_rate": 7.463838894044939e-07,
|
13721 |
+
"loss": 1.0768,
|
13722 |
+
"step": 1959
|
13723 |
+
},
|
13724 |
+
{
|
13725 |
+
"epoch": 1.8352059925093633,
|
13726 |
+
"grad_norm": 0.0,
|
13727 |
+
"learning_rate": 7.380261078737527e-07,
|
13728 |
+
"loss": 1.1355,
|
13729 |
+
"step": 1960
|
13730 |
+
},
|
13731 |
+
{
|
13732 |
+
"epoch": 1.8361423220973783,
|
13733 |
+
"grad_norm": 0.0,
|
13734 |
+
"learning_rate": 7.297145044743304e-07,
|
13735 |
+
"loss": 1.133,
|
13736 |
+
"step": 1961
|
13737 |
+
},
|
13738 |
+
{
|
13739 |
+
"epoch": 1.8370786516853932,
|
13740 |
+
"grad_norm": 0.0,
|
13741 |
+
"learning_rate": 7.214490991322409e-07,
|
13742 |
+
"loss": 1.0887,
|
13743 |
+
"step": 1962
|
13744 |
+
},
|
13745 |
+
{
|
13746 |
+
"epoch": 1.8380149812734081,
|
13747 |
+
"grad_norm": 0.0,
|
13748 |
+
"learning_rate": 7.132299116627384e-07,
|
13749 |
+
"loss": 1.1531,
|
13750 |
+
"step": 1963
|
13751 |
+
},
|
13752 |
+
{
|
13753 |
+
"epoch": 1.8389513108614233,
|
13754 |
+
"grad_norm": 0.0,
|
13755 |
+
"learning_rate": 7.050569617702785e-07,
|
13756 |
+
"loss": 1.1134,
|
13757 |
+
"step": 1964
|
13758 |
+
},
|
13759 |
+
{
|
13760 |
+
"epoch": 1.8398876404494382,
|
13761 |
+
"grad_norm": 0.0,
|
13762 |
+
"learning_rate": 6.969302690484725e-07,
|
13763 |
+
"loss": 1.1198,
|
13764 |
+
"step": 1965
|
13765 |
+
},
|
13766 |
+
{
|
13767 |
+
"epoch": 1.8408239700374533,
|
13768 |
+
"grad_norm": 0.0,
|
13769 |
+
"learning_rate": 6.888498529800314e-07,
|
13770 |
+
"loss": 1.1188,
|
13771 |
+
"step": 1966
|
13772 |
+
},
|
13773 |
+
{
|
13774 |
+
"epoch": 1.8417602996254683,
|
13775 |
+
"grad_norm": 0.0,
|
13776 |
+
"learning_rate": 6.808157329367193e-07,
|
13777 |
+
"loss": 1.1667,
|
13778 |
+
"step": 1967
|
13779 |
+
},
|
13780 |
+
{
|
13781 |
+
"epoch": 1.8426966292134832,
|
13782 |
+
"grad_norm": 0.0,
|
13783 |
+
"learning_rate": 6.728279281793248e-07,
|
13784 |
+
"loss": 1.1723,
|
13785 |
+
"step": 1968
|
13786 |
+
},
|
13787 |
+
{
|
13788 |
+
"epoch": 1.8436329588014981,
|
13789 |
+
"grad_norm": 0.0,
|
13790 |
+
"learning_rate": 6.648864578575897e-07,
|
13791 |
+
"loss": 1.1074,
|
13792 |
+
"step": 1969
|
13793 |
+
},
|
13794 |
+
{
|
13795 |
+
"epoch": 1.844569288389513,
|
13796 |
+
"grad_norm": 0.0,
|
13797 |
+
"learning_rate": 6.569913410101736e-07,
|
13798 |
+
"loss": 1.0989,
|
13799 |
+
"step": 1970
|
13800 |
+
},
|
13801 |
+
{
|
13802 |
+
"epoch": 1.845505617977528,
|
13803 |
+
"grad_norm": 0.0,
|
13804 |
+
"learning_rate": 6.491425965646225e-07,
|
13805 |
+
"loss": 1.1316,
|
13806 |
+
"step": 1971
|
13807 |
+
},
|
13808 |
+
{
|
13809 |
+
"epoch": 1.846441947565543,
|
13810 |
+
"grad_norm": 0.0,
|
13811 |
+
"learning_rate": 6.413402433372983e-07,
|
13812 |
+
"loss": 1.1258,
|
13813 |
+
"step": 1972
|
13814 |
+
},
|
13815 |
+
{
|
13816 |
+
"epoch": 1.847378277153558,
|
13817 |
+
"grad_norm": 0.0,
|
13818 |
+
"learning_rate": 6.335843000333474e-07,
|
13819 |
+
"loss": 1.1498,
|
13820 |
+
"step": 1973
|
13821 |
+
},
|
13822 |
+
{
|
13823 |
+
"epoch": 1.848314606741573,
|
13824 |
+
"grad_norm": 0.0,
|
13825 |
+
"learning_rate": 6.258747852466629e-07,
|
13826 |
+
"loss": 1.1731,
|
13827 |
+
"step": 1974
|
13828 |
+
},
|
13829 |
+
{
|
13830 |
+
"epoch": 1.8492509363295881,
|
13831 |
+
"grad_norm": 0.0,
|
13832 |
+
"learning_rate": 6.182117174598223e-07,
|
13833 |
+
"loss": 1.1382,
|
13834 |
+
"step": 1975
|
13835 |
+
},
|
13836 |
+
{
|
13837 |
+
"epoch": 1.850187265917603,
|
13838 |
+
"grad_norm": 0.0,
|
13839 |
+
"learning_rate": 6.105951150440526e-07,
|
13840 |
+
"loss": 1.1663,
|
13841 |
+
"step": 1976
|
13842 |
+
},
|
13843 |
+
{
|
13844 |
+
"epoch": 1.851123595505618,
|
13845 |
+
"grad_norm": 0.0,
|
13846 |
+
"learning_rate": 6.030249962591961e-07,
|
13847 |
+
"loss": 1.1194,
|
13848 |
+
"step": 1977
|
13849 |
+
},
|
13850 |
+
{
|
13851 |
+
"epoch": 1.852059925093633,
|
13852 |
+
"grad_norm": 0.0,
|
13853 |
+
"learning_rate": 5.955013792536424e-07,
|
13854 |
+
"loss": 1.1026,
|
13855 |
+
"step": 1978
|
13856 |
+
},
|
13857 |
+
{
|
13858 |
+
"epoch": 1.8529962546816479,
|
13859 |
+
"grad_norm": 0.0,
|
13860 |
+
"learning_rate": 5.880242820643123e-07,
|
13861 |
+
"loss": 1.1151,
|
13862 |
+
"step": 1979
|
13863 |
+
},
|
13864 |
+
{
|
13865 |
+
"epoch": 1.8539325842696628,
|
13866 |
+
"grad_norm": 0.0,
|
13867 |
+
"learning_rate": 5.805937226165914e-07,
|
13868 |
+
"loss": 1.0811,
|
13869 |
+
"step": 1980
|
13870 |
+
},
|
13871 |
+
{
|
13872 |
+
"epoch": 1.854868913857678,
|
13873 |
+
"grad_norm": 0.0,
|
13874 |
+
"learning_rate": 5.732097187242969e-07,
|
13875 |
+
"loss": 1.1584,
|
13876 |
+
"step": 1981
|
13877 |
+
},
|
13878 |
+
{
|
13879 |
+
"epoch": 1.8558052434456929,
|
13880 |
+
"grad_norm": 0.0,
|
13881 |
+
"learning_rate": 5.658722880896439e-07,
|
13882 |
+
"loss": 1.1282,
|
13883 |
+
"step": 1982
|
13884 |
+
},
|
13885 |
+
{
|
13886 |
+
"epoch": 1.856741573033708,
|
13887 |
+
"grad_norm": 0.0,
|
13888 |
+
"learning_rate": 5.585814483031882e-07,
|
13889 |
+
"loss": 1.1414,
|
13890 |
+
"step": 1983
|
13891 |
+
},
|
13892 |
+
{
|
13893 |
+
"epoch": 1.857677902621723,
|
13894 |
+
"grad_norm": 0.0,
|
13895 |
+
"learning_rate": 5.513372168437859e-07,
|
13896 |
+
"loss": 1.1196,
|
13897 |
+
"step": 1984
|
13898 |
+
},
|
13899 |
+
{
|
13900 |
+
"epoch": 1.8586142322097379,
|
13901 |
+
"grad_norm": 0.0,
|
13902 |
+
"learning_rate": 5.441396110785647e-07,
|
13903 |
+
"loss": 1.1034,
|
13904 |
+
"step": 1985
|
13905 |
+
},
|
13906 |
+
{
|
13907 |
+
"epoch": 1.8595505617977528,
|
13908 |
+
"grad_norm": 0.0,
|
13909 |
+
"learning_rate": 5.369886482628661e-07,
|
13910 |
+
"loss": 1.1757,
|
13911 |
+
"step": 1986
|
13912 |
+
},
|
13913 |
+
{
|
13914 |
+
"epoch": 1.8604868913857677,
|
13915 |
+
"grad_norm": 0.0,
|
13916 |
+
"learning_rate": 5.298843455402147e-07,
|
13917 |
+
"loss": 1.1241,
|
13918 |
+
"step": 1987
|
13919 |
+
},
|
13920 |
+
{
|
13921 |
+
"epoch": 1.8614232209737827,
|
13922 |
+
"grad_norm": 0.0,
|
13923 |
+
"learning_rate": 5.228267199422709e-07,
|
13924 |
+
"loss": 1.1542,
|
13925 |
+
"step": 1988
|
13926 |
+
},
|
13927 |
+
{
|
13928 |
+
"epoch": 1.8623595505617978,
|
13929 |
+
"grad_norm": 0.0,
|
13930 |
+
"learning_rate": 5.158157883887982e-07,
|
13931 |
+
"loss": 1.1527,
|
13932 |
+
"step": 1989
|
13933 |
+
},
|
13934 |
+
{
|
13935 |
+
"epoch": 1.8632958801498127,
|
13936 |
+
"grad_norm": 0.0,
|
13937 |
+
"learning_rate": 5.088515676876049e-07,
|
13938 |
+
"loss": 1.1325,
|
13939 |
+
"step": 1990
|
13940 |
+
},
|
13941 |
+
{
|
13942 |
+
"epoch": 1.8642322097378277,
|
13943 |
+
"grad_norm": 0.0,
|
13944 |
+
"learning_rate": 5.019340745345335e-07,
|
13945 |
+
"loss": 1.1534,
|
13946 |
+
"step": 1991
|
13947 |
+
},
|
13948 |
+
{
|
13949 |
+
"epoch": 1.8651685393258428,
|
13950 |
+
"grad_norm": 0.0,
|
13951 |
+
"learning_rate": 4.950633255133874e-07,
|
13952 |
+
"loss": 1.1376,
|
13953 |
+
"step": 1992
|
13954 |
+
},
|
13955 |
+
{
|
13956 |
+
"epoch": 1.8661048689138577,
|
13957 |
+
"grad_norm": 0.0,
|
13958 |
+
"learning_rate": 4.882393370959126e-07,
|
13959 |
+
"loss": 1.1075,
|
13960 |
+
"step": 1993
|
13961 |
+
},
|
13962 |
+
{
|
13963 |
+
"epoch": 1.8670411985018727,
|
13964 |
+
"grad_norm": 0.0,
|
13965 |
+
"learning_rate": 4.814621256417562e-07,
|
13966 |
+
"loss": 1.1335,
|
13967 |
+
"step": 1994
|
13968 |
+
},
|
13969 |
+
{
|
13970 |
+
"epoch": 1.8679775280898876,
|
13971 |
+
"grad_norm": 0.0,
|
13972 |
+
"learning_rate": 4.7473170739842144e-07,
|
13973 |
+
"loss": 1.1232,
|
13974 |
+
"step": 1995
|
13975 |
+
},
|
13976 |
+
{
|
13977 |
+
"epoch": 1.8689138576779025,
|
13978 |
+
"grad_norm": 0.0,
|
13979 |
+
"learning_rate": 4.6804809850122147e-07,
|
13980 |
+
"loss": 1.1533,
|
13981 |
+
"step": 1996
|
13982 |
+
},
|
13983 |
+
{
|
13984 |
+
"epoch": 1.8698501872659175,
|
13985 |
+
"grad_norm": 0.0,
|
13986 |
+
"learning_rate": 4.6141131497326796e-07,
|
13987 |
+
"loss": 1.0967,
|
13988 |
+
"step": 1997
|
13989 |
+
},
|
13990 |
+
{
|
13991 |
+
"epoch": 1.8707865168539326,
|
13992 |
+
"grad_norm": 0.0,
|
13993 |
+
"learning_rate": 4.5482137272539806e-07,
|
13994 |
+
"loss": 1.1715,
|
13995 |
+
"step": 1998
|
13996 |
+
},
|
13997 |
+
{
|
13998 |
+
"epoch": 1.8717228464419475,
|
13999 |
+
"grad_norm": 0.0,
|
14000 |
+
"learning_rate": 4.4827828755616307e-07,
|
14001 |
+
"loss": 1.1194,
|
14002 |
+
"step": 1999
|
14003 |
+
},
|
14004 |
+
{
|
14005 |
+
"epoch": 1.8726591760299627,
|
14006 |
+
"grad_norm": 0.0,
|
14007 |
+
"learning_rate": 4.4178207515177765e-07,
|
14008 |
+
"loss": 1.1007,
|
14009 |
+
"step": 2000
|
14010 |
+
},
|
14011 |
+
{
|
14012 |
+
"epoch": 1.8735955056179776,
|
14013 |
+
"grad_norm": 0.0,
|
14014 |
+
"learning_rate": 4.3533275108608385e-07,
|
14015 |
+
"loss": 1.1305,
|
14016 |
+
"step": 2001
|
14017 |
+
},
|
14018 |
+
{
|
14019 |
+
"epoch": 1.8745318352059925,
|
14020 |
+
"grad_norm": 0.0,
|
14021 |
+
"learning_rate": 4.289303308205117e-07,
|
14022 |
+
"loss": 1.1265,
|
14023 |
+
"step": 2002
|
14024 |
+
},
|
14025 |
+
{
|
14026 |
+
"epoch": 1.8754681647940075,
|
14027 |
+
"grad_norm": 0.0,
|
14028 |
+
"learning_rate": 4.2257482970405437e-07,
|
14029 |
+
"loss": 1.1139,
|
14030 |
+
"step": 2003
|
14031 |
+
},
|
14032 |
+
{
|
14033 |
+
"epoch": 1.8764044943820224,
|
14034 |
+
"grad_norm": 0.0,
|
14035 |
+
"learning_rate": 4.16266262973215e-07,
|
14036 |
+
"loss": 1.163,
|
14037 |
+
"step": 2004
|
14038 |
+
},
|
14039 |
+
{
|
14040 |
+
"epoch": 1.8773408239700373,
|
14041 |
+
"grad_norm": 0.0,
|
14042 |
+
"learning_rate": 4.1000464575197796e-07,
|
14043 |
+
"loss": 1.1103,
|
14044 |
+
"step": 2005
|
14045 |
+
},
|
14046 |
+
{
|
14047 |
+
"epoch": 1.8782771535580525,
|
14048 |
+
"grad_norm": 0.0,
|
14049 |
+
"learning_rate": 4.037899930517752e-07,
|
14050 |
+
"loss": 1.1655,
|
14051 |
+
"step": 2006
|
14052 |
+
},
|
14053 |
+
{
|
14054 |
+
"epoch": 1.8792134831460674,
|
14055 |
+
"grad_norm": 0.0,
|
14056 |
+
"learning_rate": 3.9762231977144683e-07,
|
14057 |
+
"loss": 1.1457,
|
14058 |
+
"step": 2007
|
14059 |
+
},
|
14060 |
+
{
|
14061 |
+
"epoch": 1.8801498127340825,
|
14062 |
+
"grad_norm": 0.0,
|
14063 |
+
"learning_rate": 3.9150164069720277e-07,
|
14064 |
+
"loss": 1.1489,
|
14065 |
+
"step": 2008
|
14066 |
+
},
|
14067 |
+
{
|
14068 |
+
"epoch": 1.8810861423220975,
|
14069 |
+
"grad_norm": 0.0,
|
14070 |
+
"learning_rate": 3.8542797050258983e-07,
|
14071 |
+
"loss": 1.1192,
|
14072 |
+
"step": 2009
|
14073 |
+
},
|
14074 |
+
{
|
14075 |
+
"epoch": 1.8820224719101124,
|
14076 |
+
"grad_norm": 0.0,
|
14077 |
+
"learning_rate": 3.794013237484673e-07,
|
14078 |
+
"loss": 1.2021,
|
14079 |
+
"step": 2010
|
14080 |
+
},
|
14081 |
+
{
|
14082 |
+
"epoch": 1.8829588014981273,
|
14083 |
+
"grad_norm": 0.0,
|
14084 |
+
"learning_rate": 3.7342171488294666e-07,
|
14085 |
+
"loss": 1.1504,
|
14086 |
+
"step": 2011
|
14087 |
+
},
|
14088 |
+
{
|
14089 |
+
"epoch": 1.8838951310861423,
|
14090 |
+
"grad_norm": 0.0,
|
14091 |
+
"learning_rate": 3.674891582413831e-07,
|
14092 |
+
"loss": 1.1504,
|
14093 |
+
"step": 2012
|
14094 |
+
},
|
14095 |
+
{
|
14096 |
+
"epoch": 1.8848314606741572,
|
14097 |
+
"grad_norm": 0.0,
|
14098 |
+
"learning_rate": 3.616036680463242e-07,
|
14099 |
+
"loss": 1.1132,
|
14100 |
+
"step": 2013
|
14101 |
+
},
|
14102 |
+
{
|
14103 |
+
"epoch": 1.8857677902621723,
|
14104 |
+
"grad_norm": 0.0,
|
14105 |
+
"learning_rate": 3.557652584074833e-07,
|
14106 |
+
"loss": 1.0837,
|
14107 |
+
"step": 2014
|
14108 |
+
},
|
14109 |
+
{
|
14110 |
+
"epoch": 1.8867041198501873,
|
14111 |
+
"grad_norm": 0.0,
|
14112 |
+
"learning_rate": 3.4997394332170863e-07,
|
14113 |
+
"loss": 1.1002,
|
14114 |
+
"step": 2015
|
14115 |
+
},
|
14116 |
+
{
|
14117 |
+
"epoch": 1.8876404494382022,
|
14118 |
+
"grad_norm": 0.0,
|
14119 |
+
"learning_rate": 3.4422973667293856e-07,
|
14120 |
+
"loss": 1.1536,
|
14121 |
+
"step": 2016
|
14122 |
+
},
|
14123 |
+
{
|
14124 |
+
"epoch": 1.8885767790262173,
|
14125 |
+
"grad_norm": 0.0,
|
14126 |
+
"learning_rate": 3.3853265223217304e-07,
|
14127 |
+
"loss": 1.1041,
|
14128 |
+
"step": 2017
|
14129 |
+
},
|
14130 |
+
{
|
14131 |
+
"epoch": 1.8895131086142323,
|
14132 |
+
"grad_norm": 0.0,
|
14133 |
+
"learning_rate": 3.3288270365745555e-07,
|
14134 |
+
"loss": 1.1267,
|
14135 |
+
"step": 2018
|
14136 |
+
},
|
14137 |
+
{
|
14138 |
+
"epoch": 1.8904494382022472,
|
14139 |
+
"grad_norm": 0.0,
|
14140 |
+
"learning_rate": 3.272799044938113e-07,
|
14141 |
+
"loss": 1.1473,
|
14142 |
+
"step": 2019
|
14143 |
+
},
|
14144 |
+
{
|
14145 |
+
"epoch": 1.8913857677902621,
|
14146 |
+
"grad_norm": 0.0,
|
14147 |
+
"learning_rate": 3.217242681732446e-07,
|
14148 |
+
"loss": 1.0894,
|
14149 |
+
"step": 2020
|
14150 |
+
},
|
14151 |
+
{
|
14152 |
+
"epoch": 1.892322097378277,
|
14153 |
+
"grad_norm": 0.0,
|
14154 |
+
"learning_rate": 3.1621580801468156e-07,
|
14155 |
+
"loss": 1.12,
|
14156 |
+
"step": 2021
|
14157 |
+
},
|
14158 |
+
{
|
14159 |
+
"epoch": 1.893258426966292,
|
14160 |
+
"grad_norm": 0.0,
|
14161 |
+
"learning_rate": 3.107545372239562e-07,
|
14162 |
+
"loss": 1.1123,
|
14163 |
+
"step": 2022
|
14164 |
+
},
|
14165 |
+
{
|
14166 |
+
"epoch": 1.8941947565543071,
|
14167 |
+
"grad_norm": 0.0,
|
14168 |
+
"learning_rate": 3.053404688937711e-07,
|
14169 |
+
"loss": 1.1422,
|
14170 |
+
"step": 2023
|
14171 |
+
},
|
14172 |
+
{
|
14173 |
+
"epoch": 1.895131086142322,
|
14174 |
+
"grad_norm": 0.0,
|
14175 |
+
"learning_rate": 2.999736160036681e-07,
|
14176 |
+
"loss": 1.1701,
|
14177 |
+
"step": 2024
|
14178 |
+
},
|
14179 |
+
{
|
14180 |
+
"epoch": 1.8960674157303372,
|
14181 |
+
"grad_norm": 0.0,
|
14182 |
+
"learning_rate": 2.9465399141999084e-07,
|
14183 |
+
"loss": 1.1292,
|
14184 |
+
"step": 2025
|
14185 |
+
},
|
14186 |
+
{
|
14187 |
+
"epoch": 1.8970037453183521,
|
14188 |
+
"grad_norm": 0.0,
|
14189 |
+
"learning_rate": 2.8938160789586e-07,
|
14190 |
+
"loss": 1.1291,
|
14191 |
+
"step": 2026
|
14192 |
+
},
|
14193 |
+
{
|
14194 |
+
"epoch": 1.897940074906367,
|
14195 |
+
"grad_norm": 0.0,
|
14196 |
+
"learning_rate": 2.841564780711492e-07,
|
14197 |
+
"loss": 1.1391,
|
14198 |
+
"step": 2027
|
14199 |
+
},
|
14200 |
+
{
|
14201 |
+
"epoch": 1.898876404494382,
|
14202 |
+
"grad_norm": 0.0,
|
14203 |
+
"learning_rate": 2.7897861447244266e-07,
|
14204 |
+
"loss": 1.1197,
|
14205 |
+
"step": 2028
|
14206 |
+
},
|
14207 |
+
{
|
14208 |
+
"epoch": 1.899812734082397,
|
14209 |
+
"grad_norm": 0.0,
|
14210 |
+
"learning_rate": 2.7384802951300416e-07,
|
14211 |
+
"loss": 1.1532,
|
14212 |
+
"step": 2029
|
14213 |
+
},
|
14214 |
+
{
|
14215 |
+
"epoch": 1.9007490636704119,
|
14216 |
+
"grad_norm": 0.0,
|
14217 |
+
"learning_rate": 2.687647354927636e-07,
|
14218 |
+
"loss": 1.1519,
|
14219 |
+
"step": 2030
|
14220 |
+
},
|
14221 |
+
{
|
14222 |
+
"epoch": 1.901685393258427,
|
14223 |
+
"grad_norm": 0.0,
|
14224 |
+
"learning_rate": 2.6372874459826614e-07,
|
14225 |
+
"loss": 1.1372,
|
14226 |
+
"step": 2031
|
14227 |
+
},
|
14228 |
+
{
|
14229 |
+
"epoch": 1.902621722846442,
|
14230 |
+
"grad_norm": 0.0,
|
14231 |
+
"learning_rate": 2.5874006890266314e-07,
|
14232 |
+
"loss": 1.1608,
|
14233 |
+
"step": 2032
|
14234 |
+
},
|
14235 |
+
{
|
14236 |
+
"epoch": 1.903558052434457,
|
14237 |
+
"grad_norm": 0.0,
|
14238 |
+
"learning_rate": 2.5379872036566997e-07,
|
14239 |
+
"loss": 1.1863,
|
14240 |
+
"step": 2033
|
14241 |
+
},
|
14242 |
+
{
|
14243 |
+
"epoch": 1.904494382022472,
|
14244 |
+
"grad_norm": 0.0,
|
14245 |
+
"learning_rate": 2.4890471083354183e-07,
|
14246 |
+
"loss": 1.1408,
|
14247 |
+
"step": 2034
|
14248 |
+
},
|
14249 |
+
{
|
14250 |
+
"epoch": 1.905430711610487,
|
14251 |
+
"grad_norm": 0.0,
|
14252 |
+
"learning_rate": 2.440580520390379e-07,
|
14253 |
+
"loss": 1.1366,
|
14254 |
+
"step": 2035
|
14255 |
+
},
|
14256 |
+
{
|
14257 |
+
"epoch": 1.9063670411985019,
|
14258 |
+
"grad_norm": 0.0,
|
14259 |
+
"learning_rate": 2.3925875560141253e-07,
|
14260 |
+
"loss": 1.1077,
|
14261 |
+
"step": 2036
|
14262 |
+
},
|
14263 |
+
{
|
14264 |
+
"epoch": 1.9073033707865168,
|
14265 |
+
"grad_norm": 0.0,
|
14266 |
+
"learning_rate": 2.3450683302636446e-07,
|
14267 |
+
"loss": 1.1399,
|
14268 |
+
"step": 2037
|
14269 |
+
},
|
14270 |
+
{
|
14271 |
+
"epoch": 1.9082397003745317,
|
14272 |
+
"grad_norm": 0.0,
|
14273 |
+
"learning_rate": 2.298022957060253e-07,
|
14274 |
+
"loss": 1.1239,
|
14275 |
+
"step": 2038
|
14276 |
+
},
|
14277 |
+
{
|
14278 |
+
"epoch": 1.9091760299625467,
|
14279 |
+
"grad_norm": 0.0,
|
14280 |
+
"learning_rate": 2.251451549189221e-07,
|
14281 |
+
"loss": 1.1284,
|
14282 |
+
"step": 2039
|
14283 |
+
},
|
14284 |
+
{
|
14285 |
+
"epoch": 1.9101123595505618,
|
14286 |
+
"grad_norm": 0.0,
|
14287 |
+
"learning_rate": 2.2053542182996158e-07,
|
14288 |
+
"loss": 1.159,
|
14289 |
+
"step": 2040
|
14290 |
+
},
|
14291 |
+
{
|
14292 |
+
"epoch": 1.9110486891385767,
|
14293 |
+
"grad_norm": 0.0,
|
14294 |
+
"learning_rate": 2.1597310749038813e-07,
|
14295 |
+
"loss": 1.1892,
|
14296 |
+
"step": 2041
|
14297 |
+
},
|
14298 |
+
{
|
14299 |
+
"epoch": 1.9119850187265919,
|
14300 |
+
"grad_norm": 0.0,
|
14301 |
+
"learning_rate": 2.114582228377704e-07,
|
14302 |
+
"loss": 1.1312,
|
14303 |
+
"step": 2042
|
14304 |
+
},
|
14305 |
+
{
|
14306 |
+
"epoch": 1.9129213483146068,
|
14307 |
+
"grad_norm": 0.0,
|
14308 |
+
"learning_rate": 2.0699077869597017e-07,
|
14309 |
+
"loss": 1.131,
|
14310 |
+
"step": 2043
|
14311 |
+
},
|
14312 |
+
{
|
14313 |
+
"epoch": 1.9138576779026217,
|
14314 |
+
"grad_norm": 0.0,
|
14315 |
+
"learning_rate": 2.02570785775118e-07,
|
14316 |
+
"loss": 1.0921,
|
14317 |
+
"step": 2044
|
14318 |
+
},
|
14319 |
+
{
|
14320 |
+
"epoch": 1.9147940074906367,
|
14321 |
+
"grad_norm": 0.0,
|
14322 |
+
"learning_rate": 1.9819825467158437e-07,
|
14323 |
+
"loss": 1.1463,
|
14324 |
+
"step": 2045
|
14325 |
+
},
|
14326 |
+
{
|
14327 |
+
"epoch": 1.9157303370786516,
|
14328 |
+
"grad_norm": 0.0,
|
14329 |
+
"learning_rate": 1.9387319586795516e-07,
|
14330 |
+
"loss": 1.1391,
|
14331 |
+
"step": 2046
|
14332 |
+
},
|
14333 |
+
{
|
14334 |
+
"epoch": 1.9166666666666665,
|
14335 |
+
"grad_norm": 0.0,
|
14336 |
+
"learning_rate": 1.8959561973300732e-07,
|
14337 |
+
"loss": 1.1291,
|
14338 |
+
"step": 2047
|
14339 |
+
},
|
14340 |
+
{
|
14341 |
+
"epoch": 1.9176029962546817,
|
14342 |
+
"grad_norm": 0.0,
|
14343 |
+
"learning_rate": 1.8536553652168887e-07,
|
14344 |
+
"loss": 1.1492,
|
14345 |
+
"step": 2048
|
14346 |
+
},
|
14347 |
+
{
|
14348 |
+
"epoch": 1.9185393258426966,
|
14349 |
+
"grad_norm": 0.0,
|
14350 |
+
"learning_rate": 1.8118295637507888e-07,
|
14351 |
+
"loss": 1.1043,
|
14352 |
+
"step": 2049
|
14353 |
+
},
|
14354 |
+
{
|
14355 |
+
"epoch": 1.9194756554307117,
|
14356 |
+
"grad_norm": 0.0,
|
14357 |
+
"learning_rate": 1.7704788932038974e-07,
|
14358 |
+
"loss": 1.1002,
|
14359 |
+
"step": 2050
|
14360 |
+
},
|
14361 |
+
{
|
14362 |
+
"epoch": 1.9204119850187267,
|
14363 |
+
"grad_norm": 0.0,
|
14364 |
+
"learning_rate": 1.7296034527091387e-07,
|
14365 |
+
"loss": 1.0857,
|
14366 |
+
"step": 2051
|
14367 |
+
},
|
14368 |
+
{
|
14369 |
+
"epoch": 1.9213483146067416,
|
14370 |
+
"grad_norm": 0.0,
|
14371 |
+
"learning_rate": 1.6892033402601704e-07,
|
14372 |
+
"loss": 1.0719,
|
14373 |
+
"step": 2052
|
14374 |
+
},
|
14375 |
+
{
|
14376 |
+
"epoch": 1.9222846441947565,
|
14377 |
+
"grad_norm": 0.0,
|
14378 |
+
"learning_rate": 1.6492786527111615e-07,
|
14379 |
+
"loss": 1.1359,
|
14380 |
+
"step": 2053
|
14381 |
+
},
|
14382 |
+
{
|
14383 |
+
"epoch": 1.9232209737827715,
|
14384 |
+
"grad_norm": 0.0,
|
14385 |
+
"learning_rate": 1.6098294857764373e-07,
|
14386 |
+
"loss": 1.0802,
|
14387 |
+
"step": 2054
|
14388 |
+
},
|
14389 |
+
{
|
14390 |
+
"epoch": 1.9241573033707864,
|
14391 |
+
"grad_norm": 0.0,
|
14392 |
+
"learning_rate": 1.5708559340303465e-07,
|
14393 |
+
"loss": 1.1046,
|
14394 |
+
"step": 2055
|
14395 |
+
},
|
14396 |
+
{
|
14397 |
+
"epoch": 1.9250936329588015,
|
14398 |
+
"grad_norm": 0.0,
|
14399 |
+
"learning_rate": 1.53235809090706e-07,
|
14400 |
+
"loss": 1.1245,
|
14401 |
+
"step": 2056
|
14402 |
+
},
|
14403 |
+
{
|
14404 |
+
"epoch": 1.9260299625468165,
|
14405 |
+
"grad_norm": 0.0,
|
14406 |
+
"learning_rate": 1.494336048700218e-07,
|
14407 |
+
"loss": 1.0975,
|
14408 |
+
"step": 2057
|
14409 |
+
},
|
14410 |
+
{
|
14411 |
+
"epoch": 1.9269662921348316,
|
14412 |
+
"grad_norm": 0.0,
|
14413 |
+
"learning_rate": 1.4567898985628604e-07,
|
14414 |
+
"loss": 1.1512,
|
14415 |
+
"step": 2058
|
14416 |
+
},
|
14417 |
+
{
|
14418 |
+
"epoch": 1.9279026217228465,
|
14419 |
+
"grad_norm": 0.0,
|
14420 |
+
"learning_rate": 1.4197197305070964e-07,
|
14421 |
+
"loss": 1.1015,
|
14422 |
+
"step": 2059
|
14423 |
+
},
|
14424 |
+
{
|
14425 |
+
"epoch": 1.9288389513108615,
|
14426 |
+
"grad_norm": 0.0,
|
14427 |
+
"learning_rate": 1.3831256334039256e-07,
|
14428 |
+
"loss": 1.1459,
|
14429 |
+
"step": 2060
|
14430 |
+
},
|
14431 |
+
{
|
14432 |
+
"epoch": 1.9297752808988764,
|
14433 |
+
"grad_norm": 0.0,
|
14434 |
+
"learning_rate": 1.347007694983016e-07,
|
14435 |
+
"loss": 1.1436,
|
14436 |
+
"step": 2061
|
14437 |
+
},
|
14438 |
+
{
|
14439 |
+
"epoch": 1.9307116104868913,
|
14440 |
+
"grad_norm": 0.0,
|
14441 |
+
"learning_rate": 1.3113660018325925e-07,
|
14442 |
+
"loss": 1.1744,
|
14443 |
+
"step": 2062
|
14444 |
+
},
|
14445 |
+
{
|
14446 |
+
"epoch": 1.9316479400749063,
|
14447 |
+
"grad_norm": 0.0,
|
14448 |
+
"learning_rate": 1.2762006393990388e-07,
|
14449 |
+
"loss": 1.1269,
|
14450 |
+
"step": 2063
|
14451 |
+
},
|
14452 |
+
{
|
14453 |
+
"epoch": 1.9325842696629212,
|
14454 |
+
"grad_norm": 0.0,
|
14455 |
+
"learning_rate": 1.2415116919868298e-07,
|
14456 |
+
"loss": 1.1371,
|
14457 |
+
"step": 2064
|
14458 |
+
},
|
14459 |
+
{
|
14460 |
+
"epoch": 1.9335205992509363,
|
14461 |
+
"grad_norm": 0.0,
|
14462 |
+
"learning_rate": 1.2072992427583086e-07,
|
14463 |
+
"loss": 1.1001,
|
14464 |
+
"step": 2065
|
14465 |
+
},
|
14466 |
+
{
|
14467 |
+
"epoch": 1.9344569288389513,
|
14468 |
+
"grad_norm": 0.0,
|
14469 |
+
"learning_rate": 1.1735633737334884e-07,
|
14470 |
+
"loss": 1.0938,
|
14471 |
+
"step": 2066
|
14472 |
+
},
|
14473 |
+
{
|
14474 |
+
"epoch": 1.9353932584269664,
|
14475 |
+
"grad_norm": 0.0,
|
14476 |
+
"learning_rate": 1.1403041657897851e-07,
|
14477 |
+
"loss": 1.1522,
|
14478 |
+
"step": 2067
|
14479 |
+
},
|
14480 |
+
{
|
14481 |
+
"epoch": 1.9363295880149813,
|
14482 |
+
"grad_norm": 0.0,
|
14483 |
+
"learning_rate": 1.107521698661973e-07,
|
14484 |
+
"loss": 1.151,
|
14485 |
+
"step": 2068
|
14486 |
+
},
|
14487 |
+
{
|
14488 |
+
"epoch": 1.9372659176029963,
|
14489 |
+
"grad_norm": 0.0,
|
14490 |
+
"learning_rate": 1.0752160509417852e-07,
|
14491 |
+
"loss": 1.1515,
|
14492 |
+
"step": 2069
|
14493 |
+
},
|
14494 |
+
{
|
14495 |
+
"epoch": 1.9382022471910112,
|
14496 |
+
"grad_norm": 0.0,
|
14497 |
+
"learning_rate": 1.043387300077936e-07,
|
14498 |
+
"loss": 1.1257,
|
14499 |
+
"step": 2070
|
14500 |
+
},
|
14501 |
+
{
|
14502 |
+
"epoch": 1.9391385767790261,
|
14503 |
+
"grad_norm": 0.0,
|
14504 |
+
"learning_rate": 1.0120355223758094e-07,
|
14505 |
+
"loss": 1.1321,
|
14506 |
+
"step": 2071
|
14507 |
+
},
|
14508 |
+
{
|
14509 |
+
"epoch": 1.940074906367041,
|
14510 |
+
"grad_norm": 0.0,
|
14511 |
+
"learning_rate": 9.811607929973044e-08,
|
14512 |
+
"loss": 1.0997,
|
14513 |
+
"step": 2072
|
14514 |
+
},
|
14515 |
+
{
|
14516 |
+
"epoch": 1.9410112359550562,
|
14517 |
+
"grad_norm": 0.0,
|
14518 |
+
"learning_rate": 9.507631859606125e-08,
|
14519 |
+
"loss": 1.1385,
|
14520 |
+
"step": 2073
|
14521 |
+
},
|
14522 |
+
{
|
14523 |
+
"epoch": 1.9419475655430711,
|
14524 |
+
"grad_norm": 0.0,
|
14525 |
+
"learning_rate": 9.208427741401737e-08,
|
14526 |
+
"loss": 1.1713,
|
14527 |
+
"step": 2074
|
14528 |
+
},
|
14529 |
+
{
|
14530 |
+
"epoch": 1.9428838951310863,
|
14531 |
+
"grad_norm": 0.0,
|
14532 |
+
"learning_rate": 8.91399629266343e-08,
|
14533 |
+
"loss": 1.1093,
|
14534 |
+
"step": 2075
|
14535 |
+
},
|
14536 |
+
{
|
14537 |
+
"epoch": 1.9438202247191012,
|
14538 |
+
"grad_norm": 0.0,
|
14539 |
+
"learning_rate": 8.624338219253458e-08,
|
14540 |
+
"loss": 1.0996,
|
14541 |
+
"step": 2076
|
14542 |
+
},
|
14543 |
+
{
|
14544 |
+
"epoch": 1.9447565543071161,
|
14545 |
+
"grad_norm": 0.0,
|
14546 |
+
"learning_rate": 8.339454215589904e-08,
|
14547 |
+
"loss": 1.1075,
|
14548 |
+
"step": 2077
|
14549 |
+
},
|
14550 |
+
{
|
14551 |
+
"epoch": 1.945692883895131,
|
14552 |
+
"grad_norm": 0.0,
|
14553 |
+
"learning_rate": 8.059344964645999e-08,
|
14554 |
+
"loss": 1.0985,
|
14555 |
+
"step": 2078
|
14556 |
+
},
|
14557 |
+
{
|
14558 |
+
"epoch": 1.946629213483146,
|
14559 |
+
"grad_norm": 0.0,
|
14560 |
+
"learning_rate": 7.78401113794791e-08,
|
14561 |
+
"loss": 1.1248,
|
14562 |
+
"step": 2079
|
14563 |
+
},
|
14564 |
+
{
|
14565 |
+
"epoch": 1.947565543071161,
|
14566 |
+
"grad_norm": 0.0,
|
14567 |
+
"learning_rate": 7.513453395573633e-08,
|
14568 |
+
"loss": 1.1626,
|
14569 |
+
"step": 2080
|
14570 |
+
},
|
14571 |
+
{
|
14572 |
+
"epoch": 1.948501872659176,
|
14573 |
+
"grad_norm": 0.0,
|
14574 |
+
"learning_rate": 7.247672386150761e-08,
|
14575 |
+
"loss": 1.113,
|
14576 |
+
"step": 2081
|
14577 |
+
},
|
14578 |
+
{
|
14579 |
+
"epoch": 1.949438202247191,
|
14580 |
+
"grad_norm": 0.0,
|
14581 |
+
"learning_rate": 6.986668746855608e-08,
|
14582 |
+
"loss": 1.1539,
|
14583 |
+
"step": 2082
|
14584 |
+
},
|
14585 |
+
{
|
14586 |
+
"epoch": 1.9503745318352061,
|
14587 |
+
"grad_norm": 0.0,
|
14588 |
+
"learning_rate": 6.7304431034112e-08,
|
14589 |
+
"loss": 1.1581,
|
14590 |
+
"step": 2083
|
14591 |
+
},
|
14592 |
+
{
|
14593 |
+
"epoch": 1.951310861423221,
|
14594 |
+
"grad_norm": 0.0,
|
14595 |
+
"learning_rate": 6.47899607008573e-08,
|
14596 |
+
"loss": 1.1121,
|
14597 |
+
"step": 2084
|
14598 |
+
},
|
14599 |
+
{
|
14600 |
+
"epoch": 1.952247191011236,
|
14601 |
+
"grad_norm": 0.0,
|
14602 |
+
"learning_rate": 6.232328249691444e-08,
|
14603 |
+
"loss": 1.1383,
|
14604 |
+
"step": 2085
|
14605 |
+
},
|
14606 |
+
{
|
14607 |
+
"epoch": 1.953183520599251,
|
14608 |
+
"grad_norm": 0.0,
|
14609 |
+
"learning_rate": 5.990440233583306e-08,
|
14610 |
+
"loss": 1.1243,
|
14611 |
+
"step": 2086
|
14612 |
+
},
|
14613 |
+
{
|
14614 |
+
"epoch": 1.9541198501872659,
|
14615 |
+
"grad_norm": 0.0,
|
14616 |
+
"learning_rate": 5.7533326016570024e-08,
|
14617 |
+
"loss": 1.1301,
|
14618 |
+
"step": 2087
|
14619 |
+
},
|
14620 |
+
{
|
14621 |
+
"epoch": 1.9550561797752808,
|
14622 |
+
"grad_norm": 0.0,
|
14623 |
+
"learning_rate": 5.521005922347611e-08,
|
14624 |
+
"loss": 1.1439,
|
14625 |
+
"step": 2088
|
14626 |
+
},
|
14627 |
+
{
|
14628 |
+
"epoch": 1.9559925093632957,
|
14629 |
+
"grad_norm": 0.0,
|
14630 |
+
"learning_rate": 5.2934607526291535e-08,
|
14631 |
+
"loss": 1.1229,
|
14632 |
+
"step": 2089
|
14633 |
+
},
|
14634 |
+
{
|
14635 |
+
"epoch": 1.9569288389513109,
|
14636 |
+
"grad_norm": 0.0,
|
14637 |
+
"learning_rate": 5.0706976380121564e-08,
|
14638 |
+
"loss": 1.1643,
|
14639 |
+
"step": 2090
|
14640 |
+
},
|
14641 |
+
{
|
14642 |
+
"epoch": 1.9578651685393258,
|
14643 |
+
"grad_norm": 0.0,
|
14644 |
+
"learning_rate": 4.8527171125425376e-08,
|
14645 |
+
"loss": 1.1357,
|
14646 |
+
"step": 2091
|
14647 |
+
},
|
14648 |
+
{
|
14649 |
+
"epoch": 1.958801498127341,
|
14650 |
+
"grad_norm": 0.0,
|
14651 |
+
"learning_rate": 4.639519698801165e-08,
|
14652 |
+
"loss": 1.1241,
|
14653 |
+
"step": 2092
|
14654 |
+
},
|
14655 |
+
{
|
14656 |
+
"epoch": 1.9597378277153559,
|
14657 |
+
"grad_norm": 0.0,
|
14658 |
+
"learning_rate": 4.4311059079016337e-08,
|
14659 |
+
"loss": 1.1751,
|
14660 |
+
"step": 2093
|
14661 |
+
},
|
14662 |
+
{
|
14663 |
+
"epoch": 1.9606741573033708,
|
14664 |
+
"grad_norm": 0.0,
|
14665 |
+
"learning_rate": 4.227476239489381e-08,
|
14666 |
+
"loss": 1.1548,
|
14667 |
+
"step": 2094
|
14668 |
+
},
|
14669 |
+
{
|
14670 |
+
"epoch": 1.9616104868913857,
|
14671 |
+
"grad_norm": 0.0,
|
14672 |
+
"learning_rate": 4.028631181740794e-08,
|
14673 |
+
"loss": 1.1137,
|
14674 |
+
"step": 2095
|
14675 |
+
},
|
14676 |
+
{
|
14677 |
+
"epoch": 1.9625468164794007,
|
14678 |
+
"grad_norm": 0.0,
|
14679 |
+
"learning_rate": 3.8345712113614374e-08,
|
14680 |
+
"loss": 1.1625,
|
14681 |
+
"step": 2096
|
14682 |
+
},
|
14683 |
+
{
|
14684 |
+
"epoch": 1.9634831460674156,
|
14685 |
+
"grad_norm": 0.0,
|
14686 |
+
"learning_rate": 3.6452967935856066e-08,
|
14687 |
+
"loss": 1.1371,
|
14688 |
+
"step": 2097
|
14689 |
+
},
|
14690 |
+
{
|
14691 |
+
"epoch": 1.9644194756554307,
|
14692 |
+
"grad_norm": 0.0,
|
14693 |
+
"learning_rate": 3.46080838217433e-08,
|
14694 |
+
"loss": 1.0739,
|
14695 |
+
"step": 2098
|
14696 |
+
},
|
14697 |
+
{
|
14698 |
+
"epoch": 1.9653558052434457,
|
14699 |
+
"grad_norm": 0.0,
|
14700 |
+
"learning_rate": 3.28110641941537e-08,
|
14701 |
+
"loss": 1.1595,
|
14702 |
+
"step": 2099
|
14703 |
+
},
|
14704 |
+
{
|
14705 |
+
"epoch": 1.9662921348314608,
|
14706 |
+
"grad_norm": 0.0,
|
14707 |
+
"learning_rate": 3.106191336121222e-08,
|
14708 |
+
"loss": 1.1671,
|
14709 |
+
"step": 2100
|
14710 |
}
|
14711 |
],
|
14712 |
"logging_steps": 1,
|
|
|
14726 |
"attributes": {}
|
14727 |
}
|
14728 |
},
|
14729 |
+
"total_flos": 6.871693999535555e+18,
|
14730 |
"train_batch_size": 8,
|
14731 |
"trial_name": null,
|
14732 |
"trial_params": null
|