Training in progress, step 22491, checkpoint
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e63155d8a8b26707565ffcd0b461dea5ebc052fa146ea4fa7e98b26ada3cad43
 size 1856040378
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ee8333b033064df00b889db112a3d891431c19c228f04607ae854217d617b7a2
 size 928000378
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b02b5189317f107b47761a33d54767287c36086e7e62e8c0878c096bde5797b4
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:61686b58ac0c30ea6e7d706b512b9f5ec51fe76d0d476c61783415414e98dcf0
 size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.4320533275604248,
   "best_model_checkpoint": "model/chessformer-3/checkpoint-22000",
-  "epoch": 0
+  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 22491,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -77359,6 +77359,1721 @@
       "eval_samples_per_second": 554.141,
       "eval_steps_per_second": 69.288,
       "step": 22000
+    },
+    {
+      "epoch": 0.9782579698546086,
+      "grad_norm": 0.057963717728853226,
+      "learning_rate": 1.2718103874865827e-06,
+      "loss": 1.4517,
+      "step": 22002
+    },
+    {
+      "epoch": 0.9783468943132808,
+      "grad_norm": 0.05782179906964302,
+      "learning_rate": 1.261411450722838e-06,
+      "loss": 1.4397,
+      "step": 22004
+    },
+    {
+      "epoch": 0.9784358187719532,
+      "grad_norm": 0.05823444202542305,
+      "learning_rate": 1.2510551483315146e-06,
+      "loss": 1.4472,
+      "step": 22006
+    },
+    {
+      "epoch": 0.9785247432306255,
+      "grad_norm": 0.057919129729270935,
+      "learning_rate": 1.2407414811979601e-06,
+      "loss": 1.4441,
+      "step": 22008
+    },
+    {
+      "epoch": 0.9786136676892979,
+      "grad_norm": 0.058388952165842056,
+      "learning_rate": 1.2304704502037467e-06,
+      "loss": 1.4497,
+      "step": 22010
+    },
+    {
+      "epoch": 0.9787025921479703,
+      "grad_norm": 0.057490069419145584,
+      "learning_rate": 1.2202420562268946e-06,
+      "loss": 1.4493,
+      "step": 22012
+    },
+    {
+      "epoch": 0.9787915166066427,
+      "grad_norm": 0.05721807852387428,
+      "learning_rate": 1.2100563001418708e-06,
+      "loss": 1.4464,
+      "step": 22014
+    },
+    {
+      "epoch": 0.978880441065315,
+      "grad_norm": 0.057543035596609116,
+      "learning_rate": 1.1999131828192567e-06,
+      "loss": 1.4467,
+      "step": 22016
+    },
+    {
+      "epoch": 0.9789693655239874,
+      "grad_norm": 0.057324040681123734,
+      "learning_rate": 1.1898127051262476e-06,
+      "loss": 1.4435,
+      "step": 22018
+    },
+    {
+      "epoch": 0.9790582899826598,
+      "grad_norm": 0.05752379819750786,
+      "learning_rate": 1.1797548679262638e-06,
+      "loss": 1.4421,
+      "step": 22020
+    },
+    {
+      "epoch": 0.9791472144413321,
+      "grad_norm": 0.05681290104985237,
+      "learning_rate": 1.1697396720790065e-06,
+      "loss": 1.4482,
+      "step": 22022
+    },
+    {
+      "epoch": 0.9792361389000045,
+      "grad_norm": 0.05741885304450989,
+      "learning_rate": 1.15976711844068e-06,
+      "loss": 1.4459,
+      "step": 22024
+    },
+    {
+      "epoch": 0.9793250633586768,
+      "grad_norm": 0.05764324218034744,
+      "learning_rate": 1.1498372078638243e-06,
+      "loss": 1.4434,
+      "step": 22026
+    },
+    {
+      "epoch": 0.9794139878173491,
+      "grad_norm": 0.05784038081765175,
+      "learning_rate": 1.1399499411972048e-06,
+      "loss": 1.4504,
+      "step": 22028
+    },
+    {
+      "epoch": 0.9795029122760215,
+      "grad_norm": 0.05776010826230049,
+      "learning_rate": 1.13010531928609e-06,
+      "loss": 1.4494,
+      "step": 22030
+    },
+    {
+      "epoch": 0.9795918367346939,
+      "grad_norm": 0.05751819536089897,
+      "learning_rate": 1.1203033429719734e-06,
+      "loss": 1.4483,
+      "step": 22032
+    },
+    {
+      "epoch": 0.9796807611933662,
+      "grad_norm": 0.05655774101614952,
+      "learning_rate": 1.1105440130929067e-06,
+      "loss": 1.4474,
+      "step": 22034
+    },
+    {
+      "epoch": 0.9797696856520386,
+      "grad_norm": 0.0575784407556057,
+      "learning_rate": 1.1008273304830008e-06,
+      "loss": 1.4497,
+      "step": 22036
+    },
+    {
+      "epoch": 0.979858610110711,
+      "grad_norm": 0.05941782891750336,
+      "learning_rate": 1.0911532959729797e-06,
+      "loss": 1.45,
+      "step": 22038
+    },
+    {
+      "epoch": 0.9799475345693833,
+      "grad_norm": 0.05766258016228676,
+      "learning_rate": 1.0815219103897933e-06,
+      "loss": 1.4442,
+      "step": 22040
+    },
+    {
+      "epoch": 0.9800364590280557,
+      "grad_norm": 0.056954920291900635,
+      "learning_rate": 1.0719331745567828e-06,
+      "loss": 1.4441,
+      "step": 22042
+    },
+    {
+      "epoch": 0.9801253834867281,
+      "grad_norm": 0.05810870602726936,
+      "learning_rate": 1.0623870892936261e-06,
+      "loss": 1.4414,
+      "step": 22044
+    },
+    {
+      "epoch": 0.9802143079454004,
+      "grad_norm": 0.05758174881339073,
+      "learning_rate": 1.0528836554163368e-06,
+      "loss": 1.4461,
+      "step": 22046
+    },
+    {
+      "epoch": 0.9803032324040727,
+      "grad_norm": 0.05731727182865143,
+      "learning_rate": 1.0434228737373764e-06,
+      "loss": 1.4426,
+      "step": 22048
+    },
+    {
+      "epoch": 0.9803921568627451,
+      "grad_norm": 0.05725093558430672,
+      "learning_rate": 1.034004745065431e-06,
+      "loss": 1.4474,
+      "step": 22050
+    },
+    {
+      "epoch": 0.9804810813214174,
+      "grad_norm": 0.0574093721807003,
+      "learning_rate": 1.0246292702056348e-06,
+      "loss": 1.4473,
+      "step": 22052
+    },
+    {
+      "epoch": 0.9805700057800898,
+      "grad_norm": 0.057217229157686234,
+      "learning_rate": 1.0152964499594575e-06,
+      "loss": 1.4416,
+      "step": 22054
+    },
+    {
+      "epoch": 0.9806589302387622,
+      "grad_norm": 0.05822160094976425,
+      "learning_rate": 1.0060062851247053e-06,
+      "loss": 1.4464,
+      "step": 22056
+    },
+    {
+      "epoch": 0.9807478546974345,
+      "grad_norm": 0.057691022753715515,
+      "learning_rate": 9.967587764955211e-07,
+      "loss": 1.4492,
+      "step": 22058
+    },
+    {
+      "epoch": 0.9808367791561069,
+      "grad_norm": 0.056380316615104675,
+      "learning_rate": 9.875539248624388e-07,
+      "loss": 1.4399,
+      "step": 22060
+    },
+    {
+      "epoch": 0.9809257036147793,
+      "grad_norm": 0.058779843151569366,
+      "learning_rate": 9.783917310122737e-07,
+      "loss": 1.4478,
+      "step": 22062
+    },
+    {
+      "epoch": 0.9810146280734516,
+      "grad_norm": 0.058194488286972046,
+      "learning_rate": 9.692721957283435e-07,
+      "loss": 1.4477,
+      "step": 22064
+    },
+    {
+      "epoch": 0.981103552532124,
+      "grad_norm": 0.05809153616428375,
+      "learning_rate": 9.601953197901913e-07,
+      "loss": 1.4381,
+      "step": 22066
+    },
+    {
+      "epoch": 0.9811924769907964,
+      "grad_norm": 0.05672544613480568,
+      "learning_rate": 9.511611039737522e-07,
+      "loss": 1.4442,
+      "step": 22068
+    },
+    {
+      "epoch": 0.9812814014494686,
+      "grad_norm": 0.058622874319553375,
+      "learning_rate": 9.421695490512416e-07,
+      "loss": 1.4394,
+      "step": 22070
+    },
+    {
+      "epoch": 0.981370325908141,
+      "grad_norm": 0.0575464628636837,
+      "learning_rate": 9.332206557914336e-07,
+      "loss": 1.447,
+      "step": 22072
+    },
+    {
+      "epoch": 0.9814592503668134,
+      "grad_norm": 0.057317234575748444,
+      "learning_rate": 9.243144249591606e-07,
+      "loss": 1.4467,
+      "step": 22074
+    },
+    {
+      "epoch": 0.9815481748254857,
+      "grad_norm": 0.05799569934606552,
+      "learning_rate": 9.154508573158693e-07,
+      "loss": 1.4454,
+      "step": 22076
+    },
+    {
+      "epoch": 0.9816370992841581,
+      "grad_norm": 0.05822969973087311,
+      "learning_rate": 9.066299536192313e-07,
+      "loss": 1.4448,
+      "step": 22078
+    },
+    {
+      "epoch": 0.9817260237428305,
+      "grad_norm": 0.058205485343933105,
+      "learning_rate": 8.9785171462331e-07,
+      "loss": 1.455,
+      "step": 22080
+    },
+    {
+      "epoch": 0.9818149482015028,
+      "grad_norm": 0.057623036205768585,
+      "learning_rate": 8.891161410785053e-07,
+      "loss": 1.4465,
+      "step": 22082
+    },
+    {
+      "epoch": 0.9819038726601752,
+      "grad_norm": 0.056971605867147446,
+      "learning_rate": 8.804232337315532e-07,
+      "loss": 1.4439,
+      "step": 22084
+    },
+    {
+      "epoch": 0.9819927971188476,
+      "grad_norm": 0.056623686105012894,
+      "learning_rate": 8.717729933255814e-07,
+      "loss": 1.4419,
+      "step": 22086
+    },
+    {
+      "epoch": 0.9820817215775199,
+      "grad_norm": 0.05784169211983681,
+      "learning_rate": 8.631654206000539e-07,
+      "loss": 1.4461,
+      "step": 22088
+    },
+    {
+      "epoch": 0.9821706460361923,
+      "grad_norm": 0.05700985714793205,
+      "learning_rate": 8.546005162907156e-07,
+      "loss": 1.4379,
+      "step": 22090
+    },
+    {
+      "epoch": 0.9822595704948646,
+      "grad_norm": 0.057645298540592194,
+      "learning_rate": 8.46078281129814e-07,
+      "loss": 1.4479,
+      "step": 22092
+    },
+    {
+      "epoch": 0.9823484949535369,
+      "grad_norm": 0.057107895612716675,
+      "learning_rate": 8.375987158458775e-07,
+      "loss": 1.448,
+      "step": 22094
+    },
+    {
+      "epoch": 0.9824374194122093,
+      "grad_norm": 0.05772538483142853,
+      "learning_rate": 8.291618211637153e-07,
+      "loss": 1.4449,
+      "step": 22096
+    },
+    {
+      "epoch": 0.9825263438708817,
+      "grad_norm": 0.05685894191265106,
+      "learning_rate": 8.207675978045281e-07,
+      "loss": 1.4485,
+      "step": 22098
+    },
+    {
+      "epoch": 0.982615268329554,
+      "grad_norm": 0.058153510093688965,
+      "learning_rate": 8.124160464859642e-07,
+      "loss": 1.4477,
+      "step": 22100
+    },
+    {
+      "epoch": 0.9827041927882264,
+      "grad_norm": 0.057993143796920776,
+      "learning_rate": 8.041071679219525e-07,
+      "loss": 1.4428,
+      "step": 22102
+    },
+    {
+      "epoch": 0.9827931172468988,
+      "grad_norm": 0.0569118969142437,
+      "learning_rate": 7.95840962822758e-07,
+      "loss": 1.4464,
+      "step": 22104
+    },
+    {
+      "epoch": 0.9828820417055711,
+      "grad_norm": 0.057352546602487564,
+      "learning_rate": 7.876174318949824e-07,
+      "loss": 1.4476,
+      "step": 22106
+    },
+    {
+      "epoch": 0.9829709661642435,
+      "grad_norm": 0.05711887776851654,
+      "learning_rate": 7.794365758416188e-07,
+      "loss": 1.443,
+      "step": 22108
+    },
+    {
+      "epoch": 0.9830598906229159,
+      "grad_norm": 0.058069389313459396,
+      "learning_rate": 7.712983953619967e-07,
+      "loss": 1.4485,
+      "step": 22110
+    },
+    {
+      "epoch": 0.9831488150815882,
+      "grad_norm": 0.05813687667250633,
+      "learning_rate": 7.632028911518374e-07,
+      "loss": 1.4526,
+      "step": 22112
+    },
+    {
+      "epoch": 0.9832377395402605,
+      "grad_norm": 0.05850476771593094,
+      "learning_rate": 7.551500639031427e-07,
+      "loss": 1.4463,
+      "step": 22114
+    },
+    {
+      "epoch": 0.9833266639989329,
+      "grad_norm": 0.0575152225792408,
+      "learning_rate": 7.471399143043067e-07,
+      "loss": 1.4549,
+      "step": 22116
+    },
+    {
+      "epoch": 0.9834155884576052,
+      "grad_norm": 0.05797769874334335,
+      "learning_rate": 7.391724430401148e-07,
+      "loss": 1.449,
+      "step": 22118
+    },
+    {
+      "epoch": 0.9835045129162776,
+      "grad_norm": 0.056744713336229324,
+      "learning_rate": 7.312476507916332e-07,
+      "loss": 1.4456,
+      "step": 22120
+    },
+    {
+      "epoch": 0.98359343737495,
+      "grad_norm": 0.057250816375017166,
+      "learning_rate": 7.233655382363202e-07,
+      "loss": 1.4396,
+      "step": 22122
+    },
+    {
+      "epoch": 0.9836823618336223,
+      "grad_norm": 0.05758194625377655,
+      "learning_rate": 7.155261060479701e-07,
+      "loss": 1.4521,
+      "step": 22124
+    },
+    {
+      "epoch": 0.9837712862922947,
+      "grad_norm": 0.05834776535630226,
+      "learning_rate": 7.077293548966579e-07,
+      "loss": 1.4438,
+      "step": 22126
+    },
+    {
+      "epoch": 0.9838602107509671,
+      "grad_norm": 0.0584811232984066,
+      "learning_rate": 6.999752854490171e-07,
+      "loss": 1.4453,
+      "step": 22128
+    },
+    {
+      "epoch": 0.9839491352096394,
+      "grad_norm": 0.05721158906817436,
+      "learning_rate": 6.922638983677954e-07,
+      "loss": 1.4502,
+      "step": 22130
+    },
+    {
+      "epoch": 0.9840380596683118,
+      "grad_norm": 0.05716519057750702,
+      "learning_rate": 6.84595194312243e-07,
+      "loss": 1.4425,
+      "step": 22132
+    },
+    {
+      "epoch": 0.9841269841269841,
+      "grad_norm": 0.05842083320021629,
+      "learning_rate": 6.769691739378913e-07,
+      "loss": 1.4459,
+      "step": 22134
+    },
+    {
+      "epoch": 0.9842159085856564,
+      "grad_norm": 0.05762701854109764,
+      "learning_rate": 6.693858378967188e-07,
+      "loss": 1.4475,
+      "step": 22136
+    },
+    {
+      "epoch": 0.9843048330443288,
+      "grad_norm": 0.0569646880030632,
+      "learning_rate": 6.618451868368736e-07,
+      "loss": 1.4453,
+      "step": 22138
+    },
+    {
+      "epoch": 0.9843937575030012,
+      "grad_norm": 0.056726690381765366,
+      "learning_rate": 6.543472214030066e-07,
+      "loss": 1.4471,
+      "step": 22140
+    },
+    {
+      "epoch": 0.9844826819616735,
+      "grad_norm": 0.056243959814310074,
+      "learning_rate": 6.468919422361052e-07,
+      "loss": 1.4397,
+      "step": 22142
+    },
+    {
+      "epoch": 0.9845716064203459,
+      "grad_norm": 0.058017753064632416,
+      "learning_rate": 6.394793499734375e-07,
+      "loss": 1.4452,
+      "step": 22144
+    },
+    {
+      "epoch": 0.9846605308790183,
+      "grad_norm": 0.057117003947496414,
+      "learning_rate": 6.321094452487186e-07,
+      "loss": 1.4457,
+      "step": 22146
+    },
+    {
+      "epoch": 0.9847494553376906,
+      "grad_norm": 0.059118859469890594,
+      "learning_rate": 6.247822286918892e-07,
+      "loss": 1.4479,
+      "step": 22148
+    },
+    {
+      "epoch": 0.984838379796363,
+      "grad_norm": 0.05744896084070206,
+      "learning_rate": 6.174977009293925e-07,
+      "loss": 1.4502,
+      "step": 22150
+    },
+    {
+      "epoch": 0.9849273042550354,
+      "grad_norm": 0.05781065300107002,
+      "learning_rate": 6.102558625838417e-07,
+      "loss": 1.4416,
+      "step": 22152
+    },
+    {
+      "epoch": 0.9850162287137078,
+      "grad_norm": 0.057693760842084885,
+      "learning_rate": 6.030567142744081e-07,
+      "loss": 1.4452,
+      "step": 22154
+    },
+    {
+      "epoch": 0.98510515317238,
+      "grad_norm": 0.05738704651594162,
+      "learning_rate": 5.959002566164328e-07,
+      "loss": 1.4488,
+      "step": 22156
+    },
+    {
+      "epoch": 0.9851940776310524,
+      "grad_norm": 0.05740581080317497,
+      "learning_rate": 5.887864902217044e-07,
+      "loss": 1.4462,
+      "step": 22158
+    },
+    {
+      "epoch": 0.9852830020897247,
+      "grad_norm": 0.057811297476291656,
+      "learning_rate": 5.817154156983473e-07,
+      "loss": 1.4463,
+      "step": 22160
+    },
+    {
+      "epoch": 0.9853719265483971,
+      "grad_norm": 0.0571342296898365,
+      "learning_rate": 5.746870336508225e-07,
+      "loss": 1.4455,
+      "step": 22162
+    },
+    {
+      "epoch": 0.9854608510070695,
+      "grad_norm": 0.05744681507349014,
+      "learning_rate": 5.677013446799828e-07,
+      "loss": 1.4499,
+      "step": 22164
+    },
+    {
+      "epoch": 0.9855497754657419,
+      "grad_norm": 0.05767764896154404,
+      "learning_rate": 5.60758349382906e-07,
+      "loss": 1.4425,
+      "step": 22166
+    },
+    {
+      "epoch": 0.9856386999244142,
+      "grad_norm": 0.05785730853676796,
+      "learning_rate": 5.53858048353173e-07,
+      "loss": 1.4449,
+      "step": 22168
+    },
+    {
+      "epoch": 0.9857276243830866,
+      "grad_norm": 0.058226075023412704,
+      "learning_rate": 5.470004421806452e-07,
+      "loss": 1.4434,
+      "step": 22170
+    },
+    {
+      "epoch": 0.985816548841759,
+      "grad_norm": 0.057991281151771545,
+      "learning_rate": 5.401855314515758e-07,
+      "loss": 1.445,
+      "step": 22172
+    },
+    {
+      "epoch": 0.9859054733004313,
+      "grad_norm": 0.05784721300005913,
+      "learning_rate": 5.334133167484434e-07,
+      "loss": 1.448,
+      "step": 22174
+    },
+    {
+      "epoch": 0.9859943977591037,
+      "grad_norm": 0.05827952176332474,
+      "learning_rate": 5.266837986502294e-07,
+      "loss": 1.4506,
+      "step": 22176
+    },
+    {
+      "epoch": 0.986083322217776,
+      "grad_norm": 0.05748755857348442,
+      "learning_rate": 5.199969777321955e-07,
+      "loss": 1.4483,
+      "step": 22178
+    },
+    {
+      "epoch": 0.9861722466764483,
+      "grad_norm": 0.057960059493780136,
+      "learning_rate": 5.13352854565996e-07,
+      "loss": 1.4474,
+      "step": 22180
+    },
+    {
+      "epoch": 0.9862611711351207,
+      "grad_norm": 0.05742307007312775,
+      "learning_rate": 5.067514297195098e-07,
+      "loss": 1.4512,
+      "step": 22182
+    },
+    {
+      "epoch": 0.986350095593793,
+      "grad_norm": 0.05780716612935066,
+      "learning_rate": 5.001927037571186e-07,
+      "loss": 1.4459,
+      "step": 22184
+    },
+    {
+      "epoch": 0.9864390200524654,
+      "grad_norm": 0.0577070526778698,
+      "learning_rate": 4.936766772394851e-07,
+      "loss": 1.4409,
+      "step": 22186
+    },
+    {
+      "epoch": 0.9865279445111378,
+      "grad_norm": 0.057526715099811554,
+      "learning_rate": 4.872033507236084e-07,
+      "loss": 1.4471,
+      "step": 22188
+    },
+    {
+      "epoch": 0.9866168689698102,
+      "grad_norm": 0.05801134556531906,
+      "learning_rate": 4.80772724762879e-07,
+      "loss": 1.4386,
+      "step": 22190
+    },
+    {
+      "epoch": 0.9867057934284825,
+      "grad_norm": 0.05754433944821358,
+      "learning_rate": 4.743847999070239e-07,
+      "loss": 1.4458,
+      "step": 22192
+    },
+    {
+      "epoch": 0.9867947178871549,
+      "grad_norm": 0.05766000971198082,
+      "learning_rate": 4.680395767021062e-07,
+      "loss": 1.4399,
+      "step": 22194
+    },
+    {
+      "epoch": 0.9868836423458273,
+      "grad_norm": 0.0569644533097744,
+      "learning_rate": 4.617370556904699e-07,
+      "loss": 1.4417,
+      "step": 22196
+    },
+    {
+      "epoch": 0.9869725668044996,
+      "grad_norm": 0.05827400088310242,
+      "learning_rate": 4.554772374110172e-07,
+      "loss": 1.4557,
+      "step": 22198
+    },
+    {
+      "epoch": 0.9870614912631719,
+      "grad_norm": 0.05730379745364189,
+      "learning_rate": 4.4926012239870916e-07,
+      "loss": 1.4435,
+      "step": 22200
+    },
+    {
+      "epoch": 0.9871504157218443,
+      "grad_norm": 0.05722169205546379,
+      "learning_rate": 4.4308571118517605e-07,
+      "loss": 1.4427,
+      "step": 22202
+    },
+    {
+      "epoch": 0.9872393401805166,
+      "grad_norm": 0.05816062167286873,
+      "learning_rate": 4.369540042981068e-07,
+      "loss": 1.4448,
+      "step": 22204
+    },
+    {
+      "epoch": 0.987328264639189,
+      "grad_norm": 0.05683821812272072,
+      "learning_rate": 4.3086500226169334e-07,
+      "loss": 1.445,
+      "step": 22206
+    },
+    {
+      "epoch": 0.9874171890978614,
+      "grad_norm": 0.05709170922636986,
+      "learning_rate": 4.248187055965191e-07,
+      "loss": 1.4453,
+      "step": 22208
+    },
+    {
+      "epoch": 0.9875061135565337,
+      "grad_norm": 0.05717954784631729,
+      "learning_rate": 4.1881511481939304e-07,
+      "loss": 1.4421,
+      "step": 22210
+    },
+    {
+      "epoch": 0.9875950380152061,
+      "grad_norm": 0.05784517154097557,
+      "learning_rate": 4.1285423044351565e-07,
+      "loss": 1.4496,
+      "step": 22212
+    },
+    {
+      "epoch": 0.9876839624738785,
+      "grad_norm": 0.057573914527893066,
+      "learning_rate": 4.0693605297842384e-07,
+      "loss": 1.4458,
+      "step": 22214
+    },
+    {
+      "epoch": 0.9877728869325508,
+      "grad_norm": 0.056905996054410934,
+      "learning_rate": 4.0106058293015723e-07,
+      "loss": 1.4495,
+      "step": 22216
+    },
+    {
+      "epoch": 0.9878618113912232,
+      "grad_norm": 0.05849708244204521,
+      "learning_rate": 3.952278208008697e-07,
+      "loss": 1.4523,
+      "step": 22218
+    },
+    {
+      "epoch": 0.9879507358498956,
+      "grad_norm": 0.0573749914765358,
+      "learning_rate": 3.8943776708916247e-07,
+      "loss": 1.4409,
+      "step": 22220
+    },
+    {
+      "epoch": 0.9880396603085678,
+      "grad_norm": 0.05719359219074249,
+      "learning_rate": 3.836904222900284e-07,
+      "loss": 1.4437,
+      "step": 22222
+    },
+    {
+      "epoch": 0.9881285847672402,
+      "grad_norm": 0.058678288012742996,
+      "learning_rate": 3.779857868947967e-07,
+      "loss": 1.4416,
+      "step": 22224
+    },
+    {
+      "epoch": 0.9882175092259126,
+      "grad_norm": 0.058080919086933136,
+      "learning_rate": 3.723238613910773e-07,
+      "loss": 1.4414,
+      "step": 22226
+    },
+    {
+      "epoch": 0.9883064336845849,
+      "grad_norm": 0.05763470381498337,
+      "learning_rate": 3.6670464626292754e-07,
+      "loss": 1.4479,
+      "step": 22228
+    },
+    {
+      "epoch": 0.9883953581432573,
+      "grad_norm": 0.0569768100976944,
+      "learning_rate": 3.611281419906853e-07,
+      "loss": 1.4533,
+      "step": 22230
+    },
+    {
+      "epoch": 0.9884842826019297,
+      "grad_norm": 0.05751827359199524,
+      "learning_rate": 3.5559434905102497e-07,
+      "loss": 1.4468,
+      "step": 22232
+    },
+    {
+      "epoch": 0.988573207060602,
+      "grad_norm": 0.057300738990306854,
+      "learning_rate": 3.501032679170124e-07,
+      "loss": 1.4462,
+      "step": 22234
+    },
+    {
+      "epoch": 0.9886621315192744,
+      "grad_norm": 0.057894255965948105,
+      "learning_rate": 3.4465489905810555e-07,
+      "loss": 1.444,
+      "step": 22236
+    },
+    {
+      "epoch": 0.9887510559779468,
+      "grad_norm": 0.05746915191411972,
+      "learning_rate": 3.392492429399874e-07,
+      "loss": 1.4461,
+      "step": 22238
+    },
+    {
+      "epoch": 0.9888399804366191,
+      "grad_norm": 0.05837760120630264,
+      "learning_rate": 3.3388630002473273e-07,
+      "loss": 1.4482,
+      "step": 22240
+    },
+    {
+      "epoch": 0.9889289048952915,
+      "grad_norm": 0.05805863440036774,
+      "learning_rate": 3.2856607077086374e-07,
+      "loss": 1.4444,
+      "step": 22242
+    },
+    {
+      "epoch": 0.9890178293539638,
+      "grad_norm": 0.056360069662332535,
+      "learning_rate": 3.2328855563318326e-07,
+      "loss": 1.4449,
+      "step": 22244
+    },
+    {
+      "epoch": 0.9891067538126361,
+      "grad_norm": 0.05757332220673561,
+      "learning_rate": 3.180537550627749e-07,
+      "loss": 1.4413,
+      "step": 22246
+    },
+    {
+      "epoch": 0.9891956782713085,
+      "grad_norm": 0.05767189711332321,
+      "learning_rate": 3.1286166950711405e-07,
+      "loss": 1.4476,
+      "step": 22248
+    },
+    {
+      "epoch": 0.9892846027299809,
+      "grad_norm": 0.058686595410108566,
+      "learning_rate": 3.0771229941012337e-07,
+      "loss": 1.446,
+      "step": 22250
+    },
+    {
+      "epoch": 0.9893735271886532,
+      "grad_norm": 0.05774107575416565,
+      "learning_rate": 3.026056452119508e-07,
+      "loss": 1.4448,
+      "step": 22252
+    },
+    {
+      "epoch": 0.9894624516473256,
+      "grad_norm": 0.05755819380283356,
+      "learning_rate": 2.975417073491915e-07,
+      "loss": 1.4493,
+      "step": 22254
+    },
+    {
+      "epoch": 0.989551376105998,
+      "grad_norm": 0.057851407676935196,
+      "learning_rate": 2.9252048625461047e-07,
+      "loss": 1.4421,
+      "step": 22256
+    },
+    {
+      "epoch": 0.9896403005646703,
+      "grad_norm": 0.05904214829206467,
+      "learning_rate": 2.8754198235758643e-07,
+      "loss": 1.4491,
+      "step": 22258
+    },
+    {
+      "epoch": 0.9897292250233427,
+      "grad_norm": 0.0583941750228405,
+      "learning_rate": 2.826061960836124e-07,
+      "loss": 1.4429,
+      "step": 22260
+    },
+    {
+      "epoch": 0.9898181494820151,
+      "grad_norm": 0.05874110758304596,
+      "learning_rate": 2.7771312785462853e-07,
+      "loss": 1.4454,
+      "step": 22262
+    },
+    {
+      "epoch": 0.9899070739406873,
+      "grad_norm": 0.05823364108800888,
+      "learning_rate": 2.7286277808891145e-07,
+      "loss": 1.45,
+      "step": 22264
+    },
+    {
+      "epoch": 0.9899959983993597,
+      "grad_norm": 0.056944265961647034,
+      "learning_rate": 2.6805514720112946e-07,
+      "loss": 1.4472,
+      "step": 22266
+    },
+    {
+      "epoch": 0.9900849228580321,
+      "grad_norm": 0.05789085850119591,
+      "learning_rate": 2.632902356022315e-07,
+      "loss": 1.4496,
+      "step": 22268
+    },
+    {
+      "epoch": 0.9901738473167044,
+      "grad_norm": 0.058085713535547256,
+      "learning_rate": 2.5856804369955854e-07,
+      "loss": 1.4492,
+      "step": 22270
+    },
+    {
+      "epoch": 0.9902627717753768,
+      "grad_norm": 0.05807514116168022,
+      "learning_rate": 2.538885718967876e-07,
+      "loss": 1.4439,
+      "step": 22272
+    },
+    {
+      "epoch": 0.9903516962340492,
+      "grad_norm": 0.057060327380895615,
+      "learning_rate": 2.49251820593932e-07,
+      "loss": 1.4408,
+      "step": 22274
+    },
+    {
+      "epoch": 0.9904406206927215,
+      "grad_norm": 0.057622525840997696,
+      "learning_rate": 2.44657790187397e-07,
+      "loss": 1.4477,
+      "step": 22276
+    },
+    {
+      "epoch": 0.9905295451513939,
+      "grad_norm": 0.056390151381492615,
+      "learning_rate": 2.401064810698128e-07,
+      "loss": 1.4408,
+      "step": 22278
+    },
+    {
+      "epoch": 0.9906184696100663,
+      "grad_norm": 0.05661400407552719,
+      "learning_rate": 2.355978936303127e-07,
+      "loss": 1.4452,
+      "step": 22280
+    },
+    {
+      "epoch": 0.9907073940687386,
+      "grad_norm": 0.057079821825027466,
+      "learning_rate": 2.3113202825425505e-07,
+      "loss": 1.4395,
+      "step": 22282
+    },
+    {
+      "epoch": 0.990796318527411,
+      "grad_norm": 0.05696665868163109,
+      "learning_rate": 2.267088853235011e-07,
+      "loss": 1.447,
+      "step": 22284
+    },
+    {
+      "epoch": 0.9908852429860833,
+      "grad_norm": 0.056844066828489304,
+      "learning_rate": 2.2232846521608175e-07,
+      "loss": 1.4413,
+      "step": 22286
+    },
+    {
+      "epoch": 0.9909741674447556,
+      "grad_norm": 0.05780512094497681,
+      "learning_rate": 2.1799076830647525e-07,
+      "loss": 1.4417,
+      "step": 22288
+    },
+    {
+      "epoch": 0.991063091903428,
+      "grad_norm": 0.05735721439123154,
+      "learning_rate": 2.1369579496549607e-07,
+      "loss": 1.4478,
+      "step": 22290
+    },
+    {
+      "epoch": 0.9911520163621004,
+      "grad_norm": 0.05718429014086723,
+      "learning_rate": 2.0944354556023947e-07,
+      "loss": 1.4469,
+      "step": 22292
+    },
+    {
+      "epoch": 0.9912409408207727,
+      "grad_norm": 0.05797044187784195,
+      "learning_rate": 2.0523402045430352e-07,
+      "loss": 1.4446,
+      "step": 22294
+    },
+    {
+      "epoch": 0.9913298652794451,
+      "grad_norm": 0.05726039782166481,
+      "learning_rate": 2.0106722000745592e-07,
+      "loss": 1.4418,
+      "step": 22296
+    },
+    {
+      "epoch": 0.9914187897381175,
+      "grad_norm": 0.05672242492437363,
+      "learning_rate": 1.969431445759673e-07,
+      "loss": 1.4446,
+      "step": 22298
+    },
+    {
+      "epoch": 0.9915077141967898,
+      "grad_norm": 0.057257115840911865,
+      "learning_rate": 1.9286179451227792e-07,
+      "loss": 1.448,
+      "step": 22300
+    },
+    {
+      "epoch": 0.9915966386554622,
+      "grad_norm": 0.05845504254102707,
+      "learning_rate": 1.8882317016538642e-07,
+      "loss": 1.4445,
+      "step": 22302
+    },
+    {
+      "epoch": 0.9916855631141346,
+      "grad_norm": 0.058910876512527466,
+      "learning_rate": 1.8482727188046112e-07,
+      "loss": 1.4505,
+      "step": 22304
+    },
+    {
+      "epoch": 0.991774487572807,
+      "grad_norm": 0.05711057409644127,
+      "learning_rate": 1.8087409999911763e-07,
+      "loss": 1.4427,
+      "step": 22306
+    },
+    {
+      "epoch": 0.9918634120314792,
+      "grad_norm": 0.05767577514052391,
+      "learning_rate": 1.7696365485930787e-07,
+      "loss": 1.4478,
+      "step": 22308
+    },
+    {
+      "epoch": 0.9919523364901516,
+      "grad_norm": 0.057249389588832855,
+      "learning_rate": 1.7309593679526447e-07,
+      "loss": 1.4401,
+      "step": 22310
+    },
+    {
+      "epoch": 0.9920412609488239,
+      "grad_norm": 0.05789843201637268,
+      "learning_rate": 1.6927094613766736e-07,
+      "loss": 1.4432,
+      "step": 22312
+    },
+    {
+      "epoch": 0.9921301854074963,
+      "grad_norm": 0.05697624757885933,
+      "learning_rate": 1.654886832134772e-07,
+      "loss": 1.4426,
+      "step": 22314
+    },
+    {
+      "epoch": 0.9922191098661687,
+      "grad_norm": 0.058805204927921295,
+      "learning_rate": 1.6174914834599097e-07,
+      "loss": 1.4463,
+      "step": 22316
+    },
+    {
+      "epoch": 0.992308034324841,
+      "grad_norm": 0.05726942420005798,
+      "learning_rate": 1.5805234185495287e-07,
+      "loss": 1.4472,
+      "step": 22318
+    },
+    {
+      "epoch": 0.9923969587835134,
+      "grad_norm": 0.05799218639731407,
+      "learning_rate": 1.5439826405627688e-07,
+      "loss": 1.4422,
+      "step": 22320
+    },
+    {
+      "epoch": 0.9924858832421858,
+      "grad_norm": 0.05797122046351433,
+      "learning_rate": 1.5078691526243526e-07,
+      "loss": 1.4429,
+      "step": 22322
+    },
+    {
+      "epoch": 0.9925748077008582,
+      "grad_norm": 0.057014547288417816,
+      "learning_rate": 1.4721829578207002e-07,
+      "loss": 1.4429,
+      "step": 22324
+    },
+    {
+      "epoch": 0.9926637321595305,
+      "grad_norm": 0.05727580189704895,
+      "learning_rate": 1.4369240592021493e-07,
+      "loss": 1.4485,
+      "step": 22326
+    },
+    {
+      "epoch": 0.9927526566182029,
+      "grad_norm": 0.05705159902572632,
+      "learning_rate": 1.4020924597840656e-07,
+      "loss": 1.4497,
+      "step": 22328
+    },
+    {
+      "epoch": 0.9928415810768751,
+      "grad_norm": 0.05742578208446503,
+      "learning_rate": 1.3676881625424021e-07,
+      "loss": 1.4469,
+      "step": 22330
+    },
+    {
+      "epoch": 0.9929305055355475,
+      "grad_norm": 0.058664653450250626,
+      "learning_rate": 1.3337111704198047e-07,
+      "loss": 1.4475,
+      "step": 22332
+    },
+    {
+      "epoch": 0.9930194299942199,
+      "grad_norm": 0.05729004368185997,
+      "learning_rate": 1.3001614863195066e-07,
+      "loss": 1.4456,
+      "step": 22334
+    },
+    {
+      "epoch": 0.9931083544528923,
+      "grad_norm": 0.05801570415496826,
+      "learning_rate": 1.2670391131103242e-07,
+      "loss": 1.4544,
+      "step": 22336
+    },
+    {
+      "epoch": 0.9931972789115646,
+      "grad_norm": 0.05761876702308655,
+      "learning_rate": 1.2343440536227714e-07,
+      "loss": 1.4461,
+      "step": 22338
+    },
+    {
+      "epoch": 0.993286203370237,
+      "grad_norm": 0.05685674026608467,
+      "learning_rate": 1.2020763106529443e-07,
+      "loss": 1.4403,
+      "step": 22340
+    },
+    {
+      "epoch": 0.9933751278289094,
+      "grad_norm": 0.05733815208077431,
+      "learning_rate": 1.170235886958082e-07,
+      "loss": 1.4474,
+      "step": 22342
+    },
+    {
+      "epoch": 0.9934640522875817,
+      "grad_norm": 0.057545486837625504,
+      "learning_rate": 1.1388227852610066e-07,
+      "loss": 1.445,
+      "step": 22344
+    },
+    {
+      "epoch": 0.9935529767462541,
+      "grad_norm": 0.0574163943529129,
+      "learning_rate": 1.1078370082467926e-07,
+      "loss": 1.4422,
+      "step": 22346
+    },
+    {
+      "epoch": 0.9936419012049265,
+      "grad_norm": 0.05813299119472504,
+      "learning_rate": 1.0772785585633215e-07,
+      "loss": 1.4507,
+      "step": 22348
+    },
+    {
+      "epoch": 0.9937308256635988,
+      "grad_norm": 0.05734492093324661,
+      "learning_rate": 1.0471474388240587e-07,
+      "loss": 1.4422,
+      "step": 22350
+    },
+    {
+      "epoch": 0.9938197501222711,
+      "grad_norm": 0.05803303048014641,
+      "learning_rate": 1.0174436516047214e-07,
+      "loss": 1.4482,
+      "step": 22352
+    },
+    {
+      "epoch": 0.9939086745809435,
+      "grad_norm": 0.058048054575920105,
+      "learning_rate": 9.881671994432795e-08,
+      "loss": 1.4479,
+      "step": 22354
+    },
+    {
+      "epoch": 0.9939975990396158,
+      "grad_norm": 0.057730190455913544,
+      "learning_rate": 9.59318084843841e-08,
+      "loss": 1.4538,
+      "step": 22356
+    },
+    {
+      "epoch": 0.9940865234982882,
+      "grad_norm": 0.05775808170437813,
+      "learning_rate": 9.308963102716561e-08,
+      "loss": 1.442,
+      "step": 22358
+    },
+    {
+      "epoch": 0.9941754479569606,
+      "grad_norm": 0.05732406675815582,
+      "learning_rate": 9.029018781570031e-08,
+      "loss": 1.447,
+      "step": 22360
+    },
+    {
+      "epoch": 0.9942643724156329,
+      "grad_norm": 0.05907835438847542,
+      "learning_rate": 8.753347908924126e-08,
+      "loss": 1.4477,
+      "step": 22362
+    },
+    {
+      "epoch": 0.9943532968743053,
+      "grad_norm": 0.05664451792836189,
+      "learning_rate": 8.481950508343328e-08,
+      "loss": 1.4409,
+      "step": 22364
+    },
+    {
+      "epoch": 0.9944422213329777,
+      "grad_norm": 0.05807731673121452,
+      "learning_rate": 8.214826603031301e-08,
+      "loss": 1.4443,
+      "step": 22366
+    },
+    {
+      "epoch": 0.99453114579165,
+      "grad_norm": 0.0581711083650589,
+      "learning_rate": 7.951976215825329e-08,
+      "loss": 1.4511,
+      "step": 22368
+    },
+    {
+      "epoch": 0.9946200702503224,
+      "grad_norm": 0.057496510446071625,
+      "learning_rate": 7.693399369190779e-08,
+      "loss": 1.4406,
+      "step": 22370
+    },
+    {
+      "epoch": 0.9947089947089947,
+      "grad_norm": 0.05667787045240402,
+      "learning_rate": 7.43909608523774e-08,
+      "loss": 1.4445,
+      "step": 22372
+    },
+    {
+      "epoch": 0.994797919167667,
+      "grad_norm": 0.05935773253440857,
+      "learning_rate": 7.189066385693276e-08,
+      "loss": 1.4474,
+      "step": 22374
+    },
+    {
+      "epoch": 0.9948868436263394,
+      "grad_norm": 0.05779668688774109,
+      "learning_rate": 6.943310291945836e-08,
+      "loss": 1.4459,
+      "step": 22376
+    },
+    {
+      "epoch": 0.9949757680850118,
+      "grad_norm": 0.056512489914894104,
+      "learning_rate": 6.701827824989737e-08,
+      "loss": 1.4443,
+      "step": 22378
+    },
+    {
+      "epoch": 0.9950646925436841,
+      "grad_norm": 0.058668848127126694,
+      "learning_rate": 6.464619005480676e-08,
+      "loss": 1.4499,
+      "step": 22380
+    },
+    {
+      "epoch": 0.9951536170023565,
+      "grad_norm": 0.05988642945885658,
+      "learning_rate": 6.231683853685776e-08,
+      "loss": 1.4459,
+      "step": 22382
+    },
+    {
+      "epoch": 0.9952425414610289,
+      "grad_norm": 0.057425398379564285,
+      "learning_rate": 6.003022389522439e-08,
+      "loss": 1.4456,
+      "step": 22384
+    },
+    {
+      "epoch": 0.9953314659197012,
+      "grad_norm": 0.05751774087548256,
+      "learning_rate": 5.778634632536139e-08,
+      "loss": 1.4431,
+      "step": 22386
+    },
+    {
+      "epoch": 0.9954203903783736,
+      "grad_norm": 0.05865611135959625,
+      "learning_rate": 5.558520601917083e-08,
+      "loss": 1.4444,
+      "step": 22388
+    },
+    {
+      "epoch": 0.995509314837046,
+      "grad_norm": 0.05733845382928848,
+      "learning_rate": 5.342680316466897e-08,
+      "loss": 1.4468,
+      "step": 22390
+    },
+    {
+      "epoch": 0.9955982392957183,
+      "grad_norm": 0.059069231152534485,
+      "learning_rate": 5.131113794643039e-08,
+      "loss": 1.4488,
+      "step": 22392
+    },
+    {
+      "epoch": 0.9956871637543906,
+      "grad_norm": 0.05756256729364395,
+      "learning_rate": 4.923821054536592e-08,
+      "loss": 1.4397,
+      "step": 22394
+    },
+    {
+      "epoch": 0.995776088213063,
+      "grad_norm": 0.057630039751529694,
+      "learning_rate": 4.7208021138611666e-08,
+      "loss": 1.4437,
+      "step": 22396
+    },
+    {
+      "epoch": 0.9958650126717353,
+      "grad_norm": 0.057779572904109955,
+      "learning_rate": 4.5220569899750984e-08,
+      "loss": 1.4464,
+      "step": 22398
+    },
+    {
+      "epoch": 0.9959539371304077,
+      "grad_norm": 0.05961634963750839,
+      "learning_rate": 4.327585699859249e-08,
+      "loss": 1.4437,
+      "step": 22400
+    },
+    {
+      "epoch": 0.9960428615890801,
+      "grad_norm": 0.05674600601196289,
+      "learning_rate": 4.137388260155861e-08,
+      "loss": 1.4464,
+      "step": 22402
+    },
+    {
+      "epoch": 0.9961317860477524,
+      "grad_norm": 0.056886181235313416,
+      "learning_rate": 3.951464687107498e-08,
+      "loss": 1.445,
+      "step": 22404
+    },
+    {
+      "epoch": 0.9962207105064248,
+      "grad_norm": 0.0578632578253746,
+      "learning_rate": 3.769814996612553e-08,
+      "loss": 1.4484,
+      "step": 22406
+    },
+    {
+      "epoch": 0.9963096349650972,
+      "grad_norm": 0.057221461087465286,
+      "learning_rate": 3.592439204197495e-08,
+      "loss": 1.445,
+      "step": 22408
+    },
+    {
+      "epoch": 0.9963985594237695,
+      "grad_norm": 0.058185044676065445,
+      "learning_rate": 3.4193373250335224e-08,
+      "loss": 1.4412,
+      "step": 22410
+    },
+    {
+      "epoch": 0.9964874838824419,
+      "grad_norm": 0.05843387171626091,
+      "learning_rate": 3.250509373908805e-08,
+      "loss": 1.4423,
+      "step": 22412
+    },
+    {
+      "epoch": 0.9965764083411143,
+      "grad_norm": 0.05737845599651337,
+      "learning_rate": 3.085955365261794e-08,
+      "loss": 1.4465,
+      "step": 22414
+    },
+    {
+      "epoch": 0.9966653327997865,
+      "grad_norm": 0.05840914696455002,
+      "learning_rate": 2.925675313159015e-08,
+      "loss": 1.4438,
+      "step": 22416
+    },
+    {
+      "epoch": 0.9967542572584589,
+      "grad_norm": 0.05688779801130295,
+      "learning_rate": 2.769669231295069e-08,
+      "loss": 1.4438,
+      "step": 22418
+    },
+    {
+      "epoch": 0.9968431817171313,
+      "grad_norm": 0.05774452164769173,
+      "learning_rate": 2.617937133009285e-08,
+      "loss": 1.4466,
+      "step": 22420
+    },
+    {
+      "epoch": 0.9969321061758036,
+      "grad_norm": 0.05797778069972992,
+      "learning_rate": 2.4704790312746194e-08,
+      "loss": 1.4443,
+      "step": 22422
+    },
+    {
+      "epoch": 0.997021030634476,
+      "grad_norm": 0.057229943573474884,
+      "learning_rate": 2.327294938697655e-08,
+      "loss": 1.4496,
+      "step": 22424
+    },
+    {
+      "epoch": 0.9971099550931484,
+      "grad_norm": 0.05771101638674736,
+      "learning_rate": 2.188384867513049e-08,
+      "loss": 1.4412,
+      "step": 22426
+    },
+    {
+      "epoch": 0.9971988795518207,
+      "grad_norm": 0.057632721960544586,
+      "learning_rate": 2.053748829600188e-08,
+      "loss": 1.4515,
+      "step": 22428
+    },
+    {
+      "epoch": 0.9972878040104931,
+      "grad_norm": 0.05861422419548035,
+      "learning_rate": 1.9233868364665342e-08,
+      "loss": 1.4463,
+      "step": 22430
+    },
+    {
+      "epoch": 0.9973767284691655,
+      "grad_norm": 0.05753085017204285,
+      "learning_rate": 1.797298899258726e-08,
+      "loss": 1.4453,
+      "step": 22432
+    },
+    {
+      "epoch": 0.9974656529278378,
+      "grad_norm": 0.057691846042871475,
+      "learning_rate": 1.6754850287459267e-08,
+      "loss": 1.4435,
+      "step": 22434
+    },
+    {
+      "epoch": 0.9975545773865102,
+      "grad_norm": 0.05841954052448273,
+      "learning_rate": 1.5579452353531308e-08,
+      "loss": 1.4526,
+      "step": 22436
+    },
+    {
+      "epoch": 0.9976435018451825,
+      "grad_norm": 0.05724509432911873,
+      "learning_rate": 1.4446795291223059e-08,
+      "loss": 1.4428,
+      "step": 22438
+    },
+    {
+      "epoch": 0.9977324263038548,
+      "grad_norm": 0.05789647623896599,
+      "learning_rate": 1.3356879197401473e-08,
+      "loss": 1.445,
+      "step": 22440
+    },
+    {
+      "epoch": 0.9978213507625272,
+      "grad_norm": 0.058276426047086716,
+      "learning_rate": 1.2309704165158753e-08,
+      "loss": 1.4454,
+      "step": 22442
+    },
+    {
+      "epoch": 0.9979102752211996,
+      "grad_norm": 0.057980820536613464,
+      "learning_rate": 1.1305270284089897e-08,
+      "loss": 1.4448,
+      "step": 22444
+    },
+    {
+      "epoch": 0.9979991996798719,
+      "grad_norm": 0.05673612281680107,
+      "learning_rate": 1.0343577639959634e-08,
+      "loss": 1.446,
+      "step": 22446
+    },
+    {
+      "epoch": 0.9980881241385443,
+      "grad_norm": 0.05783254653215408,
+      "learning_rate": 9.424626315091e-09,
+      "loss": 1.4437,
+      "step": 22448
+    },
+    {
+      "epoch": 0.9981770485972167,
+      "grad_norm": 0.058315496891736984,
+      "learning_rate": 8.548416388032277e-09,
+      "loss": 1.4446,
+      "step": 22450
+    },
+    {
+      "epoch": 0.998265973055889,
+      "grad_norm": 0.05717792361974716,
+      "learning_rate": 7.714947933556982e-09,
+      "loss": 1.4445,
+      "step": 22452
+    },
+    {
+      "epoch": 0.9983548975145614,
+      "grad_norm": 0.05846971645951271,
+      "learning_rate": 6.924221023052457e-09,
+      "loss": 1.4477,
+      "step": 22454
+    },
+    {
+      "epoch": 0.9984438219732338,
+      "grad_norm": 0.05766315013170242,
+      "learning_rate": 6.176235724075774e-09,
+      "loss": 1.4465,
+      "step": 22456
+    },
+    {
+      "epoch": 0.9985327464319061,
+      "grad_norm": 0.058049630373716354,
+      "learning_rate": 5.470992100520267e-09,
+      "loss": 1.4475,
+      "step": 22458
+    },
+    {
+      "epoch": 0.9986216708905784
     }
   ],
   "logging_steps": 2,
@@ -77373,12 +79088,12 @@
       "should_evaluate": false,
      "should_log": false,
       "should_save": true,
-      "should_training_stop":
     },
     "attributes": {}
   }
 },
-  "total_flos": 4.
   "train_batch_size": 768,
   "trial_name": null,
   "trial_params": null
|
78968 |
+
"grad_norm": 0.057937148958444595,
|
78969 |
+
"learning_rate": 4.808490212726557e-09,
|
78970 |
+
"loss": 1.4404,
|
78971 |
+
"step": 22460
|
78972 |
+
},
|
78973 |
+
{
|
78974 |
+
"epoch": 0.9987105953492508,
|
78975 |
+
"grad_norm": 0.05762643739581108,
|
78976 |
+
"learning_rate": 4.18873011731602e-09,
|
78977 |
+
"loss": 1.446,
|
78978 |
+
"step": 22462
|
78979 |
+
},
|
78980 |
+
{
|
78981 |
+
"epoch": 0.9987995198079231,
|
78982 |
+
"grad_norm": 0.05884932354092598,
|
78983 |
+
"learning_rate": 3.611711867246292e-09,
|
78984 |
+
"loss": 1.4505,
|
78985 |
+
"step": 22464
|
78986 |
+
},
|
78987 |
+
{
|
78988 |
+
"epoch": 0.9988884442665955,
|
78989 |
+
"grad_norm": 0.05755854770541191,
|
78990 |
+
"learning_rate": 3.0774355119222996e-09,
|
78991 |
+
"loss": 1.4517,
|
78992 |
+
"step": 22466
|
78993 |
+
},
|
78994 |
+
{
|
78995 |
+
"epoch": 0.9989773687252679,
|
78996 |
+
"grad_norm": 0.05807171016931534,
|
78997 |
+
"learning_rate": 2.585901096863186e-09,
|
78998 |
+
"loss": 1.4417,
|
78999 |
+
"step": 22468
|
79000 |
+
},
|
79001 |
+
{
|
79002 |
+
"epoch": 0.9990662931839402,
|
79003 |
+
"grad_norm": 0.057942621409893036,
|
79004 |
+
"learning_rate": 2.1371086642574255e-09,
|
79005 |
+
"loss": 1.4481,
|
79006 |
+
"step": 22470
|
79007 |
+
},
|
79008 |
+
{
|
79009 |
+
"epoch": 0.9991552176426126,
|
79010 |
+
"grad_norm": 0.05796192213892937,
|
79011 |
+
"learning_rate": 1.7310582523522023e-09,
|
79012 |
+
"loss": 1.4428,
|
79013 |
+
"step": 22472
|
79014 |
+
},
|
79015 |
+
{
|
79016 |
+
"epoch": 0.999244142101285,
|
79017 |
+
"grad_norm": 0.05767205357551575,
|
79018 |
+
"learning_rate": 1.3677498958974966e-09,
|
79019 |
+
"loss": 1.448,
|
79020 |
+
"step": 22474
|
79021 |
+
},
|
79022 |
+
{
|
79023 |
+
"epoch": 0.9993330665599574,
|
79024 |
+
"grad_norm": 0.05703622102737427,
|
79025 |
+
"learning_rate": 1.047183625924042e-09,
|
79026 |
+
"loss": 1.4464,
|
79027 |
+
"step": 22476
|
79028 |
+
},
|
79029 |
+
{
|
79030 |
+
"epoch": 0.9994219910186297,
|
79031 |
+
"grad_norm": 0.05870012938976288,
|
79032 |
+
"learning_rate": 7.693594699098582e-10,
|
79033 |
+
"loss": 1.4521,
|
79034 |
+
"step": 22478
|
79035 |
+
},
|
79036 |
+
{
|
79037 |
+
"epoch": 0.9995109154773021,
|
79038 |
+
"grad_norm": 0.057245586067438126,
|
79039 |
+
"learning_rate": 5.342774515026961e-10,
|
79040 |
+
"loss": 1.4448,
|
79041 |
+
"step": 22480
|
79042 |
+
},
|
79043 |
+
{
|
79044 |
+
"epoch": 0.9995998399359743,
|
79045 |
+
"grad_norm": 0.05662744492292404,
|
79046 |
+
"learning_rate": 3.419375909086142e-10,
|
79047 |
+
"loss": 1.4334,
|
79048 |
+
"step": 22482
|
79049 |
+
},
|
79050 |
+
{
|
79051 |
+
"epoch": 0.9996887643946467,
|
79052 |
+
"grad_norm": 0.05766800418496132,
|
79053 |
+
"learning_rate": 1.923399044478913e-10,
|
79054 |
+
"loss": 1.4482,
|
79055 |
+
"step": 22484
|
79056 |
+
},
|
79057 |
+
{
|
79058 |
+
"epoch": 0.9997776888533191,
|
79059 |
+
"grad_norm": 0.058209143579006195,
|
79060 |
+
"learning_rate": 8.548440505462551e-11,
|
79061 |
+
"loss": 1.4435,
|
79062 |
+
"step": 22486
|
79063 |
+
},
|
79064 |
+
{
|
79065 |
+
"epoch": 0.9998666133119914,
|
79066 |
+
"grad_norm": 0.05729079991579056,
|
79067 |
+
"learning_rate": 2.1371101721623376e-11,
|
79068 |
+
"loss": 1.4459,
|
79069 |
+
"step": 22488
|
79070 |
+
},
|
79071 |
+
{
|
79072 |
+
"epoch": 0.9999555377706638,
|
79073 |
+
"grad_norm": 0.057013168931007385,
|
79074 |
+
"learning_rate": 0.0,
|
79075 |
+
"loss": 1.4473,
|
79076 |
+
"step": 22490
|
79077 |
}
|
79078 |
],
|
79079 |
"logging_steps": 2,
|
…
79088 | "should_evaluate": false,
79089 | "should_log": false,
79090 | "should_save": true,
79091 | + "should_training_stop": true
79092 | },
79093 | "attributes": {}
79094 | }
79095 | },
79096 | + "total_flos": 4.812454399322358e+19,
79097 | "train_batch_size": 768,
79098 | "trial_name": null,
79099 | "trial_params": null
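For reference, everything added in this diff is plain JSON appended to the "log_history" array of last-checkpoint/trainer_state.json, with one training entry every 2 optimizer steps (per "logging_steps": 2). The final entry's "learning_rate" is exactly 0.0 and the callback state flips "should_training_stop" to true, so this checkpoint marks the end of the run at epoch ~1.0 (step 22490). A minimal Python sketch for inspecting that tail, not part of this repo: the file path assumes the last-checkpoint/ layout of this commit, and filtering on a "loss" key is an assumption used to separate training entries from eval entries (which carry "eval_loss" instead).

import json

# Load the trainer state saved with this checkpoint
# (path follows this commit's last-checkpoint/ layout).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Keep only training-log entries; eval entries have "eval_loss", not "loss".
train_log = [e for e in state["log_history"] if "loss" in e]

last = train_log[-1]
print(last["step"], last["loss"], last["learning_rate"])
# Expected from the entries above: 22490 1.4473 0.0

The same list can be fed straight into a plotting library to chart the loss and learning-rate curves over the full 22,490 steps.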