satvik-dixit committed
Uploaded checkpoint-27500

- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1761 -3
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8ad86e91f2924a189dc19b796deef58a4c1f44b9596040dfbed596e3a58a58a4
 size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e6dcec2fc7bddf9ccd947d61cfbb7ec0d09e233cfdb81b4d8f00e3043b60ec27
 size 240145026
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:971f8c6b700d32d9d1711207ade77f4dca9cda1be000e561bca9b74000ac50f5
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bae572518ab53ddc674f52a5ef01613875bea64a8d9c53d4b7d4a9aedc712f19
 size 1064
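The four entries above are Git LFS pointer files: each stores only the object's sha256 oid and byte size, while the actual weights live in LFS storage. A minimal sketch (not part of the commit; the local path is illustrative) for checking a downloaded file against its pointer:

```python
import hashlib

# Verify a downloaded LFS object against the "oid sha256:<hash>" and
# "size <bytes>" recorded in its pointer file. Streams in 1 MiB chunks
# so large weight files are not read into memory at once.
def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    digest = hashlib.sha256()
    total = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            total += len(chunk)
    return digest.hexdigest() == expected_oid and total == expected_size

# Values taken from the adapter_model.safetensors pointer in this commit.
print(verify_lfs_object(
    "adapter_model.safetensors",
    "8ad86e91f2924a189dc19b796deef58a4c1f44b9596040dfbed596e3a58a58a4",
    119975656,
))
```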
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.2961933612823486,
   "best_model_checkpoint": "runs/deepseek_lora_20240422-141601/checkpoint-25000",
-  "epoch": 0.
+  "epoch": 0.6875,
   "eval_steps": 2500,
-  "global_step":
+  "global_step": 27500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -17587,6 +17587,1764 @@
       "eval_samples_per_second": 8.174,
       "eval_steps_per_second": 8.174,
       "step": 25000
+    },
+    { "epoch": 0.63, "grad_norm": 7.878478527069092, "learning_rate": 3.3830508474576273e-06, "loss": 1.5645, "step": 25010 },
+    { "epoch": 0.63, "grad_norm": 4.464993000030518, "learning_rate": 3.3762711864406783e-06, "loss": 1.2522, "step": 25020 },
+    { "epoch": 0.63, "grad_norm": 1.824704885482788, "learning_rate": 3.3694915254237292e-06, "loss": 1.1667, "step": 25030 },
+    { "epoch": 0.63, "grad_norm": 4.979220390319824, "learning_rate": 3.3627118644067802e-06, "loss": 1.4592, "step": 25040 },
+    { "epoch": 0.63, "grad_norm": 7.964636325836182, "learning_rate": 3.3559322033898308e-06, "loss": 1.3505, "step": 25050 },
+    { "epoch": 0.63, "grad_norm": 3.848740816116333, "learning_rate": 3.3491525423728817e-06, "loss": 1.3424, "step": 25060 },
+    { "epoch": 0.63, "grad_norm": 5.446021556854248, "learning_rate": 3.3423728813559327e-06, "loss": 1.3514, "step": 25070 },
+    { "epoch": 0.63, "grad_norm": 4.195797920227051, "learning_rate": 3.3355932203389833e-06, "loss": 1.4442, "step": 25080 },
+    { "epoch": 0.63, "grad_norm": 5.259161949157715, "learning_rate": 3.3288135593220343e-06, "loss": 1.4244, "step": 25090 },
+    { "epoch": 0.63, "grad_norm": 4.59972620010376, "learning_rate": 3.322033898305085e-06, "loss": 1.3104, "step": 25100 },
+    { "epoch": 0.63, "grad_norm": 4.761903762817383, "learning_rate": 3.3152542372881358e-06, "loss": 1.3031, "step": 25110 },
+    { "epoch": 0.63, "grad_norm": 10.678994178771973, "learning_rate": 3.3084745762711868e-06, "loss": 1.372, "step": 25120 },
+    { "epoch": 0.63, "grad_norm": 5.033021926879883, "learning_rate": 3.3016949152542377e-06, "loss": 1.3018, "step": 25130 },
+    { "epoch": 0.63, "grad_norm": 14.870203971862793, "learning_rate": 3.2949152542372887e-06, "loss": 1.3284, "step": 25140 },
+    { "epoch": 0.63, "grad_norm": 1.7636396884918213, "learning_rate": 3.288135593220339e-06, "loss": 1.3044, "step": 25150 },
+    { "epoch": 0.63, "grad_norm": 2.6163928508758545, "learning_rate": 3.28135593220339e-06, "loss": 1.1875, "step": 25160 },
+    { "epoch": 0.63, "grad_norm": 4.163614273071289, "learning_rate": 3.274576271186441e-06, "loss": 1.3458, "step": 25170 },
+    { "epoch": 0.63, "grad_norm": 9.529121398925781, "learning_rate": 3.2677966101694918e-06, "loss": 1.2659, "step": 25180 },
+    { "epoch": 0.63, "grad_norm": 10.017566680908203, "learning_rate": 3.2610169491525428e-06, "loss": 1.4239, "step": 25190 },
+    { "epoch": 0.63, "grad_norm": 4.03933048248291, "learning_rate": 3.2542372881355933e-06, "loss": 1.252, "step": 25200 },
+    { "epoch": 0.63, "grad_norm": 6.725574970245361, "learning_rate": 3.2474576271186443e-06, "loss": 1.1699, "step": 25210 },
+    { "epoch": 0.63, "grad_norm": 8.446992874145508, "learning_rate": 3.2406779661016953e-06, "loss": 1.2996, "step": 25220 },
+    { "epoch": 0.63, "grad_norm": 15.130813598632812, "learning_rate": 3.2338983050847462e-06, "loss": 1.1601, "step": 25230 },
+    { "epoch": 0.63, "grad_norm": 8.915867805480957, "learning_rate": 3.2271186440677972e-06, "loss": 1.4512, "step": 25240 },
+    { "epoch": 0.63, "grad_norm": 3.813676595687866, "learning_rate": 3.2203389830508473e-06, "loss": 1.4311, "step": 25250 },
+    { "epoch": 0.63, "grad_norm": 8.306417465209961, "learning_rate": 3.2135593220338983e-06, "loss": 1.2554, "step": 25260 },
+    { "epoch": 0.63, "grad_norm": 4.557586193084717, "learning_rate": 3.2067796610169493e-06, "loss": 1.4084, "step": 25270 },
+    { "epoch": 0.63, "grad_norm": 3.206493616104126, "learning_rate": 3.2000000000000003e-06, "loss": 1.3265, "step": 25280 },
+    { "epoch": 0.63, "grad_norm": 4.9644622802734375, "learning_rate": 3.1932203389830513e-06, "loss": 1.3932, "step": 25290 },
+    { "epoch": 0.63, "grad_norm": 3.4366214275360107, "learning_rate": 3.186440677966102e-06, "loss": 1.4233, "step": 25300 },
+    { "epoch": 0.63, "grad_norm": 6.7854461669921875, "learning_rate": 3.1796610169491528e-06, "loss": 1.2838, "step": 25310 },
+    { "epoch": 0.63, "grad_norm": 6.945521831512451, "learning_rate": 3.1728813559322038e-06, "loss": 1.2936, "step": 25320 },
+    { "epoch": 0.63, "grad_norm": 4.550187587738037, "learning_rate": 3.1661016949152547e-06, "loss": 1.3477, "step": 25330 },
+    { "epoch": 0.63, "grad_norm": 5.256103515625, "learning_rate": 3.1593220338983053e-06, "loss": 1.4062, "step": 25340 },
+    { "epoch": 0.63, "grad_norm": 11.691208839416504, "learning_rate": 3.1525423728813563e-06, "loss": 1.1473, "step": 25350 },
+    { "epoch": 0.63, "grad_norm": 5.051747798919678, "learning_rate": 3.145762711864407e-06, "loss": 1.4299, "step": 25360 },
+    { "epoch": 0.63, "grad_norm": 2.562920093536377, "learning_rate": 3.138983050847458e-06, "loss": 1.2428, "step": 25370 },
+    { "epoch": 0.63, "grad_norm": 7.520709037780762, "learning_rate": 3.1322033898305088e-06, "loss": 1.1736, "step": 25380 },
+    { "epoch": 0.63, "grad_norm": 5.788994789123535, "learning_rate": 3.1254237288135598e-06, "loss": 1.3141, "step": 25390 },
+    { "epoch": 0.64, "grad_norm": 17.664766311645508, "learning_rate": 3.1186440677966107e-06, "loss": 1.357, "step": 25400 },
+    { "epoch": 0.64, "grad_norm": 4.366672992706299, "learning_rate": 3.111864406779661e-06, "loss": 1.3631, "step": 25410 },
+    { "epoch": 0.64, "grad_norm": 15.109098434448242, "learning_rate": 3.105084745762712e-06, "loss": 1.5056, "step": 25420 },
+    { "epoch": 0.64, "grad_norm": 12.550411224365234, "learning_rate": 3.098305084745763e-06, "loss": 1.2255, "step": 25430 },
+    { "epoch": 0.64, "grad_norm": 5.603880882263184, "learning_rate": 3.091525423728814e-06, "loss": 1.4206, "step": 25440 },
+    { "epoch": 0.64, "grad_norm": 5.365425109863281, "learning_rate": 3.0847457627118648e-06, "loss": 1.1442, "step": 25450 },
+    { "epoch": 0.64, "grad_norm": 6.900291442871094, "learning_rate": 3.0779661016949153e-06, "loss": 1.3042, "step": 25460 },
+    { "epoch": 0.64, "grad_norm": 6.29402494430542, "learning_rate": 3.0711864406779663e-06, "loss": 1.2615, "step": 25470 },
+    { "epoch": 0.64, "grad_norm": 11.43036937713623, "learning_rate": 3.0644067796610173e-06, "loss": 1.1831, "step": 25480 },
+    { "epoch": 0.64, "grad_norm": 8.487439155578613, "learning_rate": 3.0576271186440683e-06, "loss": 1.2347, "step": 25490 },
+    { "epoch": 0.64, "grad_norm": 4.873865127563477, "learning_rate": 3.0508474576271192e-06, "loss": 1.0908, "step": 25500 },
+    { "epoch": 0.64, "grad_norm": 2.734248161315918, "learning_rate": 3.0440677966101694e-06, "loss": 1.0498, "step": 25510 },
+    { "epoch": 0.64, "grad_norm": 12.489617347717285, "learning_rate": 3.0372881355932203e-06, "loss": 1.2103, "step": 25520 },
+    { "epoch": 0.64, "grad_norm": 11.316421508789062, "learning_rate": 3.0305084745762713e-06, "loss": 1.5457, "step": 25530 },
+    { "epoch": 0.64, "grad_norm": 7.098801612854004, "learning_rate": 3.0237288135593223e-06, "loss": 1.4746, "step": 25540 },
+    { "epoch": 0.64, "grad_norm": 5.496173858642578, "learning_rate": 3.0169491525423733e-06, "loss": 1.296, "step": 25550 },
+    { "epoch": 0.64, "grad_norm": 3.706704616546631, "learning_rate": 3.010169491525424e-06, "loss": 1.4458, "step": 25560 },
+    { "epoch": 0.64, "grad_norm": 10.640968322753906, "learning_rate": 3.003389830508475e-06, "loss": 1.2303, "step": 25570 },
+    { "epoch": 0.64, "grad_norm": 9.76960563659668, "learning_rate": 2.9966101694915258e-06, "loss": 1.3352, "step": 25580 },
+    { "epoch": 0.64, "grad_norm": 6.274062633514404, "learning_rate": 2.9898305084745768e-06, "loss": 1.4795, "step": 25590 },
+    { "epoch": 0.64, "grad_norm": 2.7021098136901855, "learning_rate": 2.9830508474576277e-06, "loss": 1.3396, "step": 25600 },
+    { "epoch": 0.64, "grad_norm": 4.872988224029541, "learning_rate": 2.9762711864406783e-06, "loss": 1.4767, "step": 25610 },
+    { "epoch": 0.64, "grad_norm": 8.984478950500488, "learning_rate": 2.969491525423729e-06, "loss": 1.3939, "step": 25620 },
+    { "epoch": 0.64, "grad_norm": 18.204336166381836, "learning_rate": 2.96271186440678e-06, "loss": 1.3875, "step": 25630 },
+    { "epoch": 0.64, "grad_norm": 6.397688388824463, "learning_rate": 2.955932203389831e-06, "loss": 1.1992, "step": 25640 },
+    { "epoch": 0.64, "grad_norm": 3.8524389266967773, "learning_rate": 2.9491525423728818e-06, "loss": 1.5454, "step": 25650 },
+    { "epoch": 0.64, "grad_norm": 6.4902191162109375, "learning_rate": 2.9423728813559327e-06, "loss": 1.3023, "step": 25660 },
+    { "epoch": 0.64, "grad_norm": 2.5433766841888428, "learning_rate": 2.935593220338983e-06, "loss": 1.3424, "step": 25670 },
+    { "epoch": 0.64, "grad_norm": 2.7309176921844482, "learning_rate": 2.928813559322034e-06, "loss": 1.2601, "step": 25680 },
+    { "epoch": 0.64, "grad_norm": 5.759544849395752, "learning_rate": 2.922033898305085e-06, "loss": 1.4213, "step": 25690 },
+    { "epoch": 0.64, "grad_norm": 8.862116813659668, "learning_rate": 2.915254237288136e-06, "loss": 1.2039, "step": 25700 },
+    { "epoch": 0.64, "grad_norm": 4.360088348388672, "learning_rate": 2.9084745762711868e-06, "loss": 1.2741, "step": 25710 },
+    { "epoch": 0.64, "grad_norm": 6.417861461639404, "learning_rate": 2.9016949152542373e-06, "loss": 1.173, "step": 25720 },
+    { "epoch": 0.64, "grad_norm": 4.8081254959106445, "learning_rate": 2.8949152542372883e-06, "loss": 1.3977, "step": 25730 },
+    { "epoch": 0.64, "grad_norm": 4.581600189208984, "learning_rate": 2.8881355932203393e-06, "loss": 1.2773, "step": 25740 },
+    { "epoch": 0.64, "grad_norm": 7.737278938293457, "learning_rate": 2.8813559322033903e-06, "loss": 1.3239, "step": 25750 },
+    { "epoch": 0.64, "grad_norm": 8.583956718444824, "learning_rate": 2.8745762711864412e-06, "loss": 1.3715, "step": 25760 },
+    { "epoch": 0.64, "grad_norm": 6.567660331726074, "learning_rate": 2.8677966101694914e-06, "loss": 1.0883, "step": 25770 },
+    { "epoch": 0.64, "grad_norm": 8.99410343170166, "learning_rate": 2.8610169491525424e-06, "loss": 1.213, "step": 25780 },
+    { "epoch": 0.64, "grad_norm": 7.688558101654053, "learning_rate": 2.8542372881355933e-06, "loss": 1.147, "step": 25790 },
+    { "epoch": 0.65, "grad_norm": 9.962355613708496, "learning_rate": 2.8474576271186443e-06, "loss": 1.243, "step": 25800 },
+    { "epoch": 0.65, "grad_norm": 2.2275478839874268, "learning_rate": 2.8406779661016953e-06, "loss": 1.2328, "step": 25810 },
+    { "epoch": 0.65, "grad_norm": 3.4127414226531982, "learning_rate": 2.833898305084746e-06, "loss": 1.4244, "step": 25820 },
+    { "epoch": 0.65, "grad_norm": 12.15282917022705, "learning_rate": 2.827118644067797e-06, "loss": 1.401, "step": 25830 },
+    { "epoch": 0.65, "grad_norm": 8.007610321044922, "learning_rate": 2.820338983050848e-06, "loss": 1.3701, "step": 25840 },
+    { "epoch": 0.65, "grad_norm": 9.589988708496094, "learning_rate": 2.8135593220338988e-06, "loss": 1.2886, "step": 25850 },
+    { "epoch": 0.65, "grad_norm": 4.063002109527588, "learning_rate": 2.8067796610169497e-06, "loss": 1.3383, "step": 25860 },
+    { "epoch": 0.65, "grad_norm": 2.1042330265045166, "learning_rate": 2.8000000000000003e-06, "loss": 1.2753, "step": 25870 },
+    { "epoch": 0.65, "grad_norm": 23.256053924560547, "learning_rate": 2.793220338983051e-06, "loss": 1.3617, "step": 25880 },
+    { "epoch": 0.65, "grad_norm": 7.6475911140441895, "learning_rate": 2.786440677966102e-06, "loss": 1.2862, "step": 25890 },
+    { "epoch": 0.65, "grad_norm": 16.81471824645996, "learning_rate": 2.779661016949153e-06, "loss": 1.5188, "step": 25900 },
+    { "epoch": 0.65, "grad_norm": 10.20080852508545, "learning_rate": 2.7728813559322038e-06, "loss": 1.5414, "step": 25910 },
+    { "epoch": 0.65, "grad_norm": 9.043648719787598, "learning_rate": 2.7661016949152548e-06, "loss": 1.3299, "step": 25920 },
+    { "epoch": 0.65, "grad_norm": 4.330277442932129, "learning_rate": 2.7593220338983053e-06, "loss": 1.1826, "step": 25930 },
+    { "epoch": 0.65, "grad_norm": 7.962332725524902, "learning_rate": 2.752542372881356e-06, "loss": 1.3566, "step": 25940 },
+    { "epoch": 0.65, "grad_norm": 8.763781547546387, "learning_rate": 2.745762711864407e-06, "loss": 1.3485, "step": 25950 },
+    { "epoch": 0.65, "grad_norm": 6.160745620727539, "learning_rate": 2.738983050847458e-06, "loss": 1.303, "step": 25960 },
+    { "epoch": 0.65, "grad_norm": 2.6451609134674072, "learning_rate": 2.732203389830509e-06, "loss": 1.4117, "step": 25970 },
+    { "epoch": 0.65, "grad_norm": 10.857940673828125, "learning_rate": 2.7254237288135593e-06, "loss": 1.1566, "step": 25980 },
+    { "epoch": 0.65, "grad_norm": 5.1549601554870605, "learning_rate": 2.7186440677966103e-06, "loss": 1.2738, "step": 25990 },
+    { "epoch": 0.65, "grad_norm": 7.437528610229492, "learning_rate": 2.7118644067796613e-06, "loss": 1.4507, "step": 26000 },
+    { "epoch": 0.65, "grad_norm": 6.80765962600708, "learning_rate": 2.7050847457627123e-06, "loss": 1.1497, "step": 26010 },
+    { "epoch": 0.65, "grad_norm": 6.173390865325928, "learning_rate": 2.6983050847457633e-06, "loss": 1.3426, "step": 26020 },
+    { "epoch": 0.65, "grad_norm": 6.7725911140441895, "learning_rate": 2.6915254237288134e-06, "loss": 1.3951, "step": 26030 },
+    { "epoch": 0.65, "grad_norm": 6.6503777503967285, "learning_rate": 2.6847457627118644e-06, "loss": 1.4507, "step": 26040 },
+    { "epoch": 0.65, "grad_norm": 5.210537433624268, "learning_rate": 2.6779661016949153e-06, "loss": 1.3876, "step": 26050 },
+    { "epoch": 0.65, "grad_norm": 3.615936756134033, "learning_rate": 2.6711864406779663e-06, "loss": 1.4476, "step": 26060 },
+    { "epoch": 0.65, "grad_norm": 9.065774917602539, "learning_rate": 2.6644067796610173e-06, "loss": 1.3124, "step": 26070 },
+    { "epoch": 0.65, "grad_norm": 13.187819480895996, "learning_rate": 2.657627118644068e-06, "loss": 1.2038, "step": 26080 },
+    { "epoch": 0.65, "grad_norm": 13.807534217834473, "learning_rate": 2.650847457627119e-06, "loss": 1.2118, "step": 26090 },
+    { "epoch": 0.65, "grad_norm": 16.19401741027832, "learning_rate": 2.64406779661017e-06, "loss": 1.2734, "step": 26100 },
+    { "epoch": 0.65, "grad_norm": 7.493824005126953, "learning_rate": 2.6372881355932208e-06, "loss": 1.3692, "step": 26110 },
+    { "epoch": 0.65, "grad_norm": 16.29339599609375, "learning_rate": 2.6305084745762718e-06, "loss": 1.3132, "step": 26120 },
+    { "epoch": 0.65, "grad_norm": 6.331333160400391, "learning_rate": 2.6237288135593223e-06, "loss": 1.3203, "step": 26130 },
+    { "epoch": 0.65, "grad_norm": 6.448307991027832, "learning_rate": 2.616949152542373e-06, "loss": 1.2753, "step": 26140 },
+    { "epoch": 0.65, "grad_norm": 6.896134376525879, "learning_rate": 2.610169491525424e-06, "loss": 1.349, "step": 26150 },
+    { "epoch": 0.65, "grad_norm": 5.189770698547363, "learning_rate": 2.603389830508475e-06, "loss": 1.289, "step": 26160 },
+    { "epoch": 0.65, "grad_norm": 1.7721081972122192, "learning_rate": 2.596610169491526e-06, "loss": 1.4432, "step": 26170 },
+    { "epoch": 0.65, "grad_norm": 3.9717156887054443, "learning_rate": 2.5898305084745768e-06, "loss": 1.2982, "step": 26180 },
+    { "epoch": 0.65, "grad_norm": 19.084896087646484, "learning_rate": 2.5830508474576273e-06, "loss": 1.3897, "step": 26190 },
+    { "epoch": 0.66, "grad_norm": 1.6735248565673828, "learning_rate": 2.576271186440678e-06, "loss": 1.4137, "step": 26200 },
+    { "epoch": 0.66, "grad_norm": 8.025382041931152, "learning_rate": 2.569491525423729e-06, "loss": 1.2629, "step": 26210 },
+    { "epoch": 0.66, "grad_norm": 4.695014476776123, "learning_rate": 2.56271186440678e-06, "loss": 1.5102, "step": 26220 },
+    { "epoch": 0.66, "grad_norm": 4.135346412658691, "learning_rate": 2.555932203389831e-06, "loss": 1.1628, "step": 26230 },
+    { "epoch": 0.66, "grad_norm": 6.607401371002197, "learning_rate": 2.5491525423728814e-06, "loss": 1.305, "step": 26240 },
+    { "epoch": 0.66, "grad_norm": 17.407390594482422, "learning_rate": 2.5423728813559323e-06, "loss": 1.3841, "step": 26250 },
+    { "epoch": 0.66, "grad_norm": 13.363433837890625, "learning_rate": 2.5355932203389833e-06, "loss": 1.4593, "step": 26260 },
+    { "epoch": 0.66, "grad_norm": 4.77979040145874, "learning_rate": 2.5288135593220343e-06, "loss": 1.2445, "step": 26270 },
+    { "epoch": 0.66, "grad_norm": 10.652926445007324, "learning_rate": 2.5220338983050853e-06, "loss": 1.3092, "step": 26280 },
+    { "epoch": 0.66, "grad_norm": 5.278314113616943, "learning_rate": 2.5152542372881354e-06, "loss": 1.447, "step": 26290 },
+    { "epoch": 0.66, "grad_norm": 6.439229488372803, "learning_rate": 2.5084745762711864e-06, "loss": 1.3421, "step": 26300 },
+    { "epoch": 0.66, "grad_norm": 4.738833904266357, "learning_rate": 2.5016949152542374e-06, "loss": 1.3454, "step": 26310 },
+    { "epoch": 0.66, "grad_norm": 4.070488929748535, "learning_rate": 2.4949152542372883e-06, "loss": 1.377, "step": 26320 },
+    { "epoch": 0.66, "grad_norm": 1.9005275964736938, "learning_rate": 2.488135593220339e-06, "loss": 1.4501, "step": 26330 },
+    { "epoch": 0.66, "grad_norm": 9.970990180969238, "learning_rate": 2.48135593220339e-06, "loss": 1.1534, "step": 26340 },
+    { "epoch": 0.66, "grad_norm": 2.7662065029144287, "learning_rate": 2.474576271186441e-06, "loss": 1.4422, "step": 26350 },
+    { "epoch": 0.66, "grad_norm": 11.093968391418457, "learning_rate": 2.467796610169492e-06, "loss": 1.198, "step": 26360 },
+    { "epoch": 0.66, "grad_norm": 6.7317280769348145, "learning_rate": 2.461016949152543e-06, "loss": 1.244, "step": 26370 },
+    { "epoch": 0.66, "grad_norm": 8.76866340637207, "learning_rate": 2.4542372881355933e-06, "loss": 1.3653, "step": 26380 },
+    { "epoch": 0.66, "grad_norm": 11.940791130065918, "learning_rate": 2.4474576271186443e-06, "loss": 1.304, "step": 26390 },
+    { "epoch": 0.66, "grad_norm": 6.687407970428467, "learning_rate": 2.4406779661016953e-06, "loss": 1.1965, "step": 26400 },
+    { "epoch": 0.66, "grad_norm": 5.42927885055542, "learning_rate": 2.433898305084746e-06, "loss": 1.3501, "step": 26410 },
+    { "epoch": 0.66, "grad_norm": 2.731924057006836, "learning_rate": 2.427118644067797e-06, "loss": 1.4572, "step": 26420 },
+    { "epoch": 0.66, "grad_norm": 5.305939197540283, "learning_rate": 2.4203389830508474e-06, "loss": 1.3428, "step": 26430 },
+    { "epoch": 0.66, "grad_norm": 10.32532787322998, "learning_rate": 2.4135593220338984e-06, "loss": 1.4827, "step": 26440 },
+    { "epoch": 0.66, "grad_norm": 7.55979585647583, "learning_rate": 2.4067796610169493e-06, "loss": 1.2877, "step": 26450 },
+    { "epoch": 0.66, "grad_norm": 9.092228889465332, "learning_rate": 2.4000000000000003e-06, "loss": 1.3516, "step": 26460 },
+    { "epoch": 0.66, "grad_norm": 4.732894420623779, "learning_rate": 2.393220338983051e-06, "loss": 1.3218, "step": 26470 },
+    { "epoch": 0.66, "grad_norm": 8.649917602539062, "learning_rate": 2.386440677966102e-06, "loss": 1.2708, "step": 26480 },
+    { "epoch": 0.66, "grad_norm": 4.569608211517334, "learning_rate": 2.379661016949153e-06, "loss": 1.3422, "step": 26490 },
+    { "epoch": 0.66, "grad_norm": 3.702059030532837, "learning_rate": 2.372881355932204e-06, "loss": 1.4754, "step": 26500 },
+    { "epoch": 0.66, "grad_norm": 3.9777114391326904, "learning_rate": 2.3661016949152544e-06, "loss": 1.3133, "step": 26510 },
+    { "epoch": 0.66, "grad_norm": 9.692605018615723, "learning_rate": 2.3593220338983053e-06, "loss": 1.2866, "step": 26520 },
+    { "epoch": 0.66, "grad_norm": 2.1870622634887695, "learning_rate": 2.3525423728813563e-06, "loss": 1.4271, "step": 26530 },
+    { "epoch": 0.66, "grad_norm": 6.799996852874756, "learning_rate": 2.345762711864407e-06, "loss": 1.3768, "step": 26540 },
+    { "epoch": 0.66, "grad_norm": 2.3258345127105713, "learning_rate": 2.338983050847458e-06, "loss": 1.4023, "step": 26550 },
+    { "epoch": 0.66, "grad_norm": 3.950892925262451, "learning_rate": 2.3322033898305084e-06, "loss": 1.2986, "step": 26560 },
+    { "epoch": 0.66, "grad_norm": 11.114343643188477, "learning_rate": 2.3254237288135594e-06, "loss": 1.3538, "step": 26570 },
+    { "epoch": 0.66, "grad_norm": 8.47208023071289, "learning_rate": 2.3186440677966103e-06, "loss": 1.3557, "step": 26580 },
+    { "epoch": 0.66, "grad_norm": 15.794944763183594, "learning_rate": 2.3118644067796613e-06, "loss": 1.3295, "step": 26590 },
+    { "epoch": 0.67, "grad_norm": 5.596043586730957, "learning_rate": 2.305084745762712e-06, "loss": 1.2669, "step": 26600 },
+    { "epoch": 0.67, "grad_norm": 8.727288246154785, "learning_rate": 2.298305084745763e-06, "loss": 1.339, "step": 26610 },
+    { "epoch": 0.67, "grad_norm": 10.842510223388672, "learning_rate": 2.291525423728814e-06, "loss": 1.2154, "step": 26620 },
+    { "epoch": 0.67, "grad_norm": 11.826702117919922, "learning_rate": 2.284745762711865e-06, "loss": 1.1599, "step": 26630 },
+    { "epoch": 0.67, "grad_norm": 16.47806167602539, "learning_rate": 2.2779661016949154e-06, "loss": 1.3005, "step": 26640 },
+    { "epoch": 0.67, "grad_norm": 4.797351837158203, "learning_rate": 2.2711864406779663e-06, "loss": 1.2779, "step": 26650 },
+    { "epoch": 0.67, "grad_norm": 6.353465557098389, "learning_rate": 2.2644067796610173e-06, "loss": 1.2419, "step": 26660 },
+    { "epoch": 0.67, "grad_norm": 10.895663261413574, "learning_rate": 2.257627118644068e-06, "loss": 1.2078, "step": 26670 },
+    { "epoch": 0.67, "grad_norm": 7.583923816680908, "learning_rate": 2.250847457627119e-06, "loss": 1.3381, "step": 26680 },
+    { "epoch": 0.67, "grad_norm": 10.84875774383545, "learning_rate": 2.2440677966101694e-06, "loss": 1.4887, "step": 26690 },
+    { "epoch": 0.67, "grad_norm": 5.171149253845215, "learning_rate": 2.2372881355932204e-06, "loss": 1.3469, "step": 26700 },
+    { "epoch": 0.67, "grad_norm": 6.136636734008789, "learning_rate": 2.2305084745762714e-06, "loss": 1.1804, "step": 26710 },
+    { "epoch": 0.67, "grad_norm": 12.95764446258545, "learning_rate": 2.2237288135593223e-06, "loss": 1.2808, "step": 26720 },
+    { "epoch": 0.67, "grad_norm": 4.0281453132629395, "learning_rate": 2.216949152542373e-06, "loss": 1.4454, "step": 26730 },
+    { "epoch": 0.67, "grad_norm": 5.7566609382629395, "learning_rate": 2.210169491525424e-06, "loss": 1.2968, "step": 26740 },
+    { "epoch": 0.67, "grad_norm": 4.710749626159668, "learning_rate": 2.203389830508475e-06, "loss": 1.355, "step": 26750 },
+    { "epoch": 0.67, "grad_norm": 3.0553205013275146, "learning_rate": 2.196610169491526e-06, "loss": 1.3319, "step": 26760 },
+    { "epoch": 0.67, "grad_norm": 15.849903106689453, "learning_rate": 2.1898305084745764e-06, "loss": 1.3233, "step": 26770 },
+    { "epoch": 0.67, "grad_norm": 31.49736785888672, "learning_rate": 2.1830508474576273e-06, "loss": 1.3991, "step": 26780 },
+    { "epoch": 0.67, "grad_norm": 11.734864234924316, "learning_rate": 2.1762711864406783e-06, "loss": 1.5305, "step": 26790 },
+    { "epoch": 0.67, "grad_norm": 6.124046325683594, "learning_rate": 2.169491525423729e-06, "loss": 1.249, "step": 26800 },
+    { "epoch": 0.67, "grad_norm": 11.438417434692383, "learning_rate": 2.16271186440678e-06, "loss": 1.2922, "step": 26810 },
+    { "epoch": 0.67, "grad_norm": 12.979373931884766, "learning_rate": 2.1559322033898304e-06, "loss": 1.3554, "step": 26820 },
+    { "epoch": 0.67, "grad_norm": 3.8955001831054688, "learning_rate": 2.1491525423728814e-06, "loss": 1.3303, "step": 26830 },
+    { "epoch": 0.67, "grad_norm": 9.349483489990234, "learning_rate": 2.1423728813559324e-06, "loss": 1.2548, "step": 26840 },
+    { "epoch": 0.67, "grad_norm": 2.8842084407806396, "learning_rate": 2.1355932203389833e-06, "loss": 1.3517, "step": 26850 },
+    { "epoch": 0.67, "grad_norm": 3.986353635787964, "learning_rate": 2.128813559322034e-06, "loss": 1.5566, "step": 26860 },
+    { "epoch": 0.67, "grad_norm": 14.33786392211914, "learning_rate": 2.122033898305085e-06, "loss": 1.2168, "step": 26870 },
+    { "epoch": 0.67, "grad_norm": 4.677867889404297, "learning_rate": 2.115254237288136e-06, "loss": 1.2758, "step": 26880 },
+    { "epoch": 0.67, "grad_norm": 3.638185977935791, "learning_rate": 2.108474576271187e-06, "loss": 1.2348, "step": 26890 },
+    { "epoch": 0.67, "grad_norm": 2.7823917865753174, "learning_rate": 2.1016949152542374e-06, "loss": 1.4506, "step": 26900 },
+    { "epoch": 0.67, "grad_norm": 14.349405288696289, "learning_rate": 2.0949152542372883e-06, "loss": 1.2189, "step": 26910 },
+    { "epoch": 0.67, "grad_norm": 5.958116054534912, "learning_rate": 2.0881355932203393e-06, "loss": 1.0776, "step": 26920 },
+    { "epoch": 0.67, "grad_norm": 5.689637184143066, "learning_rate": 2.08135593220339e-06, "loss": 1.5885, "step": 26930 },
+    { "epoch": 0.67, "grad_norm": 17.451379776000977, "learning_rate": 2.074576271186441e-06, "loss": 1.4116, "step": 26940 },
+    { "epoch": 0.67, "grad_norm": 6.859378814697266, "learning_rate": 2.0677966101694914e-06, "loss": 1.1968, "step": 26950 },
+    { "epoch": 0.67, "grad_norm": 5.8354082107543945, "learning_rate": 2.0610169491525424e-06, "loss": 1.3615, "step": 26960 },
+    { "epoch": 0.67, "grad_norm": 4.631415367126465, "learning_rate": 2.0542372881355934e-06, "loss": 1.3517, "step": 26970 },
+    { "epoch": 0.67, "grad_norm": 9.62684154510498, "learning_rate": 2.0474576271186443e-06, "loss": 1.3364, "step": 26980 },
+    { "epoch": 0.67, "grad_norm": 4.8851447105407715, "learning_rate": 2.0406779661016953e-06, "loss": 1.2893, "step": 26990 },
+    { "epoch": 0.68, "grad_norm": 5.123071670532227, "learning_rate": 2.033898305084746e-06, "loss": 1.4069, "step": 27000 },
+    { "epoch": 0.68, "grad_norm": 3.8006324768066406, "learning_rate": 2.027118644067797e-06, "loss": 1.3013, "step": 27010 },
+    { "epoch": 0.68, "grad_norm": 3.052011728286743, "learning_rate": 2.020338983050848e-06, "loss": 1.3073, "step": 27020 },
+    { "epoch": 0.68, "grad_norm": 6.314701080322266, "learning_rate": 2.0135593220338984e-06, "loss": 1.3644, "step": 27030 },
+    { "epoch": 0.68, "grad_norm": 2.868659257888794, "learning_rate": 2.0067796610169494e-06, "loss": 1.4159, "step": 27040 },
+    { "epoch": 0.68, "grad_norm": 3.1452548503875732, "learning_rate": 2.0000000000000003e-06, "loss": 1.2184, "step": 27050 },
+    { "epoch": 0.68, "grad_norm": 7.137606620788574, "learning_rate": 1.993220338983051e-06, "loss": 1.1827, "step": 27060 },
+    { "epoch": 0.68, "grad_norm": 4.114950180053711, "learning_rate": 1.986440677966102e-06, "loss": 1.3316, "step": 27070 },
+    { "epoch": 0.68, "grad_norm": 4.815858364105225, "learning_rate": 1.9796610169491524e-06, "loss": 1.286, "step": 27080 },
+    { "epoch": 0.68, "grad_norm": 2.6551191806793213, "learning_rate": 1.9728813559322034e-06, "loss": 1.2562, "step": 27090 },
+    { "epoch": 0.68, "grad_norm": 10.009329795837402, "learning_rate": 1.9661016949152544e-06, "loss": 1.3984, "step": 27100 },
+    { "epoch": 0.68, "grad_norm": 2.3229589462280273, "learning_rate": 1.9593220338983053e-06, "loss": 1.3478, "step": 27110 },
+    { "epoch": 0.68, "grad_norm": 5.636902332305908, "learning_rate": 1.9525423728813563e-06, "loss": 1.3959, "step": 27120 },
+    { "epoch": 0.68, "grad_norm": 15.958221435546875, "learning_rate": 1.945762711864407e-06, "loss": 1.4275, "step": 27130 },
+    { "epoch": 0.68, "grad_norm": 5.097884654998779, "learning_rate": 1.938983050847458e-06, "loss": 1.3826, "step": 27140 },
+    { "epoch": 0.68, "grad_norm": 6.786471366882324, "learning_rate": 1.932203389830509e-06, "loss": 1.3073, "step": 27150 },
+    { "epoch": 0.68, "grad_norm": 7.529135704040527, "learning_rate": 1.9254237288135594e-06, "loss": 1.2605, "step": 27160 },
+    { "epoch": 0.68, "grad_norm": 10.065816879272461, "learning_rate": 1.9186440677966104e-06, "loss": 1.2377, "step": 27170 },
+    { "epoch": 0.68, "grad_norm": 5.812075614929199, "learning_rate": 1.9118644067796613e-06, "loss": 1.3309, "step": 27180 },
+    { "epoch": 0.68, "grad_norm": 4.214847564697266, "learning_rate": 1.9050847457627119e-06, "loss": 1.4063, "step": 27190 },
+    { "epoch": 0.68, "grad_norm": 4.021416187286377, "learning_rate": 1.8983050847457629e-06, "loss": 1.2773, "step": 27200 },
+    { "epoch": 0.68, "grad_norm": 7.9816083908081055, "learning_rate": 1.8915254237288136e-06, "loss": 1.2151, "step": 27210 },
+    { "epoch": 0.68, "grad_norm": 9.82458209991455, "learning_rate": 1.8847457627118646e-06, "loss": 1.393, "step": 27220 },
+    { "epoch": 0.68, "grad_norm": 16.87822914123535, "learning_rate": 1.8779661016949156e-06, "loss": 1.269, "step": 27230 },
+    { "epoch": 0.68, "grad_norm": 6.815838813781738, "learning_rate": 1.8711864406779661e-06, "loss": 1.4237, "step": 27240 },
+    { "epoch": 0.68, "grad_norm": 3.9835774898529053, "learning_rate": 1.8644067796610171e-06, "loss": 1.2872, "step": 27250 },
+    { "epoch": 0.68, "grad_norm": 4.0309953689575195, "learning_rate": 1.857627118644068e-06, "loss": 1.2629, "step": 27260 },
+    { "epoch": 0.68, "grad_norm": 12.035406112670898, "learning_rate": 1.8508474576271189e-06, "loss": 1.3755, "step": 27270 },
+    { "epoch": 0.68, "grad_norm": 1.8347936868667603, "learning_rate": 1.8440677966101696e-06, "loss": 1.3831, "step": 27280 },
+    { "epoch": 0.68, "grad_norm": 8.658760070800781, "learning_rate": 1.8372881355932204e-06, "loss": 1.3428, "step": 27290 },
+    { "epoch": 0.68, "grad_norm": 3.145319938659668, "learning_rate": 1.8305084745762714e-06, "loss": 1.4272, "step": 27300 },
+    { "epoch": 0.68, "grad_norm": 9.265095710754395, "learning_rate": 1.8237288135593223e-06, "loss": 1.4133, "step": 27310 },
+    { "epoch": 0.68, "grad_norm": 7.101969242095947, "learning_rate": 1.816949152542373e-06, "loss": 1.1041, "step": 27320 },
+    { "epoch": 0.68, "grad_norm": 16.614511489868164, "learning_rate": 1.8101694915254239e-06, "loss": 1.3473, "step": 27330 },
+    { "epoch": 0.68, "grad_norm": 2.301051378250122, "learning_rate": 1.8033898305084746e-06, "loss": 1.2432, "step": 27340 },
+    { "epoch": 0.68, "grad_norm": 5.643409729003906, "learning_rate": 1.7966101694915256e-06, "loss": 1.2781, "step": 27350 },
+    { "epoch": 0.68, "grad_norm": 7.286752223968506, "learning_rate": 1.7898305084745766e-06, "loss": 1.328, "step": 27360 },
+    { "epoch": 0.68, "grad_norm": 3.3953471183776855, "learning_rate": 1.7830508474576271e-06, "loss": 1.3025, "step": 27370 },
+    { "epoch": 0.68, "grad_norm": 8.240042686462402, "learning_rate": 1.7762711864406781e-06, "loss": 1.3847, "step": 27380 },
+    { "epoch": 0.68, "grad_norm": 9.705995559692383, "learning_rate": 1.769491525423729e-06, "loss": 1.427, "step": 27390 },
+    { "epoch": 0.69, "grad_norm": 3.306814193725586, "learning_rate": 1.7627118644067799e-06, "loss": 1.4487, "step": 27400 },
+    { "epoch": 0.69, "grad_norm": 15.25204086303711, "learning_rate": 1.7559322033898306e-06, "loss": 1.1448, "step": 27410 },
+    { "epoch": 0.69, "grad_norm": 9.065521240234375, "learning_rate": 1.7491525423728814e-06, "loss": 1.4139, "step": 27420 },
+    { "epoch": 0.69, "grad_norm": 7.884547233581543, "learning_rate": 1.7423728813559324e-06, "loss": 1.3313, "step": 27430 },
+    { "epoch": 0.69, "grad_norm": 8.109780311584473, "learning_rate": 1.7355932203389834e-06, "loss": 1.2523, "step": 27440 },
+    { "epoch": 0.69, "grad_norm": 23.829362869262695, "learning_rate": 1.728813559322034e-06, "loss": 1.2634, "step": 27450 },
+    { "epoch": 0.69, "grad_norm": 8.787532806396484, "learning_rate": 1.7220338983050849e-06, "loss": 1.3913, "step": 27460 },
+    { "epoch": 0.69, "grad_norm": 17.018415451049805, "learning_rate": 1.7152542372881356e-06, "loss": 1.2223, "step": 27470 },
+    { "epoch": 0.69, "grad_norm": 9.56651782989502, "learning_rate": 1.7084745762711866e-06, "loss": 1.3977, "step": 27480 },
+    { "epoch": 0.69, "grad_norm": 4.520813941955566, "learning_rate": 1.7016949152542376e-06, "loss": 1.3356, "step": 27490 },
+    { "epoch": 0.69, "grad_norm": 5.794963359832764, "learning_rate": 1.6949152542372882e-06, "loss": 1.2999, "step": 27500 },
+    { "epoch": 0.69, "eval_loss": 1.2965120077133179, "eval_runtime": 122.3839,
+      "eval_samples_per_second": 8.171, "eval_steps_per_second": 8.171, "step": 27500
     }
   ],
   "logging_steps": 10,
@@ -17594,7 +19352,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 2500,
-  "total_flos": 4.
+  "total_flos": 4.4280846483456e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
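The diff above appends 250 logging entries (steps 25010 through 27500, logged every 10 steps) and one evaluation entry to the checkpoint's training state. A minimal sketch for inspecting the updated file, assuming the standard Hugging Face TrainerState layout in which these entries live under "log_history" (that key itself sits outside the hunk context shown here; the checkpoint path is illustrative):

```python
import json

# Load the serialized TrainerState written alongside the checkpoint.
with open("checkpoint-27500/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])   # 1.2961933612823486
print(state["global_step"])   # 27500

# Separate the periodic training logs from the evaluation logs:
# train entries carry "loss", eval entries carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

last = train_logs[-1]
print(f"step {last['step']}: loss {last['loss']}, lr {last['learning_rate']}")
print(f"latest eval_loss: {eval_logs[-1]['eval_loss']}")
```

Note that eval_loss at step 27500 (1.29651) is marginally above the stored best_metric (1.29619), so best_model_checkpoint still points at checkpoint-25000.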