Training in progress, step 2200, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:68a5f2e6e09d5a599ac3f5e4132b38cb2f2808055b01d21daad5de1dbf7d8dfa
 size 500770656
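The adapter weights above are a PEFT-style LoRA checkpoint. A minimal loading sketch, not the uploader's actual code: it assumes the `last-checkpoint` directory also holds an `adapter_config.json` (not part of this commit's diff), and the base model id is a placeholder, since the real base model is not named in this commit.

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "base-model-id"  # hypothetical: the real base model is not named here

# Load the frozen base model, then attach the LoRA adapter from the checkpoint
# directory (this reads adapter_model.safetensors).
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(base, "last-checkpoint")
```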
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e961f44f14d857f9c661bfdd01d0523bda8b0eeb32356b9ed4b155ab12c5d67c
 size 254918356
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dbe133e546dea5be1ce4b75d499f8ca708e645b1f3a88ea8b11ed61eedc0be30
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:22bbeb1b7ad4bb15235a9c7aad36c14d726aa3136b8446dea921fc7581667980
 size 1064
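Each of the four binary files above is stored with Git LFS, so the diff covers only the three-line pointer stub (spec version, sha256 oid, byte size), not the binary itself. A minimal sketch of checking a downloaded blob against its pointer, assuming a local copy of the checkpoint; the pointer text is copied from the optimizer.pt entry above.

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Check a downloaded blob against its Git LFS pointer (oid sha256 + size)."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:e961f44f14d857f9c661bfdd01d0523bda8b0eeb32356b9ed4b155ab12c5d67c
size 254918356"""
print(verify_lfs_pointer(pointer, "last-checkpoint/optimizer.pt"))  # assumes the blob was pulled locally
```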
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.
-  "best_model_checkpoint": "miner_id_24/checkpoint-
-  "epoch": 0.
+  "best_metric": 0.08438266813755035,
+  "best_model_checkpoint": "miner_id_24/checkpoint-2200",
+  "epoch": 0.24422057558349292,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 2200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14095,6 +14095,1414 @@
       "eval_samples_per_second": 2.489,
       "eval_steps_per_second": 2.489,
       "step": 2000
+    },
+    {
+      "epoch": 0.2221297144284406,
+      "grad_norm": 0.47247323393821716,
+      "learning_rate": 0.00019738059080164137,
+      "loss": 0.0874,
+      "step": 2001
+    },
+    {
+      "epoch": 0.22224072378097856,
+      "grad_norm": 0.3782826364040375,
+      "learning_rate": 0.00019737794385770366,
+      "loss": 0.1407,
+      "step": 2002
+    },
+    {
+      "epoch": 0.2223517331335165,
+      "grad_norm": 0.657538115978241,
+      "learning_rate": 0.00019737529559482345,
+      "loss": 0.0743,
+      "step": 2003
+    },
+    {
+      "epoch": 0.22246274248605444,
+      "grad_norm": 0.27409666776657104,
+      "learning_rate": 0.00019737264601303665,
+      "loss": 0.0795,
+      "step": 2004
+    },
+    {
+      "epoch": 0.2225737518385924,
+      "grad_norm": 0.8207319378852844,
+      "learning_rate": 0.00019736999511237914,
+      "loss": 0.1032,
+      "step": 2005
+    },
+    {
+      "epoch": 0.22268476119113034,
+      "grad_norm": 0.10261275619268417,
+      "learning_rate": 0.00019736734289288677,
+      "loss": 0.0564,
+      "step": 2006
+    },
+    {
+      "epoch": 0.2227957705436683,
+      "grad_norm": 0.201162651181221,
+      "learning_rate": 0.00019736468935459552,
+      "loss": 0.0719,
+      "step": 2007
+    },
+    {
+      "epoch": 0.22290677989620625,
+      "grad_norm": 0.20986336469650269,
+      "learning_rate": 0.00019736203449754127,
+      "loss": 0.0949,
+      "step": 2008
+    },
+    {
+      "epoch": 0.2230177892487442,
+      "grad_norm": 0.20932605862617493,
+      "learning_rate": 0.00019735937832176007,
+      "loss": 0.0883,
+      "step": 2009
+    },
+    {
+      "epoch": 0.22312879860128215,
+      "grad_norm": 0.1441095620393753,
+      "learning_rate": 0.00019735672082728782,
+      "loss": 0.0509,
+      "step": 2010
+    },
+    {
+      "epoch": 0.22323980795382012,
+      "grad_norm": 0.3260079026222229,
+      "learning_rate": 0.00019735406201416055,
+      "loss": 0.0886,
+      "step": 2011
+    },
+    {
+      "epoch": 0.22335081730635806,
+      "grad_norm": 0.2243819683790207,
+      "learning_rate": 0.00019735140188241424,
+      "loss": 0.0703,
+      "step": 2012
+    },
+    {
+      "epoch": 0.22346182665889602,
+      "grad_norm": 0.1299426108598709,
+      "learning_rate": 0.00019734874043208497,
+      "loss": 0.0729,
+      "step": 2013
+    },
+    {
+      "epoch": 0.22357283601143396,
+      "grad_norm": 0.3778630793094635,
+      "learning_rate": 0.00019734607766320878,
+      "loss": 0.0901,
+      "step": 2014
+    },
+    {
+      "epoch": 0.22368384536397193,
+      "grad_norm": 0.7120828032493591,
+      "learning_rate": 0.0001973434135758217,
+      "loss": 0.1188,
+      "step": 2015
+    },
+    {
+      "epoch": 0.22379485471650987,
+      "grad_norm": 0.14776043593883514,
+      "learning_rate": 0.00019734074816995983,
+      "loss": 0.0784,
+      "step": 2016
+    },
+    {
+      "epoch": 0.2239058640690478,
+      "grad_norm": 0.29494839906692505,
+      "learning_rate": 0.00019733808144565927,
+      "loss": 0.1131,
+      "step": 2017
+    },
+    {
+      "epoch": 0.22401687342158577,
+      "grad_norm": 0.22210551798343658,
+      "learning_rate": 0.00019733541340295613,
+      "loss": 0.0851,
+      "step": 2018
+    },
+    {
+      "epoch": 0.2241278827741237,
+      "grad_norm": 0.9126520752906799,
+      "learning_rate": 0.00019733274404188657,
+      "loss": 0.1261,
+      "step": 2019
+    },
+    {
+      "epoch": 0.22423889212666168,
+      "grad_norm": 0.3695898950099945,
+      "learning_rate": 0.00019733007336248678,
+      "loss": 0.1165,
+      "step": 2020
+    },
+    {
+      "epoch": 0.22434990147919961,
+      "grad_norm": 0.17289452254772186,
+      "learning_rate": 0.00019732740136479282,
+      "loss": 0.0935,
+      "step": 2021
+    },
+    {
+      "epoch": 0.22446091083173758,
+      "grad_norm": 0.15597273409366608,
+      "learning_rate": 0.000197324728048841,
+      "loss": 0.0762,
+      "step": 2022
+    },
+    {
+      "epoch": 0.22457192018427552,
+      "grad_norm": 0.2585983872413635,
+      "learning_rate": 0.00019732205341466746,
+      "loss": 0.0991,
+      "step": 2023
+    },
+    {
+      "epoch": 0.22468292953681349,
+      "grad_norm": 0.16065822541713715,
+      "learning_rate": 0.00019731937746230845,
+      "loss": 0.0964,
+      "step": 2024
+    },
+    {
+      "epoch": 0.22479393888935142,
+      "grad_norm": 0.31113800406455994,
+      "learning_rate": 0.0001973167001918002,
+      "loss": 0.0825,
+      "step": 2025
+    },
+    {
+      "epoch": 0.2249049482418894,
+      "grad_norm": 1.9390000104904175,
+      "learning_rate": 0.000197314021603179,
+      "loss": 0.1568,
+      "step": 2026
+    },
+    {
+      "epoch": 0.22501595759442733,
+      "grad_norm": 0.16965048015117645,
+      "learning_rate": 0.00019731134169648112,
+      "loss": 0.0962,
+      "step": 2027
+    },
+    {
+      "epoch": 0.2251269669469653,
+      "grad_norm": 0.16715994477272034,
+      "learning_rate": 0.00019730866047174282,
+      "loss": 0.0815,
+      "step": 2028
+    },
+    {
+      "epoch": 0.22523797629950323,
+      "grad_norm": 1.6501140594482422,
+      "learning_rate": 0.00019730597792900047,
+      "loss": 0.1487,
+      "step": 2029
+    },
+    {
+      "epoch": 0.22534898565204117,
+      "grad_norm": 0.6177855730056763,
+      "learning_rate": 0.0001973032940682904,
+      "loss": 0.1149,
+      "step": 2030
+    },
+    {
+      "epoch": 0.22545999500457914,
+      "grad_norm": 0.34974008798599243,
+      "learning_rate": 0.00019730060888964893,
+      "loss": 0.0863,
+      "step": 2031
+    },
+    {
+      "epoch": 0.22557100435711708,
+      "grad_norm": 0.08222179859876633,
+      "learning_rate": 0.00019729792239311242,
+      "loss": 0.0794,
+      "step": 2032
+    },
+    {
+      "epoch": 0.22568201370965504,
+      "grad_norm": 0.23118165135383606,
+      "learning_rate": 0.0001972952345787173,
+      "loss": 0.089,
+      "step": 2033
+    },
+    {
+      "epoch": 0.22579302306219298,
+      "grad_norm": 0.3532223403453827,
+      "learning_rate": 0.00019729254544649996,
+      "loss": 0.092,
+      "step": 2034
+    },
+    {
+      "epoch": 0.22590403241473095,
+      "grad_norm": 0.10777556151151657,
+      "learning_rate": 0.00019728985499649682,
+      "loss": 0.0967,
+      "step": 2035
+    },
+    {
+      "epoch": 0.2260150417672689,
+      "grad_norm": 0.2677795886993408,
+      "learning_rate": 0.0001972871632287443,
+      "loss": 0.0926,
+      "step": 2036
+    },
+    {
+      "epoch": 0.22612605111980685,
+      "grad_norm": 0.09042158722877502,
+      "learning_rate": 0.0001972844701432789,
+      "loss": 0.0821,
+      "step": 2037
+    },
+    {
+      "epoch": 0.2262370604723448,
+      "grad_norm": 0.2295641005039215,
+      "learning_rate": 0.00019728177574013705,
+      "loss": 0.1149,
+      "step": 2038
+    },
+    {
+      "epoch": 0.22634806982488276,
+      "grad_norm": 0.19486144185066223,
+      "learning_rate": 0.0001972790800193553,
+      "loss": 0.0888,
+      "step": 2039
+    },
+    {
+      "epoch": 0.2264590791774207,
+      "grad_norm": 0.24862056970596313,
+      "learning_rate": 0.0001972763829809701,
+      "loss": 0.096,
+      "step": 2040
+    },
+    {
+      "epoch": 0.22657008852995864,
+      "grad_norm": 0.1913110315799713,
+      "learning_rate": 0.00019727368462501806,
+      "loss": 0.0782,
+      "step": 2041
+    },
+    {
+      "epoch": 0.2266810978824966,
+      "grad_norm": 0.16473524272441864,
+      "learning_rate": 0.00019727098495153563,
+      "loss": 0.0982,
+      "step": 2042
+    },
+    {
+      "epoch": 0.22679210723503454,
+      "grad_norm": 0.18526411056518555,
+      "learning_rate": 0.00019726828396055948,
+      "loss": 0.0883,
+      "step": 2043
+    },
+    {
+      "epoch": 0.2269031165875725,
+      "grad_norm": 0.20631423592567444,
+      "learning_rate": 0.00019726558165212608,
+      "loss": 0.1005,
+      "step": 2044
+    },
+    {
+      "epoch": 0.22701412594011044,
+      "grad_norm": 0.4606611430644989,
+      "learning_rate": 0.00019726287802627215,
+      "loss": 0.1075,
+      "step": 2045
+    },
+    {
+      "epoch": 0.2271251352926484,
+      "grad_norm": 0.17766547203063965,
+      "learning_rate": 0.00019726017308303422,
+      "loss": 0.0719,
+      "step": 2046
+    },
+    {
+      "epoch": 0.22723614464518635,
+      "grad_norm": 0.12889623641967773,
+      "learning_rate": 0.00019725746682244897,
+      "loss": 0.0805,
+      "step": 2047
+    },
+    {
+      "epoch": 0.22734715399772432,
+      "grad_norm": 0.14123119413852692,
+      "learning_rate": 0.00019725475924455303,
+      "loss": 0.0788,
+      "step": 2048
+    },
+    {
+      "epoch": 0.22745816335026225,
+      "grad_norm": 0.28150174021720886,
+      "learning_rate": 0.0001972520503493831,
+      "loss": 0.0828,
+      "step": 2049
+    },
+    {
+      "epoch": 0.22756917270280022,
+      "grad_norm": 0.27206283807754517,
+      "learning_rate": 0.00019724934013697586,
+      "loss": 0.057,
+      "step": 2050
+    },
+    {
+      "epoch": 0.22768018205533816,
+      "grad_norm": 0.3850548565387726,
+      "learning_rate": 0.00019724662860736802,
+      "loss": 0.0938,
+      "step": 2051
+    },
+    {
+      "epoch": 0.22779119140787613,
+      "grad_norm": 0.12461239099502563,
+      "learning_rate": 0.00019724391576059626,
+      "loss": 0.0793,
+      "step": 2052
+    },
+    {
+      "epoch": 0.22790220076041406,
+      "grad_norm": 0.09359429776668549,
+      "learning_rate": 0.00019724120159669744,
+      "loss": 0.0785,
+      "step": 2053
+    },
+    {
+      "epoch": 0.228013210112952,
+      "grad_norm": 0.15190820395946503,
+      "learning_rate": 0.00019723848611570818,
+      "loss": 0.0856,
+      "step": 2054
+    },
+    {
+      "epoch": 0.22812421946548997,
+      "grad_norm": 0.2869325578212738,
+      "learning_rate": 0.00019723576931766538,
+      "loss": 0.0772,
+      "step": 2055
+    },
+    {
+      "epoch": 0.2282352288180279,
+      "grad_norm": 0.21830858290195465,
+      "learning_rate": 0.00019723305120260577,
+      "loss": 0.0691,
+      "step": 2056
+    },
+    {
+      "epoch": 0.22834623817056587,
+      "grad_norm": 0.2458673119544983,
+      "learning_rate": 0.00019723033177056616,
+      "loss": 0.0739,
+      "step": 2057
+    },
+    {
+      "epoch": 0.2284572475231038,
+      "grad_norm": 0.31198403239250183,
+      "learning_rate": 0.00019722761102158341,
+      "loss": 0.0713,
+      "step": 2058
+    },
+    {
+      "epoch": 0.22856825687564178,
+      "grad_norm": 0.22141993045806885,
+      "learning_rate": 0.00019722488895569442,
+      "loss": 0.0819,
+      "step": 2059
+    },
+    {
+      "epoch": 0.22867926622817972,
+      "grad_norm": 0.11993864178657532,
+      "learning_rate": 0.00019722216557293597,
+      "loss": 0.0765,
+      "step": 2060
+    },
+    {
+      "epoch": 0.22879027558071768,
+      "grad_norm": 0.1264715939760208,
+      "learning_rate": 0.000197219440873345,
+      "loss": 0.0573,
+      "step": 2061
+    },
+    {
+      "epoch": 0.22890128493325562,
+      "grad_norm": 0.4262503981590271,
+      "learning_rate": 0.00019721671485695835,
+      "loss": 0.0935,
+      "step": 2062
+    },
+    {
+      "epoch": 0.2290122942857936,
+      "grad_norm": 0.3699107766151428,
+      "learning_rate": 0.00019721398752381303,
+      "loss": 0.105,
+      "step": 2063
+    },
+    {
+      "epoch": 0.22912330363833153,
+      "grad_norm": 0.29569151997566223,
+      "learning_rate": 0.00019721125887394597,
+      "loss": 0.0663,
+      "step": 2064
+    },
+    {
+      "epoch": 0.2292343129908695,
+      "grad_norm": 0.14101409912109375,
+      "learning_rate": 0.00019720852890739408,
+      "loss": 0.075,
+      "step": 2065
+    },
+    {
+      "epoch": 0.22934532234340743,
+      "grad_norm": 0.19486425817012787,
+      "learning_rate": 0.00019720579762419433,
+      "loss": 0.1148,
+      "step": 2066
+    },
+    {
+      "epoch": 0.22945633169594537,
+      "grad_norm": 0.22603991627693176,
+      "learning_rate": 0.00019720306502438376,
+      "loss": 0.1018,
+      "step": 2067
+    },
+    {
+      "epoch": 0.22956734104848334,
+      "grad_norm": 0.2293156087398529,
+      "learning_rate": 0.00019720033110799936,
+      "loss": 0.085,
+      "step": 2068
+    },
+    {
+      "epoch": 0.22967835040102128,
+      "grad_norm": 0.5674437284469604,
+      "learning_rate": 0.00019719759587507815,
+      "loss": 0.1024,
+      "step": 2069
+    },
+    {
+      "epoch": 0.22978935975355924,
+      "grad_norm": 0.19658319652080536,
+      "learning_rate": 0.00019719485932565722,
+      "loss": 0.0954,
+      "step": 2070
+    },
+    {
+      "epoch": 0.22990036910609718,
+      "grad_norm": 0.31995558738708496,
+      "learning_rate": 0.00019719212145977359,
+      "loss": 0.0659,
+      "step": 2071
+    },
+    {
+      "epoch": 0.23001137845863515,
+      "grad_norm": 0.40643981099128723,
+      "learning_rate": 0.00019718938227746436,
+      "loss": 0.1045,
+      "step": 2072
+    },
+    {
+      "epoch": 0.23012238781117308,
+      "grad_norm": 0.14388489723205566,
+      "learning_rate": 0.00019718664177876665,
+      "loss": 0.0697,
+      "step": 2073
+    },
+    {
+      "epoch": 0.23023339716371105,
+      "grad_norm": 0.16220074892044067,
+      "learning_rate": 0.00019718389996371753,
+      "loss": 0.0864,
+      "step": 2074
+    },
+    {
+      "epoch": 0.230344406516249,
+      "grad_norm": 0.5444479584693909,
+      "learning_rate": 0.00019718115683235417,
+      "loss": 0.1032,
+      "step": 2075
+    },
+    {
+      "epoch": 0.23045541586878696,
+      "grad_norm": 0.4288277328014374,
+      "learning_rate": 0.00019717841238471375,
+      "loss": 0.0843,
+      "step": 2076
+    },
+    {
+      "epoch": 0.2305664252213249,
+      "grad_norm": 2.3215818405151367,
+      "learning_rate": 0.00019717566662083339,
+      "loss": 0.1822,
+      "step": 2077
+    },
+    {
+      "epoch": 0.23067743457386286,
+      "grad_norm": 0.2839007079601288,
+      "learning_rate": 0.0001971729195407503,
+      "loss": 0.0886,
+      "step": 2078
+    },
+    {
+      "epoch": 0.2307884439264008,
+      "grad_norm": 0.3162185549736023,
+      "learning_rate": 0.00019717017114450166,
+      "loss": 0.088,
+      "step": 2079
+    },
+    {
+      "epoch": 0.23089945327893874,
+      "grad_norm": 0.5075798034667969,
+      "learning_rate": 0.00019716742143212476,
+      "loss": 0.1003,
+      "step": 2080
+    },
+    {
+      "epoch": 0.2310104626314767,
+      "grad_norm": 0.21239706873893738,
+      "learning_rate": 0.00019716467040365682,
+      "loss": 0.0523,
+      "step": 2081
+    },
+    {
+      "epoch": 0.23112147198401464,
+      "grad_norm": 0.23680302500724792,
+      "learning_rate": 0.0001971619180591351,
+      "loss": 0.0643,
+      "step": 2082
+    },
+    {
+      "epoch": 0.2312324813365526,
+      "grad_norm": 0.21561571955680847,
+      "learning_rate": 0.00019715916439859684,
+      "loss": 0.0707,
+      "step": 2083
+    },
+    {
+      "epoch": 0.23134349068909055,
+      "grad_norm": 0.16770891845226288,
+      "learning_rate": 0.0001971564094220794,
+      "loss": 0.0953,
+      "step": 2084
+    },
+    {
+      "epoch": 0.2314545000416285,
+      "grad_norm": 0.2525671720504761,
+      "learning_rate": 0.00019715365312962,
+      "loss": 0.0994,
+      "step": 2085
+    },
+    {
+      "epoch": 0.23156550939416645,
+      "grad_norm": 0.24709418416023254,
+      "learning_rate": 0.0001971508955212561,
+      "loss": 0.0868,
+      "step": 2086
+    },
+    {
+      "epoch": 0.23167651874670442,
+      "grad_norm": 0.17875786125659943,
+      "learning_rate": 0.00019714813659702497,
+      "loss": 0.11,
+      "step": 2087
+    },
+    {
+      "epoch": 0.23178752809924236,
+      "grad_norm": 0.12514136731624603,
+      "learning_rate": 0.00019714537635696398,
+      "loss": 0.0588,
+      "step": 2088
+    },
+    {
+      "epoch": 0.23189853745178032,
+      "grad_norm": 0.34089913964271545,
+      "learning_rate": 0.00019714261480111052,
+      "loss": 0.141,
+      "step": 2089
+    },
+    {
+      "epoch": 0.23200954680431826,
+      "grad_norm": 0.22774739563465118,
+      "learning_rate": 0.00019713985192950205,
+      "loss": 0.0842,
+      "step": 2090
+    },
+    {
+      "epoch": 0.23212055615685623,
+      "grad_norm": 0.1931809037923813,
+      "learning_rate": 0.00019713708774217592,
+      "loss": 0.0902,
+      "step": 2091
+    },
+    {
+      "epoch": 0.23223156550939417,
+      "grad_norm": 0.25692617893218994,
+      "learning_rate": 0.00019713432223916963,
+      "loss": 0.0769,
+      "step": 2092
+    },
+    {
+      "epoch": 0.2323425748619321,
+      "grad_norm": 0.07520927488803864,
+      "learning_rate": 0.00019713155542052055,
+      "loss": 0.0765,
+      "step": 2093
+    },
+    {
+      "epoch": 0.23245358421447007,
+      "grad_norm": 0.22351422905921936,
+      "learning_rate": 0.00019712878728626624,
+      "loss": 0.0762,
+      "step": 2094
+    },
+    {
+      "epoch": 0.232564593567008,
+      "grad_norm": 0.1506834775209427,
+      "learning_rate": 0.00019712601783644418,
+      "loss": 0.0864,
+      "step": 2095
+    },
+    {
+      "epoch": 0.23267560291954598,
+      "grad_norm": 0.36866268515586853,
+      "learning_rate": 0.00019712324707109183,
+      "loss": 0.0683,
+      "step": 2096
+    },
+    {
+      "epoch": 0.23278661227208391,
+      "grad_norm": 0.34709832072257996,
+      "learning_rate": 0.00019712047499024677,
+      "loss": 0.0803,
+      "step": 2097
+    },
+    {
+      "epoch": 0.23289762162462188,
+      "grad_norm": 0.13359051942825317,
+      "learning_rate": 0.00019711770159394653,
+      "loss": 0.0979,
+      "step": 2098
+    },
+    {
+      "epoch": 0.23300863097715982,
+      "grad_norm": 0.1268121898174286,
+      "learning_rate": 0.00019711492688222868,
+      "loss": 0.0829,
+      "step": 2099
+    },
+    {
+      "epoch": 0.23311964032969779,
+      "grad_norm": 0.4034717381000519,
+      "learning_rate": 0.0001971121508551308,
+      "loss": 0.0946,
+      "step": 2100
+    },
+    {
+      "epoch": 0.23323064968223572,
+      "grad_norm": 0.30090880393981934,
+      "learning_rate": 0.00019710937351269045,
+      "loss": 0.1036,
+      "step": 2101
+    },
+    {
+      "epoch": 0.2333416590347737,
+      "grad_norm": 0.238031268119812,
+      "learning_rate": 0.0001971065948549453,
+      "loss": 0.1085,
+      "step": 2102
+    },
+    {
+      "epoch": 0.23345266838731163,
+      "grad_norm": 0.12888558208942413,
+      "learning_rate": 0.000197103814881933,
+      "loss": 0.0841,
+      "step": 2103
+    },
+    {
+      "epoch": 0.2335636777398496,
+      "grad_norm": 0.21555829048156738,
+      "learning_rate": 0.00019710103359369115,
+      "loss": 0.1045,
+      "step": 2104
+    },
+    {
+      "epoch": 0.23367468709238753,
+      "grad_norm": 0.1383059173822403,
+      "learning_rate": 0.00019709825099025746,
+      "loss": 0.076,
+      "step": 2105
+    },
+    {
+      "epoch": 0.23378569644492547,
+      "grad_norm": 0.1340472400188446,
+      "learning_rate": 0.00019709546707166959,
+      "loss": 0.0892,
+      "step": 2106
+    },
+    {
+      "epoch": 0.23389670579746344,
+      "grad_norm": 0.6373670697212219,
+      "learning_rate": 0.00019709268183796524,
+      "loss": 0.1068,
+      "step": 2107
+    },
+    {
+      "epoch": 0.23400771515000138,
+      "grad_norm": 0.47205066680908203,
+      "learning_rate": 0.0001970898952891822,
+      "loss": 0.1035,
+      "step": 2108
+    },
+    {
+      "epoch": 0.23411872450253934,
+      "grad_norm": 0.40509045124053955,
+      "learning_rate": 0.00019708710742535814,
+      "loss": 0.0767,
+      "step": 2109
+    },
+    {
+      "epoch": 0.23422973385507728,
+      "grad_norm": 0.18465881049633026,
+      "learning_rate": 0.00019708431824653087,
+      "loss": 0.1045,
+      "step": 2110
+    },
+    {
+      "epoch": 0.23434074320761525,
+      "grad_norm": 0.190477654337883,
+      "learning_rate": 0.00019708152775273812,
+      "loss": 0.0892,
+      "step": 2111
+    },
+    {
+      "epoch": 0.2344517525601532,
+      "grad_norm": 0.29226070642471313,
+      "learning_rate": 0.00019707873594401772,
+      "loss": 0.0815,
+      "step": 2112
+    },
+    {
+      "epoch": 0.23456276191269115,
+      "grad_norm": 0.14238286018371582,
+      "learning_rate": 0.0001970759428204075,
+      "loss": 0.0952,
+      "step": 2113
+    },
+    {
+      "epoch": 0.2346737712652291,
+      "grad_norm": 0.2061157077550888,
+      "learning_rate": 0.00019707314838194524,
+      "loss": 0.0498,
+      "step": 2114
+    },
+    {
+      "epoch": 0.23478478061776706,
+      "grad_norm": 0.1603352278470993,
+      "learning_rate": 0.00019707035262866884,
+      "loss": 0.058,
+      "step": 2115
+    },
+    {
+      "epoch": 0.234895789970305,
+      "grad_norm": 0.2591029405593872,
+      "learning_rate": 0.00019706755556061614,
+      "loss": 0.0963,
+      "step": 2116
+    },
+    {
+      "epoch": 0.23500679932284296,
+      "grad_norm": 0.13144443929195404,
+      "learning_rate": 0.00019706475717782502,
+      "loss": 0.095,
+      "step": 2117
+    },
+    {
+      "epoch": 0.2351178086753809,
+      "grad_norm": 0.21387794613838196,
+      "learning_rate": 0.0001970619574803334,
+      "loss": 0.0708,
+      "step": 2118
+    },
+    {
+      "epoch": 0.23522881802791884,
+      "grad_norm": 0.20653082430362701,
+      "learning_rate": 0.0001970591564681792,
+      "loss": 0.1087,
+      "step": 2119
+    },
+    {
+      "epoch": 0.2353398273804568,
+      "grad_norm": 0.29304152727127075,
+      "learning_rate": 0.00019705635414140035,
+      "loss": 0.0581,
+      "step": 2120
+    },
+    {
+      "epoch": 0.23545083673299474,
+      "grad_norm": 0.5392758846282959,
+      "learning_rate": 0.0001970535505000348,
+      "loss": 0.1307,
+      "step": 2121
+    },
+    {
+      "epoch": 0.2355618460855327,
+      "grad_norm": 0.528290331363678,
+      "learning_rate": 0.00019705074554412055,
+      "loss": 0.0903,
+      "step": 2122
+    },
+    {
+      "epoch": 0.23567285543807065,
+      "grad_norm": 0.4860764145851135,
+      "learning_rate": 0.00019704793927369555,
+      "loss": 0.0879,
+      "step": 2123
+    },
+    {
+      "epoch": 0.23578386479060862,
+      "grad_norm": 0.16266947984695435,
+      "learning_rate": 0.00019704513168879788,
+      "loss": 0.0786,
+      "step": 2124
+    },
+    {
+      "epoch": 0.23589487414314655,
+      "grad_norm": 0.5353623628616333,
+      "learning_rate": 0.0001970423227894655,
+      "loss": 0.1089,
+      "step": 2125
+    },
+    {
+      "epoch": 0.23600588349568452,
+      "grad_norm": 0.5310114026069641,
+      "learning_rate": 0.00019703951257573645,
+      "loss": 0.0729,
+      "step": 2126
+    },
+    {
+      "epoch": 0.23611689284822246,
+      "grad_norm": 0.2388266623020172,
+      "learning_rate": 0.00019703670104764888,
+      "loss": 0.0669,
+      "step": 2127
+    },
+    {
+      "epoch": 0.23622790220076043,
+      "grad_norm": 0.3173542022705078,
+      "learning_rate": 0.00019703388820524077,
+      "loss": 0.0875,
+      "step": 2128
+    },
+    {
+      "epoch": 0.23633891155329836,
+      "grad_norm": 0.22432465851306915,
+      "learning_rate": 0.00019703107404855024,
+      "loss": 0.0768,
+      "step": 2129
+    },
+    {
+      "epoch": 0.23644992090583633,
+      "grad_norm": 0.3916042745113373,
+      "learning_rate": 0.00019702825857761545,
+      "loss": 0.0852,
+      "step": 2130
+    },
+    {
+      "epoch": 0.23656093025837427,
+      "grad_norm": 0.13182367384433746,
+      "learning_rate": 0.00019702544179247452,
+      "loss": 0.0647,
+      "step": 2131
+    },
+    {
+      "epoch": 0.2366719396109122,
+      "grad_norm": 0.4878508746623993,
+      "learning_rate": 0.00019702262369316557,
+      "loss": 0.0951,
+      "step": 2132
+    },
+    {
+      "epoch": 0.23678294896345017,
+      "grad_norm": 0.1852872669696808,
+      "learning_rate": 0.00019701980427972682,
+      "loss": 0.0827,
+      "step": 2133
+    },
+    {
+      "epoch": 0.2368939583159881,
+      "grad_norm": 0.15223491191864014,
+      "learning_rate": 0.00019701698355219642,
+      "loss": 0.0786,
+      "step": 2134
+    },
+    {
+      "epoch": 0.23700496766852608,
+      "grad_norm": 0.16859480738639832,
+      "learning_rate": 0.0001970141615106126,
+      "loss": 0.0855,
+      "step": 2135
+    },
+    {
+      "epoch": 0.23711597702106402,
+      "grad_norm": 0.6331989765167236,
+      "learning_rate": 0.00019701133815501357,
+      "loss": 0.0884,
+      "step": 2136
+    },
+    {
+      "epoch": 0.23722698637360198,
+      "grad_norm": 0.1685618907213211,
+      "learning_rate": 0.00019700851348543753,
+      "loss": 0.0718,
+      "step": 2137
+    },
+    {
+      "epoch": 0.23733799572613992,
+      "grad_norm": 0.2580687403678894,
+      "learning_rate": 0.00019700568750192284,
+      "loss": 0.0839,
+      "step": 2138
+    },
+    {
+      "epoch": 0.2374490050786779,
+      "grad_norm": 0.09570122510194778,
+      "learning_rate": 0.00019700286020450766,
+      "loss": 0.0643,
+      "step": 2139
+    },
+    {
+      "epoch": 0.23756001443121583,
+      "grad_norm": 0.35887497663497925,
+      "learning_rate": 0.00019700003159323038,
+      "loss": 0.0768,
+      "step": 2140
+    },
+    {
+      "epoch": 0.2376710237837538,
+      "grad_norm": 0.1919344663619995,
+      "learning_rate": 0.00019699720166812927,
+      "loss": 0.0899,
+      "step": 2141
+    },
+    {
+      "epoch": 0.23778203313629173,
+      "grad_norm": 0.19538532197475433,
+      "learning_rate": 0.00019699437042924265,
+      "loss": 0.0831,
+      "step": 2142
+    },
+    {
+      "epoch": 0.23789304248882967,
+      "grad_norm": 0.21556827425956726,
+      "learning_rate": 0.00019699153787660888,
+      "loss": 0.0921,
+      "step": 2143
+    },
+    {
+      "epoch": 0.23800405184136764,
+      "grad_norm": 0.17883862555027008,
+      "learning_rate": 0.00019698870401026634,
+      "loss": 0.088,
+      "step": 2144
+    },
+    {
+      "epoch": 0.23811506119390558,
+      "grad_norm": 0.14607980847358704,
+      "learning_rate": 0.0001969858688302534,
+      "loss": 0.0747,
+      "step": 2145
+    },
+    {
+      "epoch": 0.23822607054644354,
+      "grad_norm": 0.37790095806121826,
+      "learning_rate": 0.00019698303233660846,
+      "loss": 0.1136,
+      "step": 2146
+    },
+    {
+      "epoch": 0.23833707989898148,
+      "grad_norm": 0.19577501714229584,
+      "learning_rate": 0.00019698019452936994,
+      "loss": 0.116,
+      "step": 2147
+    },
+    {
+      "epoch": 0.23844808925151945,
+      "grad_norm": 0.27278584241867065,
+      "learning_rate": 0.00019697735540857628,
+      "loss": 0.0993,
+      "step": 2148
+    },
+    {
+      "epoch": 0.23855909860405738,
+      "grad_norm": 0.19815939664840698,
+      "learning_rate": 0.00019697451497426594,
+      "loss": 0.0959,
+      "step": 2149
+    },
+    {
+      "epoch": 0.23867010795659535,
+      "grad_norm": 0.09901012480258942,
+      "learning_rate": 0.00019697167322647738,
+      "loss": 0.0823,
+      "step": 2150
+    },
+    {
+      "epoch": 0.2387811173091333,
+      "grad_norm": 0.14086809754371643,
+      "learning_rate": 0.00019696883016524907,
+      "loss": 0.0684,
+      "step": 2151
+    },
+    {
+      "epoch": 0.23889212666167126,
+      "grad_norm": 0.22447887063026428,
+      "learning_rate": 0.00019696598579061958,
+      "loss": 0.0835,
+      "step": 2152
+    },
+    {
+      "epoch": 0.2390031360142092,
+      "grad_norm": 0.1954028457403183,
+      "learning_rate": 0.00019696314010262737,
+      "loss": 0.0873,
+      "step": 2153
+    },
+    {
+      "epoch": 0.23911414536674716,
+      "grad_norm": 0.2292979508638382,
+      "learning_rate": 0.00019696029310131103,
+      "loss": 0.0852,
+      "step": 2154
+    },
+    {
+      "epoch": 0.2392251547192851,
+      "grad_norm": 0.36275601387023926,
+      "learning_rate": 0.0001969574447867091,
+      "loss": 0.084,
+      "step": 2155
+    },
+    {
+      "epoch": 0.23933616407182304,
+      "grad_norm": 0.2298329919576645,
+      "learning_rate": 0.00019695459515886018,
+      "loss": 0.0728,
+      "step": 2156
+    },
+    {
+      "epoch": 0.239447173424361,
+      "grad_norm": 0.3617827892303467,
+      "learning_rate": 0.00019695174421780284,
+      "loss": 0.1069,
+      "step": 2157
+    },
+    {
+      "epoch": 0.23955818277689894,
+      "grad_norm": 0.317531555891037,
+      "learning_rate": 0.0001969488919635757,
+      "loss": 0.0831,
+      "step": 2158
+    },
+    {
+      "epoch": 0.2396691921294369,
+      "grad_norm": 0.3184008002281189,
+      "learning_rate": 0.00019694603839621745,
+      "loss": 0.1055,
+      "step": 2159
+    },
+    {
+      "epoch": 0.23978020148197485,
+      "grad_norm": 0.2519459128379822,
+      "learning_rate": 0.00019694318351576662,
+      "loss": 0.1095,
+      "step": 2160
+    },
+    {
+      "epoch": 0.2398912108345128,
+      "grad_norm": 0.33748704195022583,
+      "learning_rate": 0.00019694032732226195,
+      "loss": 0.1069,
+      "step": 2161
+    },
+    {
+      "epoch": 0.24000222018705075,
+      "grad_norm": 0.3038945496082306,
+      "learning_rate": 0.00019693746981574214,
+      "loss": 0.1184,
+      "step": 2162
+    },
+    {
+      "epoch": 0.24011322953958872,
+      "grad_norm": 0.24251121282577515,
+      "learning_rate": 0.0001969346109962459,
+      "loss": 0.0747,
+      "step": 2163
+    },
+    {
+      "epoch": 0.24022423889212666,
+      "grad_norm": 0.3548624813556671,
+      "learning_rate": 0.0001969317508638119,
+      "loss": 0.1083,
+      "step": 2164
+    },
+    {
+      "epoch": 0.24033524824466462,
+      "grad_norm": 0.21120557188987732,
+      "learning_rate": 0.0001969288894184789,
+      "loss": 0.0899,
+      "step": 2165
+    },
+    {
+      "epoch": 0.24044625759720256,
+      "grad_norm": 0.5238838791847229,
+      "learning_rate": 0.0001969260266602857,
+      "loss": 0.1027,
+      "step": 2166
+    },
+    {
+      "epoch": 0.24055726694974053,
+      "grad_norm": 0.19483251869678497,
+      "learning_rate": 0.00019692316258927098,
+      "loss": 0.0834,
+      "step": 2167
+    },
+    {
+      "epoch": 0.24066827630227847,
+      "grad_norm": 0.23273009061813354,
+      "learning_rate": 0.00019692029720547364,
+      "loss": 0.0985,
+      "step": 2168
+    },
+    {
+      "epoch": 0.2407792856548164,
+      "grad_norm": 0.16679035127162933,
+      "learning_rate": 0.00019691743050893244,
+      "loss": 0.0838,
+      "step": 2169
+    },
+    {
+      "epoch": 0.24089029500735437,
+      "grad_norm": 0.2921470105648041,
+      "learning_rate": 0.0001969145624996862,
+      "loss": 0.0958,
+      "step": 2170
+    },
+    {
+      "epoch": 0.2410013043598923,
+      "grad_norm": 0.17527632415294647,
+      "learning_rate": 0.00019691169317777378,
+      "loss": 0.0727,
+      "step": 2171
+    },
+    {
+      "epoch": 0.24111231371243028,
+      "grad_norm": 0.497994989156723,
+      "learning_rate": 0.00019690882254323402,
+      "loss": 0.0943,
+      "step": 2172
+    },
+    {
+      "epoch": 0.24122332306496821,
+      "grad_norm": 0.18134763836860657,
+      "learning_rate": 0.00019690595059610586,
+      "loss": 0.1397,
+      "step": 2173
+    },
+    {
+      "epoch": 0.24133433241750618,
+      "grad_norm": 1.0062270164489746,
+      "learning_rate": 0.00019690307733642812,
+      "loss": 0.1127,
+      "step": 2174
+    },
+    {
+      "epoch": 0.24144534177004412,
+      "grad_norm": 0.19196709990501404,
+      "learning_rate": 0.0001969002027642398,
+      "loss": 0.0924,
+      "step": 2175
+    },
+    {
+      "epoch": 0.24155635112258209,
+      "grad_norm": 0.2562776505947113,
+      "learning_rate": 0.00019689732687957974,
+      "loss": 0.0988,
+      "step": 2176
+    },
+    {
+      "epoch": 0.24166736047512002,
+      "grad_norm": 0.09118346124887466,
+      "learning_rate": 0.00019689444968248697,
+      "loss": 0.0744,
+      "step": 2177
+    },
+    {
+      "epoch": 0.241778369827658,
+      "grad_norm": 0.1430000215768814,
+      "learning_rate": 0.00019689157117300045,
+      "loss": 0.1138,
+      "step": 2178
+    },
+    {
+      "epoch": 0.24188937918019593,
+      "grad_norm": 0.120170958340168,
+      "learning_rate": 0.00019688869135115912,
+      "loss": 0.0741,
+      "step": 2179
+    },
+    {
+      "epoch": 0.2420003885327339,
+      "grad_norm": 0.13768386840820312,
+      "learning_rate": 0.00019688581021700205,
+      "loss": 0.0828,
+      "step": 2180
+    },
+    {
+      "epoch": 0.24211139788527183,
+      "grad_norm": 0.1340225785970688,
+      "learning_rate": 0.00019688292777056825,
+      "loss": 0.0952,
+      "step": 2181
+    },
+    {
+      "epoch": 0.24222240723780977,
+      "grad_norm": 0.2026708424091339,
+      "learning_rate": 0.0001968800440118967,
+      "loss": 0.0821,
+      "step": 2182
+    },
+    {
+      "epoch": 0.24233341659034774,
+      "grad_norm": 0.1743718385696411,
+      "learning_rate": 0.00019687715894102655,
+      "loss": 0.0738,
+      "step": 2183
+    },
+    {
+      "epoch": 0.24244442594288568,
+      "grad_norm": 0.8386828303337097,
+      "learning_rate": 0.0001968742725579968,
+      "loss": 0.1432,
+      "step": 2184
+    },
+    {
+      "epoch": 0.24255543529542364,
+      "grad_norm": 0.14606383442878723,
+      "learning_rate": 0.00019687138486284658,
+      "loss": 0.0949,
+      "step": 2185
+    },
+    {
+      "epoch": 0.24266644464796158,
+      "grad_norm": 0.1345566064119339,
+      "learning_rate": 0.00019686849585561498,
+      "loss": 0.0861,
+      "step": 2186
+    },
+    {
+      "epoch": 0.24277745400049955,
+      "grad_norm": 0.3952319920063019,
+      "learning_rate": 0.00019686560553634118,
+      "loss": 0.1184,
+      "step": 2187
+    },
+    {
+      "epoch": 0.2428884633530375,
+      "grad_norm": 0.2549149692058563,
+      "learning_rate": 0.00019686271390506428,
+      "loss": 0.0894,
+      "step": 2188
+    },
+    {
+      "epoch": 0.24299947270557545,
+      "grad_norm": 0.5761973857879639,
+      "learning_rate": 0.00019685982096182347,
+      "loss": 0.1013,
+      "step": 2189
+    },
+    {
+      "epoch": 0.2431104820581134,
+      "grad_norm": 0.1078142300248146,
+      "learning_rate": 0.00019685692670665792,
+      "loss": 0.0793,
+      "step": 2190
+    },
+    {
+      "epoch": 0.24322149141065136,
+      "grad_norm": 0.1755504459142685,
+      "learning_rate": 0.00019685403113960687,
+      "loss": 0.1008,
+      "step": 2191
+    },
+    {
+      "epoch": 0.2433325007631893,
+      "grad_norm": 0.08662787824869156,
+      "learning_rate": 0.00019685113426070946,
+      "loss": 0.0765,
+      "step": 2192
+    },
+    {
+      "epoch": 0.24344351011572726,
+      "grad_norm": 0.2538180947303772,
+      "learning_rate": 0.00019684823607000503,
+      "loss": 0.08,
+      "step": 2193
+    },
+    {
+      "epoch": 0.2435545194682652,
+      "grad_norm": 0.11517667770385742,
+      "learning_rate": 0.00019684533656753273,
+      "loss": 0.0758,
+      "step": 2194
+    },
+    {
+      "epoch": 0.24366552882080314,
+      "grad_norm": 0.08525574952363968,
+      "learning_rate": 0.00019684243575333188,
+      "loss": 0.0936,
+      "step": 2195
+    },
+    {
+      "epoch": 0.2437765381733411,
+      "grad_norm": 0.07251805812120438,
+      "learning_rate": 0.0001968395336274418,
+      "loss": 0.0719,
+      "step": 2196
+    },
+    {
+      "epoch": 0.24388754752587904,
+      "grad_norm": 0.14106275141239166,
+      "learning_rate": 0.00019683663018990174,
+      "loss": 0.0733,
+      "step": 2197
+    },
+    {
+      "epoch": 0.243998556878417,
+      "grad_norm": 8.645146369934082,
+      "learning_rate": 0.00019683372544075108,
+      "loss": 0.1518,
+      "step": 2198
+    },
+    {
+      "epoch": 0.24410956623095495,
+      "grad_norm": 0.2157564014196396,
+      "learning_rate": 0.00019683081938002913,
+      "loss": 0.0855,
+      "step": 2199
+    },
+    {
+      "epoch": 0.24422057558349292,
+      "grad_norm": 0.4537072479724884,
+      "learning_rate": 0.00019682791200777527,
+      "loss": 0.1019,
+      "step": 2200
+    },
+    {
+      "epoch": 0.24422057558349292,
+      "eval_loss": 0.08438266813755035,
+      "eval_runtime": 73.0572,
+      "eval_samples_per_second": 2.491,
+      "eval_steps_per_second": 2.491,
+      "step": 2200
     }
   ],
   "logging_steps": 1,
@@ -14109,7 +15517,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -14123,7 +15531,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.5082052042752e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
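trainer_state.json is plain JSON, so the fields updated by this commit are easy to read back. A minimal sketch, assuming a local copy of the last-checkpoint/ directory; the field names match the diff above, and the commented values are the ones recorded at step 2200.

```python
import json

# Load the checkpoint's trainer state written by the HF Trainer.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.08438266813755035
print(state["best_model_checkpoint"])  # miner_id_24/checkpoint-2200
print(state["global_step"])            # 2200

# Mean training loss over the steps added in this commit (2001-2200);
# eval entries carry "eval_loss" instead of "loss" and are skipped.
losses = [
    e["loss"]
    for e in state["log_history"]
    if "loss" in e and 2001 <= e.get("step", 0) <= 2200
]
print(sum(losses) / len(losses))
```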