Training in progress, step 14300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5944066b223cfd00c404cf3a6499df6bf1156481f8e46db2244f54b83303cf94
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91b270efdfcaabe2c52fc6c456f6d62179665c124b6a42e9797a2f9dffe4b1c0
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:702b99df31af42d1120ccb356b372e5108301018183cd956500f1cbd4ca79c15
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ecbc500923ed650d6f32ad8c69af7efe7dabee68519bf7ac279027b01c64989
|
3 |
+
size 14631
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:417d56dc8d90d6f81699627cbe27ea28c719a2a1f4ac895f35d5afcaf9387b09
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2263,6 +2263,38 @@
|
|
2263 |
"eval_samples_per_second": 24.699,
|
2264 |
"eval_steps_per_second": 3.088,
|
2265 |
"step": 14100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2266 |
}
|
2267 |
],
|
2268 |
"logging_steps": 100,
|
@@ -2270,7 +2302,7 @@
|
|
2270 |
"num_input_tokens_seen": 0,
|
2271 |
"num_train_epochs": 30,
|
2272 |
"save_steps": 100,
|
2273 |
-
"total_flos": 1.
|
2274 |
"train_batch_size": 8,
|
2275 |
"trial_name": null,
|
2276 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.019785308356136,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 14300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2263 |
"eval_samples_per_second": 24.699,
|
2264 |
"eval_steps_per_second": 3.088,
|
2265 |
"step": 14100
|
2266 |
+
},
|
2267 |
+
{
|
2268 |
+
"epoch": 5.98,
|
2269 |
+
"grad_norm": 2.196765184402466,
|
2270 |
+
"learning_rate": 4.032579505300353e-05,
|
2271 |
+
"loss": 2.0249,
|
2272 |
+
"step": 14200
|
2273 |
+
},
|
2274 |
+
{
|
2275 |
+
"epoch": 5.98,
|
2276 |
+
"eval_cer": 0.4723050525986469,
|
2277 |
+
"eval_loss": 1.9896740913391113,
|
2278 |
+
"eval_runtime": 382.0154,
|
2279 |
+
"eval_samples_per_second": 24.811,
|
2280 |
+
"eval_steps_per_second": 3.102,
|
2281 |
+
"step": 14200
|
2282 |
+
},
|
2283 |
+
{
|
2284 |
+
"epoch": 6.02,
|
2285 |
+
"grad_norm": 2.4070873260498047,
|
2286 |
+
"learning_rate": 4.025512367491166e-05,
|
2287 |
+
"loss": 2.6354,
|
2288 |
+
"step": 14300
|
2289 |
+
},
|
2290 |
+
{
|
2291 |
+
"epoch": 6.02,
|
2292 |
+
"eval_cer": 0.46424660748504165,
|
2293 |
+
"eval_loss": 1.524404525756836,
|
2294 |
+
"eval_runtime": 361.2522,
|
2295 |
+
"eval_samples_per_second": 26.237,
|
2296 |
+
"eval_steps_per_second": 3.28,
|
2297 |
+
"step": 14300
|
2298 |
}
|
2299 |
],
|
2300 |
"logging_steps": 100,
|
|
|
2302 |
"num_input_tokens_seen": 0,
|
2303 |
"num_train_epochs": 30,
|
2304 |
"save_steps": 100,
|
2305 |
+
"total_flos": 1.5667869779936743e+20,
|
2306 |
"train_batch_size": 8,
|
2307 |
"trial_name": null,
|
2308 |
"trial_params": null
|