Training in progress, step 18800, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb4a32dca6535b97ec9d409e1a20428735a6caf1a07d9a581e33bfebdd98da98
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:deeb9007a2105b9fb534ca944aa12eb54abb9e830e8010cc6a86bc5057ba3f51
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56b60272ee229edfde841bdf64339526be164c159e635b4cca6a36b012e8b8f8
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76df4b30fe1f8da5a7cc93cde7f754197127ebb58da2a061a690a113c1c8f092
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86f6690eee3cc7efd55068f6778b0cdb2acd34211dc189d0a8d254cc2fc4d966
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2983,6 +2983,38 @@
|
|
2983 |
"eval_samples_per_second": 25.775,
|
2984 |
"eval_steps_per_second": 3.223,
|
2985 |
"step": 18600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2986 |
}
|
2987 |
],
|
2988 |
"logging_steps": 100,
|
@@ -2990,7 +3022,7 @@
|
|
2990 |
"num_input_tokens_seen": 0,
|
2991 |
"num_train_epochs": 30,
|
2992 |
"save_steps": 100,
|
2993 |
-
"total_flos": 2.
|
2994 |
"train_batch_size": 8,
|
2995 |
"trial_name": null,
|
2996 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.91412334245422,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 18800,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2983 |
"eval_samples_per_second": 25.775,
|
2984 |
"eval_steps_per_second": 3.223,
|
2985 |
"step": 18600
|
2986 |
+
},
|
2987 |
+
{
|
2988 |
+
"epoch": 7.87,
|
2989 |
+
"grad_norm": 2.091411590576172,
|
2990 |
+
"learning_rate": 3.714628975265018e-05,
|
2991 |
+
"loss": 1.7498,
|
2992 |
+
"step": 18700
|
2993 |
+
},
|
2994 |
+
{
|
2995 |
+
"epoch": 7.87,
|
2996 |
+
"eval_cer": 0.44587129951898635,
|
2997 |
+
"eval_loss": 2.4873616695404053,
|
2998 |
+
"eval_runtime": 381.5796,
|
2999 |
+
"eval_samples_per_second": 24.839,
|
3000 |
+
"eval_steps_per_second": 3.106,
|
3001 |
+
"step": 18700
|
3002 |
+
},
|
3003 |
+
{
|
3004 |
+
"epoch": 7.91,
|
3005 |
+
"grad_norm": 2.1210122108459473,
|
3006 |
+
"learning_rate": 3.7075618374558305e-05,
|
3007 |
+
"loss": 1.4463,
|
3008 |
+
"step": 18800
|
3009 |
+
},
|
3010 |
+
{
|
3011 |
+
"epoch": 7.91,
|
3012 |
+
"eval_cer": 0.4508696374799578,
|
3013 |
+
"eval_loss": 2.1447994709014893,
|
3014 |
+
"eval_runtime": 360.7307,
|
3015 |
+
"eval_samples_per_second": 26.274,
|
3016 |
+
"eval_steps_per_second": 3.285,
|
3017 |
+
"step": 18800
|
3018 |
}
|
3019 |
],
|
3020 |
"logging_steps": 100,
|
|
|
3022 |
"num_input_tokens_seen": 0,
|
3023 |
"num_train_epochs": 30,
|
3024 |
"save_steps": 100,
|
3025 |
+
"total_flos": 2.0599593093311608e+20,
|
3026 |
"train_batch_size": 8,
|
3027 |
"trial_name": null,
|
3028 |
"trial_params": null
|