ashanhr commited on
Commit
792c1fe
·
verified ·
1 Parent(s): 2a0ff47

Training in progress, step 15100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:754b1828cc167e6cab0ffbfc255be209fdb52e699fa01a76ef98fb551e01e46f
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd61a41f4cf03f5858195ca5fe96d3c3669fe49f638ea09912e26a01aa52e086
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a389819ab49bc7ca78cb2be2fa01aa4870f6a6faba478c25fd1690a6389bc966
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0916989ad40b3bfe12c6ac9e11f6329dbeaad0ddceacffcd280cb2def46715a8
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:414a7a1b420818e98a3d84f5bcf74aa2f7777a0ef52dcbde83df3ffb6cdca039
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2a98ae89a09009e626d84ebaa74d059e720c000f959a2a59e925259dd1b538d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9c0687a4091703f6184c454999271ab1b078907b3d7227b3900790c9d656ca3
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c18a3a2fcaa038440f8b838aa2256d7e37374c67a3328231524132648f3eb179
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c25ac89a1303cae92d19e5e1836924b87fa1d1a947021780db194d57b5ef2f4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a29cec3c1ed3d0958d8f6b240320ad40e3ee07494f39436857c7553488102e86
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.272363712902547,
5
  "eval_steps": 100,
6
- "global_step": 14900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2391,6 +2391,38 @@
2391
  "eval_samples_per_second": 26.387,
2392
  "eval_steps_per_second": 3.299,
2393
  "step": 14900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2394
  }
2395
  ],
2396
  "logging_steps": 100,
@@ -2398,7 +2430,7 @@
2398
  "num_input_tokens_seen": 0,
2399
  "num_train_epochs": 30,
2400
  "save_steps": 100,
2401
- "total_flos": 1.63280482609936e+20,
2402
  "train_batch_size": 8,
2403
  "trial_name": null,
2404
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.356556514418017,
5
  "eval_steps": 100,
6
+ "global_step": 15100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2391
  "eval_samples_per_second": 26.387,
2392
  "eval_steps_per_second": 3.299,
2393
  "step": 14900
2394
+ },
2395
+ {
2396
+ "epoch": 6.31,
2397
+ "grad_norm": 4.323084354400635,
2398
+ "learning_rate": 3.976042402826856e-05,
2399
+ "loss": 1.4552,
2400
+ "step": 15000
2401
+ },
2402
+ {
2403
+ "epoch": 6.31,
2404
+ "eval_cer": 0.459111395721716,
2405
+ "eval_loss": 2.0859220027923584,
2406
+ "eval_runtime": 380.4099,
2407
+ "eval_samples_per_second": 24.915,
2408
+ "eval_steps_per_second": 3.115,
2409
+ "step": 15000
2410
+ },
2411
+ {
2412
+ "epoch": 6.36,
2413
+ "grad_norm": 3.9104325771331787,
2414
+ "learning_rate": 3.968975265017668e-05,
2415
+ "loss": 2.836,
2416
+ "step": 15100
2417
+ },
2418
+ {
2419
+ "epoch": 6.36,
2420
+ "eval_cer": 0.47354180516991906,
2421
+ "eval_loss": 1.5567090511322021,
2422
+ "eval_runtime": 368.3958,
2423
+ "eval_samples_per_second": 25.728,
2424
+ "eval_steps_per_second": 3.217,
2425
+ "step": 15100
2426
  }
2427
  ],
2428
  "logging_steps": 100,
 
2430
  "num_input_tokens_seen": 0,
2431
  "num_train_epochs": 30,
2432
  "save_steps": 100,
2433
+ "total_flos": 1.6546343911883774e+20,
2434
  "train_batch_size": 8,
2435
  "trial_name": null,
2436
  "trial_params": null