ashanhr commited on
Commit
a4c7546
·
verified ·
1 Parent(s): ea3dec0

Training in progress, step 22700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a6f6fa04efb391c7b9fbd08481c3254f51e8b2441232585f62a46a10bf14603
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f673b59dec39b6208eaf91ca45400990b1ab1106cb32adb89e40289b3692b7e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d778a8ad947e1c4d0ef5534ea92304b2cc11141de4fb34b0cf684c5c48ad306b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19267df07b64ec62c5070fedddb73269d34ceb079fafa1cc02aa086792ab3300
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc91affcdb5daa6221794d9599eeb12bcba1f681e61698d0b5225d19c1246f0d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc7974ef7e39b635024ed4786978103eb9d236e553fc1a70a6679023daa9296c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:275755e5f99f71b688f2ecd5a64a5f85d430b909b06bf279b4dfbdc41ff3aab1
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8052a894386dd37f759deaab2c0d19acd6e44b9e9130c47c71e3f213d13e6053
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b142e6816a019f0e45a0c6af01127180a64accf85a0b47c2b9c58feab018eef9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49d336b26493b459dab233dfc32ed3bf79ed6ce889dcb67006a60f916875b97
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.471690170490422,
5
  "eval_steps": 100,
6
- "global_step": 22500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3607,6 +3607,38 @@
3607
  "eval_samples_per_second": 25.533,
3608
  "eval_steps_per_second": 3.192,
3609
  "step": 22500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3610
  }
3611
  ],
3612
  "logging_steps": 100,
@@ -3614,7 +3646,7 @@
3614
  "num_input_tokens_seen": 0,
3615
  "num_train_epochs": 30,
3616
  "save_steps": 100,
3617
- "total_flos": 2.4656325068595944e+20,
3618
  "train_batch_size": 8,
3619
  "trial_name": null,
3620
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.555882972005893,
5
  "eval_steps": 100,
6
+ "global_step": 22700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3607
  "eval_samples_per_second": 25.533,
3608
  "eval_steps_per_second": 3.192,
3609
  "step": 22500
3610
+ },
3611
+ {
3612
+ "epoch": 9.51,
3613
+ "grad_norm": 2.171220302581787,
3614
+ "learning_rate": 3.439151943462898e-05,
3615
+ "loss": 1.2667,
3616
+ "step": 22600
3617
+ },
3618
+ {
3619
+ "epoch": 9.51,
3620
+ "eval_cer": 0.44492784795275897,
3621
+ "eval_loss": 1.6504524946212769,
3622
+ "eval_runtime": 384.6648,
3623
+ "eval_samples_per_second": 24.64,
3624
+ "eval_steps_per_second": 3.081,
3625
+ "step": 22600
3626
+ },
3627
+ {
3628
+ "epoch": 9.56,
3629
+ "grad_norm": 4.868325233459473,
3630
+ "learning_rate": 3.43208480565371e-05,
3631
+ "loss": 1.2425,
3632
+ "step": 22700
3633
+ },
3634
+ {
3635
+ "epoch": 9.56,
3636
+ "eval_cer": 0.4337261937350905,
3637
+ "eval_loss": 1.6726810932159424,
3638
+ "eval_runtime": 360.9955,
3639
+ "eval_samples_per_second": 26.255,
3640
+ "eval_steps_per_second": 3.283,
3641
+ "step": 22700
3642
  }
3643
  ],
3644
  "logging_steps": 100,
 
3646
  "num_input_tokens_seen": 0,
3647
  "num_train_epochs": 30,
3648
  "save_steps": 100,
3649
+ "total_flos": 2.487348846779533e+20,
3650
  "train_batch_size": 8,
3651
  "trial_name": null,
3652
  "trial_params": null