ashanhr commited on
Commit
ce3baa1
1 Parent(s): cafa1c8

Training in progress, step 31100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a48a3ca955f4c3490521ffa16267bda052fbae1914e6e361907a2828816f0c77
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237468a1c8fe2d6d8ef6319e2cc1c429e454b699da043b4f7922bed2a6fc5d74
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:982196fc29592e305eafca95afb014310ab8940220bbbb1a5a33250f58ddf315
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c1b2ca9ce98af1ff8d88d334a04a834fe19c5bc2e097ac0d5ed5d2377807c5
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3a59837b584d61da7f6862e8ac72f4bc9bb2b3b48e0a427fa8da7ba5143bf40
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2720894a0c9398314cecc93d46ae5c361788de8d1908781b48e4915d7b52beca
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ea975816aac3d40a8b3d99eca8a891c93c967ac80c950988bae3dd44c17f8cc
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:547bb396072d7ce130c2b2f0586bfd93bbc9d358ed6e66e47535cd097a9d5243
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35cbb8ba5802e44e1ce90c535d286211f64f07ac10efce360c20ea86aa5c0fe7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c79b2a118092e551c5c318e5a915708d0417938e7e99546a1fe499d26306ef7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.007787834140181,
5
  "eval_steps": 100,
6
- "global_step": 30900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4951,6 +4951,38 @@
4951
  "eval_samples_per_second": 25.602,
4952
  "eval_steps_per_second": 3.201,
4953
  "step": 30900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4954
  }
4955
  ],
4956
  "logging_steps": 100,
@@ -4958,7 +4990,7 @@
4958
  "num_input_tokens_seen": 0,
4959
  "num_train_epochs": 30,
4960
  "save_steps": 100,
4961
- "total_flos": 3.386567819260411e+20,
4962
  "train_batch_size": 8,
4963
  "trial_name": null,
4964
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.091980635655652,
5
  "eval_steps": 100,
6
+ "global_step": 31100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4951
  "eval_samples_per_second": 25.602,
4952
  "eval_steps_per_second": 3.201,
4953
  "step": 30900
4954
+ },
4955
+ {
4956
+ "epoch": 13.05,
4957
+ "grad_norm": 3.738598585128784,
4958
+ "learning_rate": 2.845724381625442e-05,
4959
+ "loss": 0.9841,
4960
+ "step": 31000
4961
+ },
4962
+ {
4963
+ "epoch": 13.05,
4964
+ "eval_cer": 0.4061705643111337,
4965
+ "eval_loss": 2.7012014389038086,
4966
+ "eval_runtime": 394.4654,
4967
+ "eval_samples_per_second": 24.027,
4968
+ "eval_steps_per_second": 3.004,
4969
+ "step": 31000
4970
+ },
4971
+ {
4972
+ "epoch": 13.09,
4973
+ "grad_norm": 3.3957040309906006,
4974
+ "learning_rate": 2.8386572438162544e-05,
4975
+ "loss": 1.444,
4976
+ "step": 31100
4977
+ },
4978
+ {
4979
+ "epoch": 13.09,
4980
+ "eval_cer": 0.3994588596456924,
4981
+ "eval_loss": 2.732797861099243,
4982
+ "eval_runtime": 371.9235,
4983
+ "eval_samples_per_second": 25.484,
4984
+ "eval_steps_per_second": 3.186,
4985
+ "step": 31100
4986
  }
4987
  ],
4988
  "logging_steps": 100,
 
4990
  "num_input_tokens_seen": 0,
4991
  "num_train_epochs": 30,
4992
  "save_steps": 100,
4993
+ "total_flos": 3.408482296230056e+20,
4994
  "train_batch_size": 8,
4995
  "trial_name": null,
4996
  "trial_params": null