ashanhr commited on
Commit
94240ad
1 Parent(s): a8efc61

Training in progress, step 56700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37b48cfd98f64c5d125f771e54c20395665d1d877037bac06b3572191009699e
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb0638e9518badfd63abf2ece00d350144f9d41acccefe9bae1bc9a7e5e16b6
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44acee1aeb254e47b183795a7e974bb04e859045fbcb8a60cd68541f1f93e9ee
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa1f50c2149cedbbcdc0a0b3f97d9da7bdaa07ae30698972e4b2783c4e4cae3
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe2dee411cbfa983ddb90c55ee2a72b7fe583cabda1a874be6896ce43b84a43d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bae8989ab317e310c96be055cf193f107aac21ae74e30496cb700ed26fcca54
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d2921f22486aac351c57cbb7c2191104e5b51ae494fc326c3d7293b77f62dc6
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47efbf05c9bdc39e09c06488a9440a7d4c26b3f59083d9e5abc4e7e8931b8a48
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25166827dfb1f80c901f38f580504a29f3bfd9dd8b2e815ea7c1992907c058ce
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2262615ae76018b8db25bb87fb7e7da2c4f7f0d991b75e87912b6a8c588ade12
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.784466428120396,
5
  "eval_steps": 100,
6
- "global_step": 56500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9047,6 +9047,38 @@
9047
  "eval_samples_per_second": 23.576,
9048
  "eval_steps_per_second": 2.948,
9049
  "step": 56500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9050
  }
9051
  ],
9052
  "logging_steps": 100,
@@ -9054,7 +9086,7 @@
9054
  "num_input_tokens_seen": 0,
9055
  "num_train_epochs": 30,
9056
  "save_steps": 100,
9057
- "total_flos": 6.191168025761128e+20,
9058
  "train_batch_size": 8,
9059
  "trial_name": null,
9060
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.868659229635867,
5
  "eval_steps": 100,
6
+ "global_step": 56700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9047
  "eval_samples_per_second": 23.576,
9048
  "eval_steps_per_second": 2.948,
9049
  "step": 56500
9050
+ },
9051
+ {
9052
+ "epoch": 23.83,
9053
+ "grad_norm": 3.0282020568847656,
9054
+ "learning_rate": 1.0370318021201413e-05,
9055
+ "loss": 0.2639,
9056
+ "step": 56600
9057
+ },
9058
+ {
9059
+ "epoch": 23.83,
9060
+ "eval_cer": 0.33265222322161825,
9061
+ "eval_loss": 2.564777374267578,
9062
+ "eval_runtime": 430.1757,
9063
+ "eval_samples_per_second": 22.033,
9064
+ "eval_steps_per_second": 2.755,
9065
+ "step": 56600
9066
+ },
9067
+ {
9068
+ "epoch": 23.87,
9069
+ "grad_norm": 1.940981388092041,
9070
+ "learning_rate": 1.0299646643109541e-05,
9071
+ "loss": 0.2606,
9072
+ "step": 56700
9073
+ },
9074
+ {
9075
+ "epoch": 23.87,
9076
+ "eval_cer": 0.33390608501818464,
9077
+ "eval_loss": 2.5213394165039062,
9078
+ "eval_runtime": 397.5174,
9079
+ "eval_samples_per_second": 23.843,
9080
+ "eval_steps_per_second": 2.981,
9081
+ "step": 56700
9082
  }
9083
  ],
9084
  "logging_steps": 100,
 
9086
  "num_input_tokens_seen": 0,
9087
  "num_train_epochs": 30,
9088
  "save_steps": 100,
9089
+ "total_flos": 6.212938084955773e+20,
9090
  "train_batch_size": 8,
9091
  "trial_name": null,
9092
  "trial_params": null