ashanhr committed on
Commit
65b4f12
1 Parent(s): 3c33cb1

Training in progress, step 32700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09eceed9906bfe27bd3680afd50c41121c73b82c7cd5ef345c69fad35c0ab8b8
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc051f6398a756522cc490f0a8f154f42b3f2b30d98bf7d60f11ac1888cdb3da
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bf99370e0c7555ecb290c63576152db41e5b2dbb69d0454ac7e5f89074f7bd4
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02021cbbbec3246b6712aecc4cd14c4d4dfc08e3dbc00c5473d548a791306375
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e617199afd6c3a950fb46336c901b1ff9024ac6556123985ed6b261b689855f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1539346c05dd0db53e003a5edfb0638d6fd9baf85cb3d6708792dcf6b2d6967
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd931d9164958a9409e10cbb2086014b2f7f92c6de1fd6882064a496d5c52211
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c999f4c1d5bb7ee3e6b99ed7a27056b2b32e84ae4ae775052bdec993bdba558
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2b9126a6c1c8e3c86ea30228e288a1b5e1ab3597232760fc5ccce02139d2524
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f278373e69e9ebd2b300373366d8c349b14d9205b631b0731c93a52b7c91f8b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.681330246263945,
5
  "eval_steps": 100,
6
- "global_step": 32500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5207,6 +5207,38 @@
5207
  "eval_samples_per_second": 25.515,
5208
  "eval_steps_per_second": 3.19,
5209
  "step": 32500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5210
  }
5211
  ],
5212
  "logging_steps": 100,
@@ -5214,7 +5246,7 @@
5214
  "num_input_tokens_seen": 0,
5215
  "num_train_epochs": 30,
5216
  "save_steps": 100,
5217
- "total_flos": 3.560483539431464e+20,
5218
  "train_batch_size": 8,
5219
  "trial_name": null,
5220
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.765523047779414,
5
  "eval_steps": 100,
6
+ "global_step": 32700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5207
  "eval_samples_per_second": 25.515,
5208
  "eval_steps_per_second": 3.19,
5209
  "step": 32500
5210
+ },
5211
+ {
5212
+ "epoch": 13.72,
5213
+ "grad_norm": 2.935277223587036,
5214
+ "learning_rate": 2.7326501766784452e-05,
5215
+ "loss": 1.0508,
5216
+ "step": 32600
5217
+ },
5218
+ {
5219
+ "epoch": 13.72,
5220
+ "eval_cer": 0.40732665910601856,
5221
+ "eval_loss": 2.4883534908294678,
5222
+ "eval_runtime": 395.2638,
5223
+ "eval_samples_per_second": 23.979,
5224
+ "eval_steps_per_second": 2.998,
5225
+ "step": 32600
5226
+ },
5227
+ {
5228
+ "epoch": 13.77,
5229
+ "grad_norm": 2.7527854442596436,
5230
+ "learning_rate": 2.7255830388692584e-05,
5231
+ "loss": 1.0314,
5232
+ "step": 32700
5233
+ },
5234
+ {
5235
+ "epoch": 13.77,
5236
+ "eval_cer": 0.40216944976731456,
5237
+ "eval_loss": 2.4499011039733887,
5238
+ "eval_runtime": 369.8082,
5239
+ "eval_samples_per_second": 25.63,
5240
+ "eval_steps_per_second": 3.204,
5241
+ "step": 32700
5242
  }
5243
  ],
5244
  "logging_steps": 100,
 
5246
  "num_input_tokens_seen": 0,
5247
  "num_train_epochs": 30,
5248
  "save_steps": 100,
5249
+ "total_flos": 3.582774517736169e+20,
5250
  "train_batch_size": 8,
5251
  "trial_name": null,
5252
  "trial_params": null