ashanhr committed on
Commit
6230789
·
verified ·
1 Parent(s): 6d6c9e4

Training in progress, step 32900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc051f6398a756522cc490f0a8f154f42b3f2b30d98bf7d60f11ac1888cdb3da
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43cf24c50a2fb90dff7e6f39ca95373b4b3a194b6e7b5ea3450b567d1cf2acd4
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02021cbbbec3246b6712aecc4cd14c4d4dfc08e3dbc00c5473d548a791306375
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4045e3043dff6c75ceb1ceaf8651a36ad38e032419f1934e6b35b395ed37e495
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1539346c05dd0db53e003a5edfb0638d6fd9baf85cb3d6708792dcf6b2d6967
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:830da9de182f0cddc597083bf4f5e081033ce68dba8352d3fc4bd7b82acf4629
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c999f4c1d5bb7ee3e6b99ed7a27056b2b32e84ae4ae775052bdec993bdba558
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66bf72c9582e89b0605330cab03a1ed6574afa16c96e8aa002547f90558e0a41
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f278373e69e9ebd2b300373366d8c349b14d9205b631b0731c93a52b7c91f8b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fea4ba0cc40f8b6ca722c0ae8b27293a5270b2e8eb60bc080bc213006fdf44bc
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.765523047779414,
5
  "eval_steps": 100,
6
- "global_step": 32700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5239,6 +5239,38 @@
5239
  "eval_samples_per_second": 25.63,
5240
  "eval_steps_per_second": 3.204,
5241
  "step": 32700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5242
  }
5243
  ],
5244
  "logging_steps": 100,
@@ -5246,7 +5278,7 @@
5246
  "num_input_tokens_seen": 0,
5247
  "num_train_epochs": 30,
5248
  "save_steps": 100,
5249
- "total_flos": 3.582774517736169e+20,
5250
  "train_batch_size": 8,
5251
  "trial_name": null,
5252
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.849715849294885,
5
  "eval_steps": 100,
6
+ "global_step": 32900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5239
  "eval_samples_per_second": 25.63,
5240
  "eval_steps_per_second": 3.204,
5241
  "step": 32700
5242
+ },
5243
+ {
5244
+ "epoch": 13.81,
5245
+ "grad_norm": 1.3860234022140503,
5246
+ "learning_rate": 2.718515901060071e-05,
5247
+ "loss": 1.7756,
5248
+ "step": 32800
5249
+ },
5250
+ {
5251
+ "epoch": 13.81,
5252
+ "eval_cer": 0.41578839310156035,
5253
+ "eval_loss": 2.565376043319702,
5254
+ "eval_runtime": 390.8183,
5255
+ "eval_samples_per_second": 24.252,
5256
+ "eval_steps_per_second": 3.032,
5257
+ "step": 32800
5258
+ },
5259
+ {
5260
+ "epoch": 13.85,
5261
+ "grad_norm": 9.211983680725098,
5262
+ "learning_rate": 2.7114487632508833e-05,
5263
+ "loss": 1.0164,
5264
+ "step": 32900
5265
+ },
5266
+ {
5267
+ "epoch": 13.85,
5268
+ "eval_cer": 0.39727621133315083,
5269
+ "eval_loss": 2.448713779449463,
5270
+ "eval_runtime": 372.957,
5271
+ "eval_samples_per_second": 25.413,
5272
+ "eval_steps_per_second": 3.177,
5273
+ "step": 32900
5274
  }
5275
  ],
5276
  "logging_steps": 100,
 
5278
  "num_input_tokens_seen": 0,
5279
  "num_train_epochs": 30,
5280
  "save_steps": 100,
5281
+ "total_flos": 3.6049959871612964e+20,
5282
  "train_batch_size": 8,
5283
  "trial_name": null,
5284
  "trial_params": null