ashanhr committed on
Commit
9a18542
1 Parent(s): f890b76

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13d9360e86d6728b712f59c7c4708bbf9b1a6e025cbe69d32a8c222aedcfc489
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69774389453b3ace50a775638e2f5918335f227e54490308f1ecc9f049c680b9
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72d17c27964e0a9bb4db1d4e8f9a66d19db2579e7d1a4682579269e4387b23ca
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eb25e6a96a19cf513e1cbf5b186f74f8277610735262c93589f5210fe9a995e
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb443bec736266f1308adc945a429fcbd90b82f2937dc6e73185a9dc0e104218
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f40b795846ead1c0623ee579805fb3cfc72061e674100f80e71e636c069d35
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fad3bce82b1ec9d0e105b84287a1d9a5af1cdbaae50b9ee14dce6c13e9ad62a7
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88808367e8c9aa08f2d1d94fe210aa146f1db378f3f747361538dc647479930a
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8004b4b6a8c520e4ed293ea3a4cb135c114f118b79d1b79d758805030484f7fb
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b0d9587951dad3129d971e7ba722052a3740ec7e6470902edd12e67fb1998ca
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.335087350031571,
5
  "eval_steps": 100,
6
- "global_step": 19800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3175,6 +3175,38 @@
3175
  "eval_samples_per_second": 26.038,
3176
  "eval_steps_per_second": 3.255,
3177
  "step": 19800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3178
  }
3179
  ],
3180
  "logging_steps": 100,
@@ -3182,7 +3214,7 @@
3182
  "num_input_tokens_seen": 0,
3183
  "num_train_epochs": 30,
3184
  "save_steps": 100,
3185
- "total_flos": 2.169673140779638e+20,
3186
  "train_batch_size": 8,
3187
  "trial_name": null,
3188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.419280151547042,
5
  "eval_steps": 100,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3175
  "eval_samples_per_second": 26.038,
3176
  "eval_steps_per_second": 3.255,
3177
  "step": 19800
3178
+ },
3179
+ {
3180
+ "epoch": 8.38,
3181
+ "grad_norm": 1.848604679107666,
3182
+ "learning_rate": 3.629893992932862e-05,
3183
+ "loss": 2.2543,
3184
+ "step": 19900
3185
+ },
3186
+ {
3187
+ "epoch": 8.38,
3188
+ "eval_cer": 0.4581728325055727,
3189
+ "eval_loss": 1.8045251369476318,
3190
+ "eval_runtime": 381.1374,
3191
+ "eval_samples_per_second": 24.868,
3192
+ "eval_steps_per_second": 3.109,
3193
+ "step": 19900
3194
+ },
3195
+ {
3196
+ "epoch": 8.42,
3197
+ "grad_norm": 34.23902893066406,
3198
+ "learning_rate": 3.622826855123675e-05,
3199
+ "loss": 1.313,
3200
+ "step": 20000
3201
+ },
3202
+ {
3203
+ "epoch": 8.42,
3204
+ "eval_cer": 0.4448887411520864,
3205
+ "eval_loss": 2.140085458755493,
3206
+ "eval_runtime": 364.9023,
3207
+ "eval_samples_per_second": 25.974,
3208
+ "eval_steps_per_second": 3.247,
3209
+ "step": 20000
3210
  }
3211
  ],
3212
  "logging_steps": 100,
 
3214
  "num_input_tokens_seen": 0,
3215
  "num_train_epochs": 30,
3216
  "save_steps": 100,
3217
+ "total_flos": 2.1918019268617958e+20,
3218
  "train_batch_size": 8,
3219
  "trial_name": null,
3220
  "trial_params": null