ashanhr committed on
Commit
897b721
1 Parent(s): dd7c594

Training in progress, step 39100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86aedaea68068dd5c1a8b421f702958fe0acd2fbb8823da4823612fb4e7c45bf
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59cf012cca14e7c479066e2ff0a5e1f204267b57f9816ba1aa6ee55cc5d6f9bf
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:727182a7c870435209c98f44944156abd6a6c3261ccb96daf87907a574c1b37b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de0416e5725a213d2dafe05a9938222b9ad2625ab52c5100cdd7de652f9a9f5a
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93a07f3eb542190ed23f7b40d5f4678b80bd5b4ba91438888eb3d3cb6fc3186e
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f668cec0befb5905f1774c4110fe903320e1b8e768261c233cddbf5d15f6e2
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91e28ae43f1dd07d9a9ddf4073e22adb6e7929806ef21e48eb3b580f30a7e559
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a87a8feee502953668db6e272529785853c44ef4f1789f772c9d4e4c1dd6e44
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e9da594946dcc239f89ecda3fc48286b363af1118506368ef79741edf876cc1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e19363ce1cc072264c627cb668ae2097c1f0e504b938ed74dfd1914e30a16032
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.375499894759,
5
  "eval_steps": 100,
6
- "global_step": 38900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6231,6 +6231,38 @@
6231
  "eval_samples_per_second": 24.861,
6232
  "eval_steps_per_second": 3.108,
6233
  "step": 38900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6234
  }
6235
  ],
6236
  "logging_steps": 100,
@@ -6238,7 +6270,7 @@
6238
  "num_input_tokens_seen": 0,
6239
  "num_train_epochs": 30,
6240
  "save_steps": 100,
6241
- "total_flos": 4.262537185081538e+20,
6242
  "train_batch_size": 8,
6243
  "trial_name": null,
6244
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.45969269627447,
5
  "eval_steps": 100,
6
+ "global_step": 39100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6231
  "eval_samples_per_second": 24.861,
6232
  "eval_steps_per_second": 3.108,
6233
  "step": 38900
6234
+ },
6235
+ {
6236
+ "epoch": 16.42,
6237
+ "grad_norm": 1.9076517820358276,
6238
+ "learning_rate": 2.2804240282685513e-05,
6239
+ "loss": 0.7521,
6240
+ "step": 39000
6241
+ },
6242
+ {
6243
+ "epoch": 16.42,
6244
+ "eval_cer": 0.3758725704900082,
6245
+ "eval_loss": 1.847259759902954,
6246
+ "eval_runtime": 394.9054,
6247
+ "eval_samples_per_second": 24.001,
6248
+ "eval_steps_per_second": 3.001,
6249
+ "step": 39000
6250
+ },
6251
+ {
6252
+ "epoch": 16.46,
6253
+ "grad_norm": 7.669488906860352,
6254
+ "learning_rate": 2.273356890459364e-05,
6255
+ "loss": 0.7523,
6256
+ "step": 39100
6257
+ },
6258
+ {
6259
+ "epoch": 16.46,
6260
+ "eval_cer": 0.3722576356028313,
6261
+ "eval_loss": 1.65703284740448,
6262
+ "eval_runtime": 378.1244,
6263
+ "eval_samples_per_second": 25.066,
6264
+ "eval_steps_per_second": 3.134,
6265
+ "step": 39100
6266
  }
6267
  ],
6268
  "logging_steps": 100,
 
6270
  "num_input_tokens_seen": 0,
6271
  "num_train_epochs": 30,
6272
  "save_steps": 100,
6273
+ "total_flos": 4.284622492226139e+20,
6274
  "train_batch_size": 8,
6275
  "trial_name": null,
6276
  "trial_params": null