ashanhr commited on
Commit
bfb5810
1 Parent(s): 0d1c3ff

Training in progress, step 20600, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:424006b63705ff8d128ce03cf112d0f0202b9b53563618c80069b74aadf6b121
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d497e2e9992b95123edca43ea44a4f202dad1f6f849d37d3412aa944ee8b758
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:486219a9cbd763fd42552ee2a1a2647b831dc1b08e3f181c8b5b51fe133f0228
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebbfddaab2cb6b8976eb7300b0190223785e3130315d50c050c962fc69fe2a84
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0908165977f8c3d9489a849f85d9268f712bef0a616c664c5b76ba43a71c7f3d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d1b4164fc962c7aec4eee9c6701be1ec088b6f308360e6a9ee1734436272b1
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae9aa79a869a104ab7e27c688f3cfabb1a5b016e6c0d9194e567985352b95f11
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c4ec211a6cd6bd5b7539fcf647a5b942b4dfee369f2618d5c30e0877818725f
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:006c5e8305ef9dc1fe3403e1173068b8cb7f990d0fc5d4b955050dc8157ec06d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05bf1a7e422f9aff57863d8882bd203339bb8d9f359aa0d93a3570a059702d97
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.587665754577984,
5
  "eval_steps": 100,
6
- "global_step": 20400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3271,6 +3271,38 @@
3271
  "eval_samples_per_second": 26.076,
3272
  "eval_steps_per_second": 3.26,
3273
  "step": 20400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3274
  }
3275
  ],
3276
  "logging_steps": 100,
@@ -3278,7 +3310,7 @@
3278
  "num_input_tokens_seen": 0,
3279
  "num_train_epochs": 30,
3280
  "save_steps": 100,
3281
- "total_flos": 2.2352658822253032e+20,
3282
  "train_batch_size": 8,
3283
  "trial_name": null,
3284
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.671858556093454,
5
  "eval_steps": 100,
6
+ "global_step": 20600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3271
  "eval_samples_per_second": 26.076,
3272
  "eval_steps_per_second": 3.26,
3273
  "step": 20400
3274
+ },
3275
+ {
3276
+ "epoch": 8.63,
3277
+ "grad_norm": 3.379791498184204,
3278
+ "learning_rate": 3.587491166077739e-05,
3279
+ "loss": 1.4381,
3280
+ "step": 20500
3281
+ },
3282
+ {
3283
+ "epoch": 8.63,
3284
+ "eval_cer": 0.4450402800046928,
3285
+ "eval_loss": 2.9096951484680176,
3286
+ "eval_runtime": 384.973,
3287
+ "eval_samples_per_second": 24.62,
3288
+ "eval_steps_per_second": 3.078,
3289
+ "step": 20500
3290
+ },
3291
+ {
3292
+ "epoch": 8.67,
3293
+ "grad_norm": 2.5090067386627197,
3294
+ "learning_rate": 3.580424028268551e-05,
3295
+ "loss": 1.6658,
3296
+ "step": 20600
3297
+ },
3298
+ {
3299
+ "epoch": 8.67,
3300
+ "eval_cer": 0.4514855695905518,
3301
+ "eval_loss": 2.3211405277252197,
3302
+ "eval_runtime": 367.2057,
3303
+ "eval_samples_per_second": 25.811,
3304
+ "eval_steps_per_second": 3.227,
3305
+ "step": 20600
3306
  }
3307
  ],
3308
  "logging_steps": 100,
 
3310
  "num_input_tokens_seen": 0,
3311
  "num_train_epochs": 30,
3312
  "save_steps": 100,
3313
+ "total_flos": 2.2571911239866106e+20,
3314
  "train_batch_size": 8,
3315
  "trial_name": null,
3316
  "trial_params": null