ashanhr commited on
Commit
b689ed5
·
verified ·
1 Parent(s): 5dd940f

Training in progress, step 20800, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d497e2e9992b95123edca43ea44a4f202dad1f6f849d37d3412aa944ee8b758
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e4814097f95292581f1fa41351bbc66ced1628a34f7e406c1d973bdfa268c2
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebbfddaab2cb6b8976eb7300b0190223785e3130315d50c050c962fc69fe2a84
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4281b3413ac6bfae58364c1649b3032d24b966004ceed96933c7ba6a5d6273b
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50d1b4164fc962c7aec4eee9c6701be1ec088b6f308360e6a9ee1734436272b1
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45f12f5880c497ab5a7a2b205f32136065c330a41316d4e0177eb04fd5d02d93
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c4ec211a6cd6bd5b7539fcf647a5b942b4dfee369f2618d5c30e0877818725f
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6484a98615c86c255086193fe3b7042881674afdf95988ec90167572328c2cbb
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05bf1a7e422f9aff57863d8882bd203339bb8d9f359aa0d93a3570a059702d97
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c543c9e610e453a76673ad3939ff1bdf7432f91d895bb0c3b414985ca5e44f7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.671858556093454,
5
  "eval_steps": 100,
6
- "global_step": 20600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3303,6 +3303,38 @@
3303
  "eval_samples_per_second": 25.811,
3304
  "eval_steps_per_second": 3.227,
3305
  "step": 20600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3306
  }
3307
  ],
3308
  "logging_steps": 100,
@@ -3310,7 +3342,7 @@
3310
  "num_input_tokens_seen": 0,
3311
  "num_train_epochs": 30,
3312
  "save_steps": 100,
3313
- "total_flos": 2.2571911239866106e+20,
3314
  "train_batch_size": 8,
3315
  "trial_name": null,
3316
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.756051357608925,
5
  "eval_steps": 100,
6
+ "global_step": 20800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3303
  "eval_samples_per_second": 25.811,
3304
  "eval_steps_per_second": 3.227,
3305
  "step": 20600
3306
+ },
3307
+ {
3308
+ "epoch": 8.71,
3309
+ "grad_norm": 2.2183985710144043,
3310
+ "learning_rate": 3.573356890459364e-05,
3311
+ "loss": 2.1266,
3312
+ "step": 20700
3313
+ },
3314
+ {
3315
+ "epoch": 8.71,
3316
+ "eval_cer": 0.45651323765202767,
3317
+ "eval_loss": 1.70187246799469,
3318
+ "eval_runtime": 382.2617,
3319
+ "eval_samples_per_second": 24.795,
3320
+ "eval_steps_per_second": 3.1,
3321
+ "step": 20700
3322
+ },
3323
+ {
3324
+ "epoch": 8.76,
3325
+ "grad_norm": 1.713640570640564,
3326
+ "learning_rate": 3.5662897526501774e-05,
3327
+ "loss": 1.3038,
3328
+ "step": 20800
3329
+ },
3330
+ {
3331
+ "epoch": 8.76,
3332
+ "eval_cer": 0.44218548355559034,
3333
+ "eval_loss": 1.8959708213806152,
3334
+ "eval_runtime": 362.6249,
3335
+ "eval_samples_per_second": 26.137,
3336
+ "eval_steps_per_second": 3.268,
3337
+ "step": 20800
3338
  }
3339
  ],
3340
  "logging_steps": 100,
 
3342
  "num_input_tokens_seen": 0,
3343
  "num_train_epochs": 30,
3344
  "save_steps": 100,
3345
+ "total_flos": 2.278855149050551e+20,
3346
  "train_batch_size": 8,
3347
  "trial_name": null,
3348
  "trial_params": null