ashanhr commited on
Commit
cc425d6
1 Parent(s): e036272

Training in progress, step 52300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a160b4e175184a931e59f6e0e078e62570ed360295c0fe949679c0e727f5eea
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b49f0d391833f22a3a65f3991676e2a987a97b88cd0ad802eb1db8d1e6fe024
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:362756a97ee496019c94541204764d63f92ffafe15a248f8434077d701710266
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87ea0d5906e0a8cad3a20b57981e4b95dcaa8043c17d437ec383528d77b70576
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1af97be2351717a648f3ed627e3a5da1be85bd899722f95b48558a241429b0c7
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7486ea49baf77b45c7355012daf8b9a61b16da9ea0572d25ecdf994887dc4344
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5d06a7cdc46e4a528aeb9bb66bf58e9c1dcb3edea8772b30cb812147232c3dd
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfcdd93522276fcda8f039579079d552064ebfca8be5aa43e5f90d00df30ae75
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff5d1aa72f6daaa98e8456979d83438a50fc20467c64d414fc9148e60e30d500
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc85812b74cb6bb673f1e4f6b9869a44b0f2cf4d744fa3459bb02f98086cf88
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 21.932224794780048,
5
  "eval_steps": 100,
6
- "global_step": 52100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8343,6 +8343,38 @@
8343
  "eval_samples_per_second": 24.409,
8344
  "eval_steps_per_second": 3.052,
8345
  "step": 52100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8346
  }
8347
  ],
8348
  "logging_steps": 100,
@@ -8350,7 +8382,7 @@
8350
  "num_input_tokens_seen": 0,
8351
  "num_train_epochs": 30,
8352
  "save_steps": 100,
8353
- "total_flos": 5.7089616015663346e+20,
8354
  "train_batch_size": 8,
8355
  "trial_name": null,
8356
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 22.01641759629552,
5
  "eval_steps": 100,
6
+ "global_step": 52300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8343
  "eval_samples_per_second": 24.409,
8344
  "eval_steps_per_second": 3.052,
8345
  "step": 52100
8346
+ },
8347
+ {
8348
+ "epoch": 21.97,
8349
+ "grad_norm": 1.7431000471115112,
8350
+ "learning_rate": 1.3479858657243816e-05,
8351
+ "loss": 0.3401,
8352
+ "step": 52200
8353
+ },
8354
+ {
8355
+ "epoch": 21.97,
8356
+ "eval_cer": 0.3359225294278675,
8357
+ "eval_loss": 2.4058258533477783,
8358
+ "eval_runtime": 412.0183,
8359
+ "eval_samples_per_second": 23.004,
8360
+ "eval_steps_per_second": 2.876,
8361
+ "step": 52200
8362
+ },
8363
+ {
8364
+ "epoch": 22.02,
8365
+ "grad_norm": 2.714993953704834,
8366
+ "learning_rate": 1.3409187279151944e-05,
8367
+ "loss": 0.3222,
8368
+ "step": 52300
8369
+ },
8370
+ {
8371
+ "epoch": 22.02,
8372
+ "eval_cer": 0.33601051972938095,
8373
+ "eval_loss": 2.8305118083953857,
8374
+ "eval_runtime": 391.1586,
8375
+ "eval_samples_per_second": 24.231,
8376
+ "eval_steps_per_second": 3.029,
8377
+ "step": 52300
8378
  }
8379
  ],
8380
  "logging_steps": 100,
 
8382
  "num_input_tokens_seen": 0,
8383
  "num_train_epochs": 30,
8384
  "save_steps": 100,
8385
+ "total_flos": 5.7311072805771385e+20,
8386
  "train_batch_size": 8,
8387
  "trial_name": null,
8388
  "trial_params": null