ashanhr commited on
Commit
ff0c6cb
1 Parent(s): 14a0c6f

Training in progress, step 52700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9e3ec186d1506bf3f5a46e86055cacd6b0c9b4ffbe24417c8eef77757faea0c
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fe1c93b8017d74a09b637d5419b72f561139b3c1af762e8562b3cb8786654a7
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9444eb6b2eef3ecb411f123c639c2a92b8042b5a5c5be1aa90832ac5c20bbba
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32f313c4502ab1516bf134a75993c4c561fdee3ec545f99a82ba52cad86980eb
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be94356ea572d4e68c6fb375bc81fdb494cbc39b516053b8fc4f2f6588936802
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a976a4a597b8b9341405e32ca9ff17df9bc058b930b3fb15473565039dee635
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:988f89765ff989d1f443819280677bc95db9d024f3412fe0f8825777a7b0fb01
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d694511c38cf842adf62f634f5a20eb55c12c93d605d719c21b8dcbbcf72b0
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd383b5b8f3f451872508e62000f649671b5a3de6d9b2cd72dc13514c55d7d6b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726de08061b13aa65db2732d53ac93e0334fa97527eefc3b67e0a6227d537660
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 22.100610397810986,
5
  "eval_steps": 100,
6
- "global_step": 52500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8407,6 +8407,38 @@
8407
  "eval_samples_per_second": 24.249,
8408
  "eval_steps_per_second": 3.032,
8409
  "step": 52500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8410
  }
8411
  ],
8412
  "logging_steps": 100,
@@ -8414,7 +8446,7 @@
8414
  "num_input_tokens_seen": 0,
8415
  "num_train_epochs": 30,
8416
  "save_steps": 100,
8417
- "total_flos": 5.753031817537913e+20,
8418
  "train_batch_size": 8,
8419
  "trial_name": null,
8420
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 22.184803199326456,
5
  "eval_steps": 100,
6
+ "global_step": 52700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8407
  "eval_samples_per_second": 24.249,
8408
  "eval_steps_per_second": 3.032,
8409
  "step": 52500
8410
+ },
8411
+ {
8412
+ "epoch": 22.14,
8413
+ "grad_norm": 2.155449151992798,
8414
+ "learning_rate": 1.3197173144876326e-05,
8415
+ "loss": 0.2968,
8416
+ "step": 52600
8417
+ },
8418
+ {
8419
+ "epoch": 22.14,
8420
+ "eval_cer": 0.3380514058894842,
8421
+ "eval_loss": 2.6296260356903076,
8422
+ "eval_runtime": 425.2856,
8423
+ "eval_samples_per_second": 22.286,
8424
+ "eval_steps_per_second": 2.786,
8425
+ "step": 52600
8426
+ },
8427
+ {
8428
+ "epoch": 22.18,
8429
+ "grad_norm": 11.153074264526367,
8430
+ "learning_rate": 1.3126501766784453e-05,
8431
+ "loss": 0.2936,
8432
+ "step": 52700
8433
+ },
8434
+ {
8435
+ "epoch": 22.18,
8436
+ "eval_cer": 0.33938836963748,
8437
+ "eval_loss": 2.600175142288208,
8438
+ "eval_runtime": 394.8419,
8439
+ "eval_samples_per_second": 24.005,
8440
+ "eval_steps_per_second": 3.001,
8441
+ "step": 52700
8442
  }
8443
  ],
8444
  "logging_steps": 100,
 
8446
  "num_input_tokens_seen": 0,
8447
  "num_train_epochs": 30,
8448
  "save_steps": 100,
8449
+ "total_flos": 5.7751950642202254e+20,
8450
  "train_batch_size": 8,
8451
  "trial_name": null,
8452
  "trial_params": null