ashanhr committed on
Commit
8682633
1 Parent(s): 85fe6de

Training in progress, step 22100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f528d8b47297dcd25f0ac457540cb950397ab08f61bd370ae0a0f0b358fcf217
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2effce087c71e7b4f5411ac336511daf6bbbe1e3ecbac9348a8792a025918d9
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6027792961a4f096acc160c9d7140e3a397456675e6172019e0257ab004c55b1
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a22c61a8979b9f44e4c8c22f66fbed3c19bc88f463a4f974eac0e1c4d0de281
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04f2845621466e928fb57dc117cf81e1ef42f0d8d81b0e11e00af6d149a1466a
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:112820ec059ab52f61273d81ee9a8ae1d15140adca820a8935a7074524ca4d69
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38c985d2a2c043a50fddf4712f00639a3630bd1174d543d92b3ca489c07fa3bb
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699594f94310cf08af521c6d50d89fc54dec73b61991d34be34e9fc33928c3be
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b891928e38cf41772d02e62b28af04076db5001b27d6077652872482d64ab94
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e6b1f4714aff0d0b0d7f3c9208419cd1fd98d5d42618c80a7c7b656aa11290a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.219111765944012,
5
  "eval_steps": 100,
6
- "global_step": 21900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3511,6 +3511,38 @@
3511
  "eval_samples_per_second": 25.649,
3512
  "eval_steps_per_second": 3.207,
3513
  "step": 21900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3514
  }
3515
  ],
3516
  "logging_steps": 100,
@@ -3518,7 +3550,7 @@
3518
  "num_input_tokens_seen": 0,
3519
  "num_train_epochs": 30,
3520
  "save_steps": 100,
3521
- "total_flos": 2.3997519912106135e+20,
3522
  "train_batch_size": 8,
3523
  "trial_name": null,
3524
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.303304567459483,
5
  "eval_steps": 100,
6
+ "global_step": 22100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3511
  "eval_samples_per_second": 25.649,
3512
  "eval_steps_per_second": 3.207,
3513
  "step": 21900
3514
+ },
3515
+ {
3516
+ "epoch": 9.26,
3517
+ "grad_norm": 2.3700144290924072,
3518
+ "learning_rate": 3.481554770318021e-05,
3519
+ "loss": 1.3379,
3520
+ "step": 22000
3521
+ },
3522
+ {
3523
+ "epoch": 9.26,
3524
+ "eval_cer": 0.4379032888819366,
3525
+ "eval_loss": 1.7394232749938965,
3526
+ "eval_runtime": 383.407,
3527
+ "eval_samples_per_second": 24.72,
3528
+ "eval_steps_per_second": 3.091,
3529
+ "step": 22000
3530
+ },
3531
+ {
3532
+ "epoch": 9.3,
3533
+ "grad_norm": 2.381225824356079,
3534
+ "learning_rate": 3.4744876325088346e-05,
3535
+ "loss": 2.2422,
3536
+ "step": 22100
3537
+ },
3538
+ {
3539
+ "epoch": 9.3,
3540
+ "eval_cer": 0.43050965937976615,
3541
+ "eval_loss": 1.691564917564392,
3542
+ "eval_runtime": 362.7906,
3543
+ "eval_samples_per_second": 26.125,
3544
+ "eval_steps_per_second": 3.266,
3545
+ "step": 22100
3546
  }
3547
  ],
3548
  "logging_steps": 100,
 
3550
  "num_input_tokens_seen": 0,
3551
  "num_train_epochs": 30,
3552
  "save_steps": 100,
3553
+ "total_flos": 2.4217119796381745e+20,
3554
  "train_batch_size": 8,
3555
  "trial_name": null,
3556
  "trial_params": null