ashanhr committed on
Commit
431e9a6
1 Parent(s): f69971a

Training in progress, step 21400, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64d2e194abb4d33bd2df007c3f2dfe7c3090b6d911c750db68f03541f777960a
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd295564c886cab31b38fdc522144c8184d26012f8017f463f8b19068b69ce54
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a956ba41ba48f1e1424ba9b1b15a3ff3f56d507463416d550ee26a3eaf57770b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10129ed8317e1eb717c2b209ed001fa2e165c880f232b8ab7fc6026685e36a65
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc981ba432a5dc7f3000d740be2b6cd1bdf1ec48f451e86e92d2708e7dd2aa39
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d5ca3cb0ab23b9c64be740c79b07d64816cd7d9490aeeae08ccc4a3f3b37ddc
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb8ac8461488f7d31ff771b67510406fd1847a4a10da6092663344858ebfa7ed
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a20e1a39d049694d044e652201b8a810f3562871d67cc396aaad5e0bbc2f803
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62c80163459f707d7a13e0f4c1cac2205e273df5ba9942f544868dc9dce2b481
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48225e4ec8a66fcccd258aec5acd97fada08046f07bbc5a0c629141f0b9c9d7d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.924436960639865,
5
  "eval_steps": 100,
6
- "global_step": 21200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3399,6 +3399,38 @@
3399
  "eval_samples_per_second": 25.994,
3400
  "eval_steps_per_second": 3.25,
3401
  "step": 21200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3402
  }
3403
  ],
3404
  "logging_steps": 100,
@@ -3406,7 +3438,7 @@
3406
  "num_input_tokens_seen": 0,
3407
  "num_train_epochs": 30,
3408
  "save_steps": 100,
3409
- "total_flos": 2.3225360192155995e+20,
3410
  "train_batch_size": 8,
3411
  "trial_name": null,
3412
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.008629762155335,
5
  "eval_steps": 100,
6
+ "global_step": 21400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3399
  "eval_samples_per_second": 25.994,
3400
  "eval_steps_per_second": 3.25,
3401
  "step": 21200
3402
+ },
3403
+ {
3404
+ "epoch": 8.97,
3405
+ "grad_norm": 3.520254135131836,
3406
+ "learning_rate": 3.53095406360424e-05,
3407
+ "loss": 1.2819,
3408
+ "step": 21300
3409
+ },
3410
+ {
3411
+ "epoch": 8.97,
3412
+ "eval_cer": 0.44700784091353485,
3413
+ "eval_loss": 1.840844988822937,
3414
+ "eval_runtime": 393.1165,
3415
+ "eval_samples_per_second": 24.11,
3416
+ "eval_steps_per_second": 3.014,
3417
+ "step": 21300
3418
+ },
3419
+ {
3420
+ "epoch": 9.01,
3421
+ "grad_norm": 1.5368082523345947,
3422
+ "learning_rate": 3.523886925795053e-05,
3423
+ "loss": 1.2587,
3424
+ "step": 21400
3425
+ },
3426
+ {
3427
+ "epoch": 9.01,
3428
+ "eval_cer": 0.43732890774705724,
3429
+ "eval_loss": 2.2236878871917725,
3430
+ "eval_runtime": 363.5106,
3431
+ "eval_samples_per_second": 26.074,
3432
+ "eval_steps_per_second": 3.26,
3433
+ "step": 21400
3434
  }
3435
  ],
3436
  "logging_steps": 100,
 
3438
  "num_input_tokens_seen": 0,
3439
  "num_train_epochs": 30,
3440
  "save_steps": 100,
3441
+ "total_flos": 2.3455923104904626e+20,
3442
  "train_batch_size": 8,
3443
  "trial_name": null,
3444
  "trial_params": null