ashanhr commited on
Commit
8d71147
1 Parent(s): ba493c0

Training in progress, step 27500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e15296cc7fc7c22faf537cf2d8c8189212cd4050aec0cc26ecd5ca6b2b31430c
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2990a3bb6d69e18c103cd2337804d461c6c67326e8e0f062fe20b13112717c31
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efb178d8b8052b34bfc9ce5171bddbd008d7445958fd6793e6c78c28460f94e3
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f96081b3cdc78cf713fd98ff997e0bb9a91ae50630385c65abc9cf9314347b
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:616fbff97ed13f412ac2118b023cc68e97d2cdaa667165e80291c9b722839e9c
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7000394dc2c4f6debb1651edfed8e64b05b328880d95d9bde162788468b1edcb
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e1fa851642b5df9872122f6cd49ffbd3855aa0c135f0547c13649b70bce71d4
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f83e3367e58d9a4431dc150c90b3ffeb9508f969b56e80c75c390795e94f6d91
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1556b06bd9834de95519bce0dfbc04d14a4931ce96de22038d246f85d978575d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9171e687a5332bf79e87905511cf6fd0514a885fad349a7f1665c05a38dc32e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.492317406861714,
5
  "eval_steps": 100,
6
- "global_step": 27300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4375,6 +4375,38 @@
4375
  "eval_samples_per_second": 24.145,
4376
  "eval_steps_per_second": 3.019,
4377
  "step": 27300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4378
  }
4379
  ],
4380
  "logging_steps": 100,
@@ -4382,7 +4414,7 @@
4382
  "num_input_tokens_seen": 0,
4383
  "num_train_epochs": 30,
4384
  "save_steps": 100,
4385
- "total_flos": 2.992071149315134e+20,
4386
  "train_batch_size": 8,
4387
  "trial_name": null,
4388
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.576510208377183,
5
  "eval_steps": 100,
6
+ "global_step": 27500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4375
  "eval_samples_per_second": 24.145,
4376
  "eval_steps_per_second": 3.019,
4377
  "step": 27300
4378
+ },
4379
+ {
4380
+ "epoch": 11.53,
4381
+ "grad_norm": 19.142953872680664,
4382
+ "learning_rate": 3.1e-05,
4383
+ "loss": 1.1284,
4384
+ "step": 27400
4385
+ },
4386
+ {
4387
+ "epoch": 11.53,
4388
+ "eval_cer": 0.43251877126432287,
4389
+ "eval_loss": 2.1405186653137207,
4390
+ "eval_runtime": 413.4099,
4391
+ "eval_samples_per_second": 22.926,
4392
+ "eval_steps_per_second": 2.866,
4393
+ "step": 27400
4394
+ },
4395
+ {
4396
+ "epoch": 11.58,
4397
+ "grad_norm": 9.353096961975098,
4398
+ "learning_rate": 3.092932862190813e-05,
4399
+ "loss": 1.3106,
4400
+ "step": 27500
4401
+ },
4402
+ {
4403
+ "epoch": 11.58,
4404
+ "eval_cer": 0.4250787024363537,
4405
+ "eval_loss": 2.7651588916778564,
4406
+ "eval_runtime": 372.5808,
4407
+ "eval_samples_per_second": 25.439,
4408
+ "eval_steps_per_second": 3.181,
4409
+ "step": 27500
4410
  }
4411
  ],
4412
  "logging_steps": 100,
 
4414
  "num_input_tokens_seen": 0,
4415
  "num_train_epochs": 30,
4416
  "save_steps": 100,
4417
+ "total_flos": 3.014141824593385e+20,
4418
  "train_batch_size": 8,
4419
  "trial_name": null,
4420
  "trial_params": null