ashanhr commited on
Commit
ea0c2f5
·
verified ·
1 Parent(s): c2a7b3d

Training in progress, step 27700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2990a3bb6d69e18c103cd2337804d461c6c67326e8e0f062fe20b13112717c31
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5a9b68d851de5d3211a5786bc155bb288dd74d58e4dc5493cda8e9bb8f5b66c
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3f96081b3cdc78cf713fd98ff997e0bb9a91ae50630385c65abc9cf9314347b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a91c760806ff07f3048741537eb7db0ce9a9fc0545b24f11d5446b75977c0636
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7000394dc2c4f6debb1651edfed8e64b05b328880d95d9bde162788468b1edcb
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:006fe2f9891b8bb244bfb635af4ee42dc360b27e8e2fbe6b729ce4508aa080ac
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f83e3367e58d9a4431dc150c90b3ffeb9508f969b56e80c75c390795e94f6d91
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58fad5d4b2449cf648eabcb3d279346b6662a87e99fb79ac46b0c8815722381f
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9171e687a5332bf79e87905511cf6fd0514a885fad349a7f1665c05a38dc32e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:739f0eb8d8ec6d67ffc73cb7a845f894d3c8ae14810b720e71f07a82de397bb6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.576510208377183,
5
  "eval_steps": 100,
6
- "global_step": 27500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4407,6 +4407,38 @@
4407
  "eval_samples_per_second": 25.439,
4408
  "eval_steps_per_second": 3.181,
4409
  "step": 27500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4410
  }
4411
  ],
4412
  "logging_steps": 100,
@@ -4414,7 +4446,7 @@
4414
  "num_input_tokens_seen": 0,
4415
  "num_train_epochs": 30,
4416
  "save_steps": 100,
4417
- "total_flos": 3.014141824593385e+20,
4418
  "train_batch_size": 8,
4419
  "trial_name": null,
4420
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.660703009892654,
5
  "eval_steps": 100,
6
+ "global_step": 27700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4407
  "eval_samples_per_second": 25.439,
4408
  "eval_steps_per_second": 3.181,
4409
  "step": 27500
4410
+ },
4411
+ {
4412
+ "epoch": 11.62,
4413
+ "grad_norm": 2.0867862701416016,
4414
+ "learning_rate": 3.085865724381626e-05,
4415
+ "loss": 1.1146,
4416
+ "step": 27600
4417
+ },
4418
+ {
4419
+ "epoch": 11.62,
4420
+ "eval_cer": 0.41358374721364044,
4421
+ "eval_loss": 2.7960143089294434,
4422
+ "eval_runtime": 392.1126,
4423
+ "eval_samples_per_second": 24.172,
4424
+ "eval_steps_per_second": 3.022,
4425
+ "step": 27600
4426
+ },
4427
+ {
4428
+ "epoch": 11.66,
4429
+ "grad_norm": 1.8952158689498901,
4430
+ "learning_rate": 3.0787985865724386e-05,
4431
+ "loss": 1.1186,
4432
+ "step": 27700
4433
+ },
4434
+ {
4435
+ "epoch": 11.66,
4436
+ "eval_cer": 0.41495004106214073,
4437
+ "eval_loss": 2.6641368865966797,
4438
+ "eval_runtime": 367.6488,
4439
+ "eval_samples_per_second": 25.78,
4440
+ "eval_steps_per_second": 3.223,
4441
+ "step": 27700
4442
  }
4443
  ],
4444
  "logging_steps": 100,
 
4446
  "num_input_tokens_seen": 0,
4447
  "num_train_epochs": 30,
4448
  "save_steps": 100,
4449
+ "total_flos": 3.035915998579374e+20,
4450
  "train_batch_size": 8,
4451
  "trial_name": null,
4452
  "trial_params": null