ashanhr commited on
Commit
fb2c0eb
·
verified ·
1 Parent(s): 042f132

Training in progress, step 33900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bdec49f5ba937575c37201bec00db52b8c3e844821eecc385ba239a33663f10
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4186f3650dd2c32b5c95649922f0c731bb934e125494ff655895c62239c4ca2
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:619c1c99184b876365ca973837576037ba7b39716899416a2f1019041043ea17
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff80ab491126fed19c95d290404bfe1af60734b253ff71a62d4b455264686070
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0729b7a58599ea8921b314d9c9621e535f2d82f309864faa4b6d052417b8b263
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7977c1dc90467f2c33a0b7b407c78f8d85c3fac6707f046993eb9f84bc014848
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44661c431a879e3226aadc89367134cd0b53d52f6af54472aaa115083d69b12e
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8717eba5f8c781613f5361807f6b34d91ead5957ce05c9b325c92b9b4e28f0
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a48819e7745c34bedd45ae62b08be3d10aa46039968cd3e6efddcf11d1d5426
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc335cf92591df6c603a8f2a60377fa97b6288d754e2350836d0114c03b2d445
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.186487055356768,
5
  "eval_steps": 100,
6
- "global_step": 33700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5399,6 +5399,38 @@
5399
  "eval_samples_per_second": 25.278,
5400
  "eval_steps_per_second": 3.16,
5401
  "step": 33700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5402
  }
5403
  ],
5404
  "logging_steps": 100,
@@ -5406,7 +5438,7 @@
5406
  "num_input_tokens_seen": 0,
5407
  "num_train_epochs": 30,
5408
  "save_steps": 100,
5409
- "total_flos": 3.693186041855623e+20,
5410
  "train_batch_size": 8,
5411
  "trial_name": null,
5412
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.270679856872237,
5
  "eval_steps": 100,
6
+ "global_step": 33900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5399
  "eval_samples_per_second": 25.278,
5400
  "eval_steps_per_second": 3.16,
5401
  "step": 33700
5402
+ },
5403
+ {
5404
+ "epoch": 14.23,
5405
+ "grad_norm": 2.4856455326080322,
5406
+ "learning_rate": 2.647844522968198e-05,
5407
+ "loss": 0.9387,
5408
+ "step": 33800
5409
+ },
5410
+ {
5411
+ "epoch": 14.23,
5412
+ "eval_cer": 0.3982514371749247,
5413
+ "eval_loss": 1.5301440954208374,
5414
+ "eval_runtime": 391.5929,
5415
+ "eval_samples_per_second": 24.204,
5416
+ "eval_steps_per_second": 3.026,
5417
+ "step": 33800
5418
+ },
5419
+ {
5420
+ "epoch": 14.27,
5421
+ "grad_norm": 21.76763916015625,
5422
+ "learning_rate": 2.6407773851590107e-05,
5423
+ "loss": 0.9334,
5424
+ "step": 33900
5425
+ },
5426
+ {
5427
+ "epoch": 14.27,
5428
+ "eval_cer": 0.3909311329240155,
5429
+ "eval_loss": 2.4011144638061523,
5430
+ "eval_runtime": 373.6109,
5431
+ "eval_samples_per_second": 25.369,
5432
+ "eval_steps_per_second": 3.172,
5433
+ "step": 33900
5434
  }
5435
  ],
5436
  "logging_steps": 100,
 
5438
  "num_input_tokens_seen": 0,
5439
  "num_train_epochs": 30,
5440
  "save_steps": 100,
5441
+ "total_flos": 3.715219759674186e+20,
5442
  "train_batch_size": 8,
5443
  "trial_name": null,
5444
  "trial_params": null