ashanhr committed on
Commit
e691d5d
1 Parent(s): fb2c0eb

Training in progress, step 34100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4186f3650dd2c32b5c95649922f0c731bb934e125494ff655895c62239c4ca2
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d7ca71a18539b0e5311a0dea1f79620c8829516c4f0301b70e40c93b6dd15f
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff80ab491126fed19c95d290404bfe1af60734b253ff71a62d4b455264686070
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:787f866570435da5097c03fd63a23ac3505a4326fb04f45fe6573d825061c9af
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7977c1dc90467f2c33a0b7b407c78f8d85c3fac6707f046993eb9f84bc014848
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0528877f155f8458ae5e6d76793f9931e86cb37a8bd5f1b42f20b0c8c8a75e5b
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf8717eba5f8c781613f5361807f6b34d91ead5957ce05c9b325c92b9b4e28f0
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5796092c4b36ff1ae184d4548d62d205ae43e0e05d09ee1112cfb91312a8abf2
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc335cf92591df6c603a8f2a60377fa97b6288d754e2350836d0114c03b2d445
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce014af992705354a44ccf452350ccb3c435d9250a9da5c22acfca9b67b0358b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.270679856872237,
5
  "eval_steps": 100,
6
- "global_step": 33900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5431,6 +5431,38 @@
5431
  "eval_samples_per_second": 25.369,
5432
  "eval_steps_per_second": 3.172,
5433
  "step": 33900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5434
  }
5435
  ],
5436
  "logging_steps": 100,
@@ -5438,7 +5470,7 @@
5438
  "num_input_tokens_seen": 0,
5439
  "num_train_epochs": 30,
5440
  "save_steps": 100,
5441
- "total_flos": 3.715219759674186e+20,
5442
  "train_batch_size": 8,
5443
  "trial_name": null,
5444
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.354872658387707,
5
  "eval_steps": 100,
6
+ "global_step": 34100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5431
  "eval_samples_per_second": 25.369,
5432
  "eval_steps_per_second": 3.172,
5433
  "step": 33900
5434
+ },
5435
+ {
5436
+ "epoch": 14.31,
5437
+ "grad_norm": 6.195644855499268,
5438
+ "learning_rate": 2.6337102473498232e-05,
5439
+ "loss": 0.9263,
5440
+ "step": 34000
5441
+ },
5442
+ {
5443
+ "epoch": 14.31,
5444
+ "eval_cer": 0.39136375190645656,
5445
+ "eval_loss": 2.636327028274536,
5446
+ "eval_runtime": 397.3915,
5447
+ "eval_samples_per_second": 23.851,
5448
+ "eval_steps_per_second": 2.982,
5449
+ "step": 34000
5450
+ },
5451
+ {
5452
+ "epoch": 14.35,
5453
+ "grad_norm": 8.33462905883789,
5454
+ "learning_rate": 2.6266431095406364e-05,
5455
+ "loss": 1.2049,
5456
+ "step": 34100
5457
+ },
5458
+ {
5459
+ "epoch": 14.35,
5460
+ "eval_cer": 0.38994857455711546,
5461
+ "eval_loss": 1.6413642168045044,
5462
+ "eval_runtime": 373.2342,
5463
+ "eval_samples_per_second": 25.394,
5464
+ "eval_steps_per_second": 3.175,
5465
+ "step": 34100
5466
  }
5467
  ],
5468
  "logging_steps": 100,
 
5470
  "num_input_tokens_seen": 0,
5471
  "num_train_epochs": 30,
5472
  "save_steps": 100,
5473
+ "total_flos": 3.7367891051512955e+20,
5474
  "train_batch_size": 8,
5475
  "trial_name": null,
5476
  "trial_params": null