ashanhr committed on
Commit
90ccc9f
1 Parent(s): b039785

Training in progress, step 52900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fe1c93b8017d74a09b637d5419b72f561139b3c1af762e8562b3cb8786654a7
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38856c5de97012a6bbddb265016f45f787498a984601f0d5f3432a3dd21d3b08
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32f313c4502ab1516bf134a75993c4c561fdee3ec545f99a82ba52cad86980eb
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27f7daf3b2a573c62d3f1e255b201aea1d79bd60a6b957765f25b4a1f67c0e2c
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a976a4a597b8b9341405e32ca9ff17df9bc058b930b3fb15473565039dee635
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2396e720c3c604a71d73a5cfdd04c82f1b06481005dcbfbc4e609d17cd8dc459
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39d694511c38cf842adf62f634f5a20eb55c12c93d605d719c21b8dcbbcf72b0
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f4b3f4fdec252a3fb47435e761c164da7b4d341aa24a2cc22079721594d2e4
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:726de08061b13aa65db2732d53ac93e0334fa97527eefc3b67e0a6227d537660
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07280b3905388184ad6f524c0c254f08ba9be422c3d6452b00f45b36031ca6fa
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 22.184803199326456,
5
  "eval_steps": 100,
6
- "global_step": 52700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8439,6 +8439,38 @@
8439
  "eval_samples_per_second": 24.005,
8440
  "eval_steps_per_second": 3.001,
8441
  "step": 52700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8442
  }
8443
  ],
8444
  "logging_steps": 100,
@@ -8446,7 +8478,7 @@
8446
  "num_input_tokens_seen": 0,
8447
  "num_train_epochs": 30,
8448
  "save_steps": 100,
8449
- "total_flos": 5.7751950642202254e+20,
8450
  "train_batch_size": 8,
8451
  "trial_name": null,
8452
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 22.268996000841927,
5
  "eval_steps": 100,
6
+ "global_step": 52900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8439
  "eval_samples_per_second": 24.005,
8440
  "eval_steps_per_second": 3.001,
8441
  "step": 52700
8442
+ },
8443
+ {
8444
+ "epoch": 22.23,
8445
+ "grad_norm": 1.9212164878845215,
8446
+ "learning_rate": 1.305583038869258e-05,
8447
+ "loss": 0.3058,
8448
+ "step": 52800
8449
+ },
8450
+ {
8451
+ "epoch": 22.23,
8452
+ "eval_cer": 0.3395081342145399,
8453
+ "eval_loss": 2.419398546218872,
8454
+ "eval_runtime": 417.7544,
8455
+ "eval_samples_per_second": 22.688,
8456
+ "eval_steps_per_second": 2.837,
8457
+ "step": 52800
8458
+ },
8459
+ {
8460
+ "epoch": 22.27,
8461
+ "grad_norm": 4.1538004875183105,
8462
+ "learning_rate": 1.2985159010600709e-05,
8463
+ "loss": 0.2954,
8464
+ "step": 52900
8465
+ },
8466
+ {
8467
+ "epoch": 22.27,
8468
+ "eval_cer": 0.33905107348167846,
8469
+ "eval_loss": 2.4820761680603027,
8470
+ "eval_runtime": 388.8242,
8471
+ "eval_samples_per_second": 24.376,
8472
+ "eval_steps_per_second": 3.048,
8473
+ "step": 52900
8474
  }
8475
  ],
8476
  "logging_steps": 100,
 
8478
  "num_input_tokens_seen": 0,
8479
  "num_train_epochs": 30,
8480
  "save_steps": 100,
8481
+ "total_flos": 5.796971564047972e+20,
8482
  "train_batch_size": 8,
8483
  "trial_name": null,
8484
  "trial_params": null