ashanhr committed on
Commit
c1c779a
1 Parent(s): 30df762

Training in progress, step 47100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dfbe3fca61cc77c707b6d697b41387c813bc32fd6d32bd5c1c26762aa6213e5
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b23224f934a17dc506ef1427bbf8cfdde7e61c9bdb99bd40ce08a456d490987e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a79ff95105c45438b111e350a13810a36dce99b6fcbea094936e85e5e0a2e3ab
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7371001d4ba61fec2f6a59fd465a7448ca595cd03c65ce64b593c30a44dfdc70
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96fafe221adf68f53b28e28362f927e1c10f230fee3f19fb1f737d95674a280f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07b8ed1471e9d069014988f3a9a12d0676eb5b06684acbd3950cb1c7d97542ef
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a65da35c1d2659bcfb09a26ed28519b60a522956dd7f331870d788477aa1a474
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78799255d33f45caeb9e5437f76e75e8206e20b85f04c1c95d15dd5e461cdfad
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3e7a17c080461acc65ee185f2f70c19263f58e165eb35d2d33f300ca2a7104a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc0e4855190486656b49bc2f9aa7a1bf13ae4f23b65ee19ad5630270c8d3910a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.743211955377816,
5
  "eval_steps": 100,
6
- "global_step": 46900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7511,6 +7511,38 @@
7511
  "eval_samples_per_second": 25.662,
7512
  "eval_steps_per_second": 3.208,
7513
  "step": 46900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7514
  }
7515
  ],
7516
  "logging_steps": 100,
@@ -7518,7 +7550,7 @@
7518
  "num_input_tokens_seen": 0,
7519
  "num_train_epochs": 30,
7520
  "save_steps": 100,
7521
- "total_flos": 5.13961492307041e+20,
7522
  "train_batch_size": 8,
7523
  "trial_name": null,
7524
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.827404756893287,
5
  "eval_steps": 100,
6
+ "global_step": 47100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7511
  "eval_samples_per_second": 25.662,
7512
  "eval_steps_per_second": 3.208,
7513
  "step": 46900
7514
+ },
7515
+ {
7516
+ "epoch": 19.79,
7517
+ "grad_norm": 2.5223989486694336,
7518
+ "learning_rate": 1.7153356890459365e-05,
7519
+ "loss": 0.4852,
7520
+ "step": 47000
7521
+ },
7522
+ {
7523
+ "epoch": 19.79,
7524
+ "eval_cer": 0.34921639748152206,
7525
+ "eval_loss": 2.3917760848999023,
7526
+ "eval_runtime": 383.3871,
7527
+ "eval_samples_per_second": 24.722,
7528
+ "eval_steps_per_second": 3.091,
7529
+ "step": 47000
7530
+ },
7531
+ {
7532
+ "epoch": 19.83,
7533
+ "grad_norm": 13.549793243408203,
7534
+ "learning_rate": 1.7082685512367493e-05,
7535
+ "loss": 0.4983,
7536
+ "step": 47100
7537
+ },
7538
+ {
7539
+ "epoch": 19.83,
7540
+ "eval_cer": 0.34820450901411754,
7541
+ "eval_loss": 2.331942319869995,
7542
+ "eval_runtime": 370.3076,
7543
+ "eval_samples_per_second": 25.595,
7544
+ "eval_steps_per_second": 3.2,
7545
+ "step": 47100
7546
  }
7547
  ],
7548
  "logging_steps": 100,
 
7550
  "num_input_tokens_seen": 0,
7551
  "num_train_epochs": 30,
7552
  "save_steps": 100,
7553
+ "total_flos": 5.161374507685376e+20,
7554
  "train_batch_size": 8,
7555
  "trial_name": null,
7556
  "trial_params": null