ashanhr committed on
Commit
49c7a66
1 Parent(s): c1c779a

Training in progress, step 47300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b23224f934a17dc506ef1427bbf8cfdde7e61c9bdb99bd40ce08a456d490987e
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a7eca189b6cba3d0264a3a6b528f4d768503319d4957c82aafb34cecdfdf4a
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7371001d4ba61fec2f6a59fd465a7448ca595cd03c65ce64b593c30a44dfdc70
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44c9dabcde04ec924b500d0f45e98bbda60772b9108bedac5d65641ce067cb9
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07b8ed1471e9d069014988f3a9a12d0676eb5b06684acbd3950cb1c7d97542ef
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5815b1015337a35948babb6d5b6ab9cf7c776143678a541486d413ba284536e
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78799255d33f45caeb9e5437f76e75e8206e20b85f04c1c95d15dd5e461cdfad
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d54689fcde880091f1c0ee11daaede986eacb0d5ba40ec6c4cb9bad1bdacaa9
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc0e4855190486656b49bc2f9aa7a1bf13ae4f23b65ee19ad5630270c8d3910a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d0677dbdd096b9b20e1b9970b686999e123df432ab47b960e13026ac8fbafd2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.827404756893287,
5
  "eval_steps": 100,
6
- "global_step": 47100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7543,6 +7543,38 @@
7543
  "eval_samples_per_second": 25.595,
7544
  "eval_steps_per_second": 3.2,
7545
  "step": 47100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7546
  }
7547
  ],
7548
  "logging_steps": 100,
@@ -7550,7 +7582,7 @@
7550
  "num_input_tokens_seen": 0,
7551
  "num_train_epochs": 30,
7552
  "save_steps": 100,
7553
- "total_flos": 5.161374507685376e+20,
7554
  "train_batch_size": 8,
7555
  "trial_name": null,
7556
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.911597558408754,
5
  "eval_steps": 100,
6
+ "global_step": 47300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7543
  "eval_samples_per_second": 25.595,
7544
  "eval_steps_per_second": 3.2,
7545
  "step": 47100
7546
+ },
7547
+ {
7548
+ "epoch": 19.87,
7549
+ "grad_norm": 1.6992497444152832,
7550
+ "learning_rate": 1.701201413427562e-05,
7551
+ "loss": 0.4712,
7552
+ "step": 47200
7553
+ },
7554
+ {
7555
+ "epoch": 19.87,
7556
+ "eval_cer": 0.34815073716319267,
7557
+ "eval_loss": 2.558990001678467,
7558
+ "eval_runtime": 412.1751,
7559
+ "eval_samples_per_second": 22.995,
7560
+ "eval_steps_per_second": 2.875,
7561
+ "step": 47200
7562
+ },
7563
+ {
7564
+ "epoch": 19.91,
7565
+ "grad_norm": 3.102717399597168,
7566
+ "learning_rate": 1.6941342756183746e-05,
7567
+ "loss": 0.4767,
7568
+ "step": 47300
7569
+ },
7570
+ {
7571
+ "epoch": 19.91,
7572
+ "eval_cer": 0.3449659770834148,
7573
+ "eval_loss": 2.632328748703003,
7574
+ "eval_runtime": 385.8283,
7575
+ "eval_samples_per_second": 24.565,
7576
+ "eval_steps_per_second": 3.071,
7577
+ "step": 47300
7578
  }
7579
  ],
7580
  "logging_steps": 100,
 
7582
  "num_input_tokens_seen": 0,
7583
  "num_train_epochs": 30,
7584
  "save_steps": 100,
7585
+ "total_flos": 5.183308424712062e+20,
7586
  "train_batch_size": 8,
7587
  "trial_name": null,
7588
  "trial_params": null