ashanhr commited on
Commit
accfc4d
1 Parent(s): d7003c7

Training in progress, step 16500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b8f159de36d64281ddd74c716173e22dda43638490d210d663f6239504808c0
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffcfe122bd2d59d01967428904ec5ea9fa859cb879f1770bb1d1e760b1b7688c
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0177c8aa518ca2ca2c236f872b9ea81f1b5759873aabb911e5262ebffe2abe3b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f82abcf3aa79e7aebdfb7e800e2da4ec60cfd150519c69f6e42fff57f12e5649
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bb867ed0ec5031e1aeb6ba6b339623fbe0e6f95f55f1e5e5eb609dd6315043f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4863c0159897cbd8e6cad80102a7234e43e4dd9ec61c50a16d612844e381c121
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c21a862ff8666bb46f50a482a414a6564395b399e94962d06b8d979c0e47eaf1
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:958c40c9a12bc1ec4000782ee24253ec47677cd329681042cb696da20d4daa37
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d885b462e40c8dc986005d71bf297429595c7992d35a7d138df0b70b9a59f663
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:815a23a9cc5b941ec9802a205234677f07d3ff4d13a1a86a9f1b1c15fe345d83
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.861713323510839,
5
  "eval_steps": 100,
6
- "global_step": 16300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2615,6 +2615,38 @@
2615
  "eval_samples_per_second": 26.288,
2616
  "eval_steps_per_second": 3.287,
2617
  "step": 16300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2618
  }
2619
  ],
2620
  "logging_steps": 100,
@@ -2622,7 +2654,7 @@
2622
  "num_input_tokens_seen": 0,
2623
  "num_train_epochs": 30,
2624
  "save_steps": 100,
2625
- "total_flos": 1.785994345378592e+20,
2626
  "train_batch_size": 8,
2627
  "trial_name": null,
2628
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.94590612502631,
5
  "eval_steps": 100,
6
+ "global_step": 16500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2615
  "eval_samples_per_second": 26.288,
2616
  "eval_steps_per_second": 3.287,
2617
  "step": 16300
2618
+ },
2619
+ {
2620
+ "epoch": 6.9,
2621
+ "grad_norm": 8.180316925048828,
2622
+ "learning_rate": 3.877102473498234e-05,
2623
+ "loss": 1.6474,
2624
+ "step": 16400
2625
+ },
2626
+ {
2627
+ "epoch": 6.9,
2628
+ "eval_cer": 0.4800946384576278,
2629
+ "eval_loss": 2.309068202972412,
2630
+ "eval_runtime": 377.3872,
2631
+ "eval_samples_per_second": 25.115,
2632
+ "eval_steps_per_second": 3.14,
2633
+ "step": 16400
2634
+ },
2635
+ {
2636
+ "epoch": 6.95,
2637
+ "grad_norm": 1.7500585317611694,
2638
+ "learning_rate": 3.870035335689046e-05,
2639
+ "loss": 1.3755,
2640
+ "step": 16500
2641
+ },
2642
+ {
2643
+ "epoch": 6.95,
2644
+ "eval_cer": 0.4641219545578976,
2645
+ "eval_loss": 1.5506832599639893,
2646
+ "eval_runtime": 359.5594,
2647
+ "eval_samples_per_second": 26.36,
2648
+ "eval_steps_per_second": 3.296,
2649
+ "step": 16500
2650
  }
2651
  ],
2652
  "logging_steps": 100,
 
2654
  "num_input_tokens_seen": 0,
2655
  "num_train_epochs": 30,
2656
  "save_steps": 100,
2657
+ "total_flos": 1.807384038236432e+20,
2658
  "train_batch_size": 8,
2659
  "trial_name": null,
2660
  "trial_params": null