ashanhr commited on
Commit
b536031
1 Parent(s): ec235cd

Training in progress, step 50300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c467a55474359ea150af7f199402cf84590658e0255a9165922970cef84e170
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:720bc977d62e7816011389161c4eeae7e71a5e15fa6414fc84828a3a030a05e6
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:489d7ba688753abe3e33833a76b4ae882115ed3d90c66c7a1a12d1ffa7f82528
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5d803a6ecbd25b4508f6830cb0d354f3164cf36de87bc4c942336f1d723990
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f413dee5c854704d5963137b5956e9d25624be5f512c8185c3af5befb0aca4f7
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:941eea433be3c1f9adb0f47751c327703c05efe14921d20365ff7c6534eecd09
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7672fe3061dcd7690545b653f58f4ee116feae4c3b67a28c5239aa57cd8e646c
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cbe5fab90a476cca692ca727115b096b7062ef579d4f32a4ad2921bb938c28b
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08ae60acffa23a34f3e2f8fcc14339c1e29295c511dc2b5dad5d07178f8a8bf5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08a28e7b27d1cecd73113f4062cdd78f7500a461c5bdb8769059e4729aaa72d3
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 21.09029677962534,
5
  "eval_steps": 100,
6
- "global_step": 50100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8023,6 +8023,38 @@
8023
  "eval_samples_per_second": 24.513,
8024
  "eval_steps_per_second": 3.065,
8025
  "step": 50100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8026
  }
8027
  ],
8028
  "logging_steps": 100,
@@ -8030,7 +8062,7 @@
8030
  "num_input_tokens_seen": 0,
8031
  "num_train_epochs": 30,
8032
  "save_steps": 100,
8033
- "total_flos": 5.490253620598738e+20,
8034
  "train_batch_size": 8,
8035
  "trial_name": null,
8036
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 21.17448958114081,
5
  "eval_steps": 100,
6
+ "global_step": 50300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8023
  "eval_samples_per_second": 24.513,
8024
  "eval_steps_per_second": 3.065,
8025
  "step": 50100
8026
+ },
8027
+ {
8028
+ "epoch": 21.13,
8029
+ "grad_norm": 2.7008559703826904,
8030
+ "learning_rate": 1.4892579505300355e-05,
8031
+ "loss": 0.3479,
8032
+ "step": 50200
8033
+ },
8034
+ {
8035
+ "epoch": 21.13,
8036
+ "eval_cer": 0.34289331664776507,
8037
+ "eval_loss": 2.209360122680664,
8038
+ "eval_runtime": 412.345,
8039
+ "eval_samples_per_second": 22.986,
8040
+ "eval_steps_per_second": 2.874,
8041
+ "step": 50200
8042
+ },
8043
+ {
8044
+ "epoch": 21.17,
8045
+ "grad_norm": 1.4452073574066162,
8046
+ "learning_rate": 1.482190812720848e-05,
8047
+ "loss": 0.3465,
8048
+ "step": 50300
8049
+ },
8050
+ {
8051
+ "epoch": 21.17,
8052
+ "eval_cer": 0.33977454929412226,
8053
+ "eval_loss": 2.268566370010376,
8054
+ "eval_runtime": 390.1756,
8055
+ "eval_samples_per_second": 24.292,
8056
+ "eval_steps_per_second": 3.037,
8057
+ "step": 50300
8058
  }
8059
  ],
8060
  "logging_steps": 100,
 
8062
  "num_input_tokens_seen": 0,
8063
  "num_train_epochs": 30,
8064
  "save_steps": 100,
8065
+ "total_flos": 5.5126604494545053e+20,
8066
  "train_batch_size": 8,
8067
  "trial_name": null,
8068
  "trial_params": null