ashanhr commited on
Commit
574d564
·
verified ·
1 Parent(s): 9430c8c

Training in progress, step 25900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7781a53d6b5b7512ff0ff1d38c7eaabb023d480f361f98431565fe95b327859
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6256e490b0e43c626084d84a341037e37434fd9bcabcbc558b3172b0e909df6
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72d83ec1e6bf950a1d60198c789353d4e7b343be90d1474a02b4880043c647b8
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8951c5ab590b2778d41e2dc16e2017cb3db0033da9f8d4a91ee266d3a370c3c
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e43534489fe9b0667499677fea38991daee7ef7e44c8bba59d24e7af296df2cf
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d909f75ea0266c0aca1a09ff4c06a42792691131283c14fd1a26cdd5f46c7cd8
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20fff49ae707308451e57857b198857501948a824acc26025a78021e3c4d9970
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db111608e60b5c73e6c2423d408c2a31f6a8c6486b5343e756b1090ce10da2e
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b567208206c71e420de4afc268070bb8059d13be7ed02d0130d55f8213c8c995
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b045b8a4735f098f688aa2d4a47ca9b1dff6138fd3367077decbe825c59f79
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.81877499473795,
5
  "eval_steps": 100,
6
- "global_step": 25700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4119,6 +4119,38 @@
4119
  "eval_samples_per_second": 26.366,
4120
  "eval_steps_per_second": 3.296,
4121
  "step": 25700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4122
  }
4123
  ],
4124
  "logging_steps": 100,
@@ -4126,7 +4158,7 @@
4126
  "num_input_tokens_seen": 0,
4127
  "num_train_epochs": 30,
4128
  "save_steps": 100,
4129
- "total_flos": 2.815852220682527e+20,
4130
  "train_batch_size": 8,
4131
  "trial_name": null,
4132
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.90296779625342,
5
  "eval_steps": 100,
6
+ "global_step": 25900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4119
  "eval_samples_per_second": 26.366,
4120
  "eval_steps_per_second": 3.296,
4121
  "step": 25700
4122
+ },
4123
+ {
4124
+ "epoch": 10.86,
4125
+ "grad_norm": 5.47965145111084,
4126
+ "learning_rate": 3.2130035335689045e-05,
4127
+ "loss": 1.1865,
4128
+ "step": 25800
4129
+ },
4130
+ {
4131
+ "epoch": 10.86,
4132
+ "eval_cer": 0.4287083023737828,
4133
+ "eval_loss": 1.6172243356704712,
4134
+ "eval_runtime": 386.7546,
4135
+ "eval_samples_per_second": 24.506,
4136
+ "eval_steps_per_second": 3.064,
4137
+ "step": 25800
4138
+ },
4139
+ {
4140
+ "epoch": 10.9,
4141
+ "grad_norm": 7.293168544769287,
4142
+ "learning_rate": 3.2060070671378093e-05,
4143
+ "loss": 1.3999,
4144
+ "step": 25900
4145
+ },
4146
+ {
4147
+ "epoch": 10.9,
4148
+ "eval_cer": 0.42187194478119744,
4149
+ "eval_loss": 1.8544808626174927,
4150
+ "eval_runtime": 365.482,
4151
+ "eval_samples_per_second": 25.933,
4152
+ "eval_steps_per_second": 3.242,
4153
+ "step": 25900
4154
  }
4155
  ],
4156
  "logging_steps": 100,
 
4158
  "num_input_tokens_seen": 0,
4159
  "num_train_epochs": 30,
4160
  "save_steps": 100,
4161
+ "total_flos": 2.8377007760000103e+20,
4162
  "train_batch_size": 8,
4163
  "trial_name": null,
4164
  "trial_params": null