ashanhr commited on
Commit
9653b13
·
verified ·
1 Parent(s): f1f9b9f

Training in progress, step 25700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ab007dd960c183c2e30e85616514e3da6c3660d56809b3a634e02652595cf69
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7781a53d6b5b7512ff0ff1d38c7eaabb023d480f361f98431565fe95b327859
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6778800cc796c45ea30ad3a22874ae830638e117c355360ea6c92a3dfd474eab
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72d83ec1e6bf950a1d60198c789353d4e7b343be90d1474a02b4880043c647b8
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e13a3e04c3bc95af16ed8a2dd0f5ca31f7946fe6e95a84fd44ba865570960002
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e43534489fe9b0667499677fea38991daee7ef7e44c8bba59d24e7af296df2cf
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:846f249b88befad596e3b348fc9d3af50438f51fe4f7aa809cab2c988dc38c0e
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20fff49ae707308451e57857b198857501948a824acc26025a78021e3c4d9970
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e006367d18ff2557748c2cef8502481c62684745dc5fb7eb9b01c6baf1a2184
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b567208206c71e420de4afc268070bb8059d13be7ed02d0130d55f8213c8c995
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.73458219322248,
5
  "eval_steps": 100,
6
- "global_step": 25500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4087,6 +4087,38 @@
4087
  "eval_samples_per_second": 25.6,
4088
  "eval_steps_per_second": 3.201,
4089
  "step": 25500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4090
  }
4091
  ],
4092
  "logging_steps": 100,
@@ -4094,7 +4126,7 @@
4094
  "num_input_tokens_seen": 0,
4095
  "num_train_epochs": 30,
4096
  "save_steps": 100,
4097
- "total_flos": 2.7941943750609027e+20,
4098
  "train_batch_size": 8,
4099
  "trial_name": null,
4100
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.81877499473795,
5
  "eval_steps": 100,
6
+ "global_step": 25700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4087
  "eval_samples_per_second": 25.6,
4088
  "eval_steps_per_second": 3.201,
4089
  "step": 25500
4090
+ },
4091
+ {
4092
+ "epoch": 10.78,
4093
+ "grad_norm": 3.47428035736084,
4094
+ "learning_rate": 3.2271378091872796e-05,
4095
+ "loss": 1.3785,
4096
+ "step": 25600
4097
+ },
4098
+ {
4099
+ "epoch": 10.78,
4100
+ "eval_cer": 0.4252571272144226,
4101
+ "eval_loss": 2.5334064960479736,
4102
+ "eval_runtime": 380.018,
4103
+ "eval_samples_per_second": 24.941,
4104
+ "eval_steps_per_second": 3.118,
4105
+ "step": 25600
4106
+ },
4107
+ {
4108
+ "epoch": 10.82,
4109
+ "grad_norm": 2.5327584743499756,
4110
+ "learning_rate": 3.2200706713780924e-05,
4111
+ "loss": 1.518,
4112
+ "step": 25700
4113
+ },
4114
+ {
4115
+ "epoch": 10.82,
4116
+ "eval_cer": 0.42553087481913104,
4117
+ "eval_loss": 2.6629116535186768,
4118
+ "eval_runtime": 359.4773,
4119
+ "eval_samples_per_second": 26.366,
4120
+ "eval_steps_per_second": 3.296,
4121
+ "step": 25700
4122
  }
4123
  ],
4124
  "logging_steps": 100,
 
4126
  "num_input_tokens_seen": 0,
4127
  "num_train_epochs": 30,
4128
  "save_steps": 100,
4129
+ "total_flos": 2.815852220682527e+20,
4130
  "train_batch_size": 8,
4131
  "trial_name": null,
4132
  "trial_params": null