ashanhr commited on
Commit
c79ac80
1 Parent(s): c9e87f3

Training in progress, step 37700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcc05bffb8c241d61a3dc3d581cb5886a3909df431621dcf4dd9ce53d1493de7
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020f93a158168205cca3915538c4ed9109b5ffc2b96fdeb0f1c4d79a07042c55
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e03ec0ba4df87757138b2f936b9502249cf8be8c7b02363976349bcb69c91ca3
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3befc5d87f20164d7ddccdca6c81786e192784cb6ebe0faa3bf6a1782ef9e1bc
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a522d20a06a7366cf25154577cd411876c1eae2ed75c78dc5cb6dbb8ab486ded
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e3dc33eb709a480424a714d2444abb7f1cdabf9f95720d6a97f58d1827cb80e
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f57f2e335f6242f37d8f62964bd040064cd24494d16cadfe606f7af6e6e33ed2
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc3db78812f5b1281be37cba1a96865a0e99afcb4f7c40d8dd207953ac5fefc5
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331c71e4dbe6e06ea5ac4e13a3a74ea2edc9963430ef549063c5def06827f8ad
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc911d1c134d3a4e026e82aa7a427672312a8867cae577cfbbeccb41bd9874b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.786150284150706,
5
  "eval_steps": 100,
6
- "global_step": 37500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6007,6 +6007,38 @@
6007
  "eval_samples_per_second": 25.348,
6008
  "eval_steps_per_second": 3.169,
6009
  "step": 37500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6010
  }
6011
  ],
6012
  "logging_steps": 100,
@@ -6014,7 +6046,7 @@
6014
  "num_input_tokens_seen": 0,
6015
  "num_train_epochs": 30,
6016
  "save_steps": 100,
6017
- "total_flos": 4.109762412671853e+20,
6018
  "train_batch_size": 8,
6019
  "trial_name": null,
6020
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.870343085666175,
5
  "eval_steps": 100,
6
+ "global_step": 37700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6007
  "eval_samples_per_second": 25.348,
6008
  "eval_steps_per_second": 3.169,
6009
  "step": 37500
6010
+ },
6011
+ {
6012
+ "epoch": 15.83,
6013
+ "grad_norm": 10.501407623291016,
6014
+ "learning_rate": 2.3793639575971734e-05,
6015
+ "loss": 0.843,
6016
+ "step": 37600
6017
+ },
6018
+ {
6019
+ "epoch": 15.83,
6020
+ "eval_cer": 0.37860271401196666,
6021
+ "eval_loss": 1.4808061122894287,
6022
+ "eval_runtime": 394.8535,
6023
+ "eval_samples_per_second": 24.004,
6024
+ "eval_steps_per_second": 3.001,
6025
+ "step": 37600
6026
+ },
6027
+ {
6028
+ "epoch": 15.87,
6029
+ "grad_norm": 4.368974208831787,
6030
+ "learning_rate": 2.372296819787986e-05,
6031
+ "loss": 0.824,
6032
+ "step": 37700
6033
+ },
6034
+ {
6035
+ "epoch": 15.87,
6036
+ "eval_cer": 0.3751050995268077,
6037
+ "eval_loss": 1.6123182773590088,
6038
+ "eval_runtime": 373.2585,
6039
+ "eval_samples_per_second": 25.393,
6040
+ "eval_steps_per_second": 3.175,
6041
+ "step": 37700
6042
  }
6043
  ],
6044
  "logging_steps": 100,
 
6046
  "num_input_tokens_seen": 0,
6047
  "num_train_epochs": 30,
6048
  "save_steps": 100,
6049
+ "total_flos": 4.1315834714403255e+20,
6050
  "train_batch_size": 8,
6051
  "trial_name": null,
6052
  "trial_params": null