ashanhr commited on
Commit
489ed29
·
verified ·
1 Parent(s): a4aac60

Training in progress, step 20400, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1da8b304c458a518082aa8726d59c13923b22bcbfafb8c9974ba081498990923
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:424006b63705ff8d128ce03cf112d0f0202b9b53563618c80069b74aadf6b121
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d579f962773a873e990bf64f2ef77b2bda2450b161ecf4daba9ef7ab67c9e1e
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:486219a9cbd763fd42552ee2a1a2647b831dc1b08e3f181c8b5b51fe133f0228
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05592761d986af7a947218805add8c50c1c8d6fdee22c120c1dc80c2d3c678c3
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0908165977f8c3d9489a849f85d9268f712bef0a616c664c5b76ba43a71c7f3d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:514e6bb8aa1fefb3e31c68440ba01eb2833533f9f34d13f635293e87198bd4a2
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9aa79a869a104ab7e27c688f3cfabb1a5b016e6c0d9194e567985352b95f11
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:505d5d43fa0ef4ab6941e01c5cac91243ad894a08db7c0784e965134236be7af
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:006c5e8305ef9dc1fe3403e1173068b8cb7f990d0fc5d4b955050dc8157ec06d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.503472953062513,
5
  "eval_steps": 100,
6
- "global_step": 20200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3239,6 +3239,38 @@
3239
  "eval_samples_per_second": 26.116,
3240
  "eval_steps_per_second": 3.265,
3241
  "step": 20200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3242
  }
3243
  ],
3244
  "logging_steps": 100,
@@ -3246,7 +3278,7 @@
3246
  "num_input_tokens_seen": 0,
3247
  "num_train_epochs": 30,
3248
  "save_steps": 100,
3249
- "total_flos": 2.2137141360320797e+20,
3250
  "train_batch_size": 8,
3251
  "trial_name": null,
3252
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.587665754577984,
5
  "eval_steps": 100,
6
+ "global_step": 20400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3239
  "eval_samples_per_second": 26.116,
3240
  "eval_steps_per_second": 3.265,
3241
  "step": 20200
3242
+ },
3243
+ {
3244
+ "epoch": 8.55,
3245
+ "grad_norm": 6.447911262512207,
3246
+ "learning_rate": 3.6016254416961133e-05,
3247
+ "loss": 2.11,
3248
+ "step": 20300
3249
+ },
3250
+ {
3251
+ "epoch": 8.55,
3252
+ "eval_cer": 0.43633657267998904,
3253
+ "eval_loss": 3.3446898460388184,
3254
+ "eval_runtime": 385.9035,
3255
+ "eval_samples_per_second": 24.561,
3256
+ "eval_steps_per_second": 3.071,
3257
+ "step": 20300
3258
+ },
3259
+ {
3260
+ "epoch": 8.59,
3261
+ "grad_norm": 2.172891139984131,
3262
+ "learning_rate": 3.5945583038869255e-05,
3263
+ "loss": 1.4999,
3264
+ "step": 20400
3265
+ },
3266
+ {
3267
+ "epoch": 8.59,
3268
+ "eval_cer": 0.4393355754565719,
3269
+ "eval_loss": 3.0930521488189697,
3270
+ "eval_runtime": 363.4753,
3271
+ "eval_samples_per_second": 26.076,
3272
+ "eval_steps_per_second": 3.26,
3273
+ "step": 20400
3274
  }
3275
  ],
3276
  "logging_steps": 100,
 
3278
  "num_input_tokens_seen": 0,
3279
  "num_train_epochs": 30,
3280
  "save_steps": 100,
3281
+ "total_flos": 2.2352658822253032e+20,
3282
  "train_batch_size": 8,
3283
  "trial_name": null,
3284
  "trial_params": null