ashanhr commited on
Commit
911fdf8
1 Parent(s): 7ea43ad

Training in progress, step 33300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80dede518c72c0bba9c223e7cfed76911fa6394ec26c0650d7e326e1c552fcec
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d5b6ee3ec7774796203e9857adedf4a34ad1a8f9d65eac92d02013104b7486
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8698a6f801a918beb15989fb2bf3a1edbdc2ca6625d90e61434980880b6dbe95
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7eed855e5c83384fc6750fe371ea0613d62eeceb95e59f1e6fb79a4aa75fac5
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e88f56416695277fa9c9a9d86e755d054a221cbf389be803e09487f20a0bdc4c
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:298d5e2bc403ff74d204ac4ad46dbab183f8ce444ab4c0c42dc465d17ad6d84f
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7801270dd1e1084f5e8f44290a74a4794810996f2cc1dc68247d892f66491210
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a3119fb2b34229087dc7d4734b99ce7345817f422d2b8091f039bff04ac0992
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09f7576dd710ccdafe6cc17194e31546db69ecc2b7cc2efcd8719730dbf27b3b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db2a9d436fb980c51b3b887bf8c6c11afcad56f430dc37015dfbad372f5df29
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.933908650810356,
5
  "eval_steps": 100,
6
- "global_step": 33100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5303,6 +5303,38 @@
5303
  "eval_samples_per_second": 25.678,
5304
  "eval_steps_per_second": 3.21,
5305
  "step": 33100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5306
  }
5307
  ],
5308
  "logging_steps": 100,
@@ -5310,7 +5342,7 @@
5310
  "num_input_tokens_seen": 0,
5311
  "num_train_epochs": 30,
5312
  "save_steps": 100,
5313
- "total_flos": 3.627266703487163e+20,
5314
  "train_batch_size": 8,
5315
  "trial_name": null,
5316
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.018101452325826,
5
  "eval_steps": 100,
6
+ "global_step": 33300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5303
  "eval_samples_per_second": 25.678,
5304
  "eval_steps_per_second": 3.21,
5305
  "step": 33100
5306
+ },
5307
+ {
5308
+ "epoch": 13.98,
5309
+ "grad_norm": 2.1010019779205322,
5310
+ "learning_rate": 2.6902473498233218e-05,
5311
+ "loss": 0.9974,
5312
+ "step": 33200
5313
+ },
5314
+ {
5315
+ "epoch": 13.98,
5316
+ "eval_cer": 0.4032106683352235,
5317
+ "eval_loss": 2.416074514389038,
5318
+ "eval_runtime": 395.8532,
5319
+ "eval_samples_per_second": 23.943,
5320
+ "eval_steps_per_second": 2.994,
5321
+ "step": 33200
5322
+ },
5323
+ {
5324
+ "epoch": 14.02,
5325
+ "grad_norm": 5.9587812423706055,
5326
+ "learning_rate": 2.6831802120141342e-05,
5327
+ "loss": 0.9724,
5328
+ "step": 33300
5329
+ },
5330
+ {
5331
+ "epoch": 14.02,
5332
+ "eval_cer": 0.3974839662117242,
5333
+ "eval_loss": 2.979396343231201,
5334
+ "eval_runtime": 370.8376,
5335
+ "eval_samples_per_second": 25.558,
5336
+ "eval_steps_per_second": 3.195,
5337
+ "step": 33300
5338
  }
5339
  ],
5340
  "logging_steps": 100,
 
5342
  "num_input_tokens_seen": 0,
5343
  "num_train_epochs": 30,
5344
  "save_steps": 100,
5345
+ "total_flos": 3.6491196354345704e+20,
5346
  "train_batch_size": 8,
5347
  "trial_name": null,
5348
  "trial_params": null