ashanhr commited on
Commit
fa19d9b
·
verified ·
1 Parent(s): caa6e02

Training in progress, step 27100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:378dc527ac459832013c97617137877ef2787d8ad0a5019604ca85aa956d8e7e
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e542af8d76367f68d7013ad25645c5c0bb83d400a9c28ba347486b7e1af191
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4c23db54888854e9599d18a9d3bb2ca8aba5577e50d015aa3e0495e88b82be4
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ef8497c6e127aeccc2e106755bbf32a546fa8428fa8775d2e9e8d8e5652691
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b0e4523778043b99de59919488370129ad827e481ce7777fc2e2ed0192be8c4
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e7d171ad28335cab6bd4c1150438a645740b385e44943066cf6cf9cdad1d0a8
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ab6c67f572fa86813b890e9a102c067c20a3ae8213e83df8a69f01dcfe18650
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b0fd6f9494bd763f598141a559ed76c4d8dfcfafe1cb83a86b361df0b818dea
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:920901db8d07342ad0b34f21db24e241a543d399c53204a75752af51c73d0ca9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:945cfe3d07d63b571472084df19a5a6f95aade9bf666b3cf8938678e9d425fd2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.323931803830773,
5
  "eval_steps": 100,
6
- "global_step": 26900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4311,6 +4311,38 @@
4311
  "eval_samples_per_second": 25.6,
4312
  "eval_steps_per_second": 3.201,
4313
  "step": 26900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4314
  }
4315
  ],
4316
  "logging_steps": 100,
@@ -4318,7 +4350,7 @@
4318
  "num_input_tokens_seen": 0,
4319
  "num_train_epochs": 30,
4320
  "save_steps": 100,
4321
- "total_flos": 2.9479302465142643e+20,
4322
  "train_batch_size": 8,
4323
  "trial_name": null,
4324
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.408124605346243,
5
  "eval_steps": 100,
6
+ "global_step": 27100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4311
  "eval_samples_per_second": 25.6,
4312
  "eval_steps_per_second": 3.201,
4313
  "step": 26900
4314
+ },
4315
+ {
4316
+ "epoch": 11.37,
4317
+ "grad_norm": 1.628806710243225,
4318
+ "learning_rate": 3.1282685512367496e-05,
4319
+ "loss": 1.1399,
4320
+ "step": 27000
4321
+ },
4322
+ {
4323
+ "epoch": 11.37,
4324
+ "eval_cer": 0.4168051699190489,
4325
+ "eval_loss": 1.782639741897583,
4326
+ "eval_runtime": 391.1594,
4327
+ "eval_samples_per_second": 24.231,
4328
+ "eval_steps_per_second": 3.029,
4329
+ "step": 27000
4330
+ },
4331
+ {
4332
+ "epoch": 11.41,
4333
+ "grad_norm": 3.158604621887207,
4334
+ "learning_rate": 3.121201413427562e-05,
4335
+ "loss": 1.1373,
4336
+ "step": 27100
4337
+ },
4338
+ {
4339
+ "epoch": 11.41,
4340
+ "eval_cer": 0.4201585780767275,
4341
+ "eval_loss": 1.5199880599975586,
4342
+ "eval_runtime": 368.0376,
4343
+ "eval_samples_per_second": 25.753,
4344
+ "eval_steps_per_second": 3.22,
4345
+ "step": 27100
4346
  }
4347
  ],
4348
  "logging_steps": 100,
 
4350
  "num_input_tokens_seen": 0,
4351
  "num_train_epochs": 30,
4352
  "save_steps": 100,
4353
+ "total_flos": 2.96989846970687e+20,
4354
  "train_batch_size": 8,
4355
  "trial_name": null,
4356
  "trial_params": null