ashanhr commited on
Commit
cf0e211
·
verified ·
1 Parent(s): 8224955

Training in progress, step 26900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eff3cdcb7d4b7ed611afcd9ebd6643d02bc2e1b35c4bb74c08c50cf9a5f561a7
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:378dc527ac459832013c97617137877ef2787d8ad0a5019604ca85aa956d8e7e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3b6b9e0933269179cf5434f263f8f9d9d7b6fff1b073c6931905cc4b53994b5
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c23db54888854e9599d18a9d3bb2ca8aba5577e50d015aa3e0495e88b82be4
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad304c52a9c03014020a10f709aaf27545830076ddab93a26c7de57de9dde158
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b0e4523778043b99de59919488370129ad827e481ce7777fc2e2ed0192be8c4
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:840a7138cc3ba7fbc10190e1e4c329f4c41a56a5385a8c0310bf23bf7b9dc752
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ab6c67f572fa86813b890e9a102c067c20a3ae8213e83df8a69f01dcfe18650
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ac896d0e921c51d399d94984f4a87a03619430db29ebc79fed180feeab8b5af
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:920901db8d07342ad0b34f21db24e241a543d399c53204a75752af51c73d0ca9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.239739002315302,
5
  "eval_steps": 100,
6
- "global_step": 26700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4279,6 +4279,38 @@
4279
  "eval_samples_per_second": 25.739,
4280
  "eval_steps_per_second": 3.218,
4281
  "step": 26700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4282
  }
4283
  ],
4284
  "logging_steps": 100,
@@ -4286,7 +4318,7 @@
4286
  "num_input_tokens_seen": 0,
4287
  "num_train_epochs": 30,
4288
  "save_steps": 100,
4289
- "total_flos": 2.9261113218403457e+20,
4290
  "train_batch_size": 8,
4291
  "trial_name": null,
4292
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.323931803830773,
5
  "eval_steps": 100,
6
+ "global_step": 26900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4279
  "eval_samples_per_second": 25.739,
4280
  "eval_steps_per_second": 3.218,
4281
  "step": 26700
4282
+ },
4283
+ {
4284
+ "epoch": 11.28,
4285
+ "grad_norm": 1.7043545246124268,
4286
+ "learning_rate": 3.142402826855124e-05,
4287
+ "loss": 1.1348,
4288
+ "step": 26800
4289
+ },
4290
+ {
4291
+ "epoch": 11.28,
4292
+ "eval_cer": 0.41776573071057055,
4293
+ "eval_loss": 2.2900288105010986,
4294
+ "eval_runtime": 394.8533,
4295
+ "eval_samples_per_second": 24.004,
4296
+ "eval_steps_per_second": 3.001,
4297
+ "step": 26800
4298
+ },
4299
+ {
4300
+ "epoch": 11.32,
4301
+ "grad_norm": 1.8619517087936401,
4302
+ "learning_rate": 3.135335689045937e-05,
4303
+ "loss": 1.1366,
4304
+ "step": 26900
4305
+ },
4306
+ {
4307
+ "epoch": 11.32,
4308
+ "eval_cer": 0.42347532360877554,
4309
+ "eval_loss": 1.863010287284851,
4310
+ "eval_runtime": 370.2336,
4311
+ "eval_samples_per_second": 25.6,
4312
+ "eval_steps_per_second": 3.201,
4313
+ "step": 26900
4314
  }
4315
  ],
4316
  "logging_steps": 100,
 
4318
  "num_input_tokens_seen": 0,
4319
  "num_train_epochs": 30,
4320
  "save_steps": 100,
4321
+ "total_flos": 2.9479302465142643e+20,
4322
  "train_batch_size": 8,
4323
  "trial_name": null,
4324
  "trial_params": null