ashanhr commited on
Commit
c89dd1f
·
verified ·
1 Parent(s): 671fb49

Training in progress, step 39900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e5ed2da44c27bc890c8ea9f6105fd0a53f62d5d80dfd31f75d14fd115db2cdd
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7287b40e0c6de9ef270e710f16c4acc766ed561015816aba94d3ada772955ea0
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f057e139d906c78952ba74b4d37c6edb80db873e643241e191394c956ebbf6a
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55a1b4fccd53ff169ebe311d0768ce5fb6e3b059e4140a6f1734cacccd56b0e6
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1931460d5e459c7dd130a6844e5132390ce78447fc54caa1ca9965416b759b0c
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0260a4c3ca6ca53fa3b42cf50f7f92b4150ea244f048dbfa1ce457d84f4119fe
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccbf24f06506ce49d495c2f8b2b9de6ab0a93b2d20fbd2a2f06681444c854113
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bfb0adc6bfe8622be753ac3a0f24b82f548e8791dfa2e559460dc20afa0313b
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfdaee5a60fdfdb51226c40d315ad694d9948fae35d824dce01188f3978aafe2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:758837119a231eb27945cf2b19a2b83a829f1699f442e7c9366e800eb23d0d1a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.712271100820878,
5
  "eval_steps": 100,
6
- "global_step": 39700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6359,6 +6359,38 @@
6359
  "eval_samples_per_second": 24.507,
6360
  "eval_steps_per_second": 3.064,
6361
  "step": 39700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6362
  }
6363
  ],
6364
  "logging_steps": 100,
@@ -6366,7 +6398,7 @@
6366
  "num_input_tokens_seen": 0,
6367
  "num_train_epochs": 30,
6368
  "save_steps": 100,
6369
- "total_flos": 4.350920052860814e+20,
6370
  "train_batch_size": 8,
6371
  "trial_name": null,
6372
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.79646390233635,
5
  "eval_steps": 100,
6
+ "global_step": 39900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6359
  "eval_samples_per_second": 24.507,
6360
  "eval_steps_per_second": 3.064,
6361
  "step": 39700
6362
+ },
6363
+ {
6364
+ "epoch": 16.75,
6365
+ "grad_norm": 3.9978723526000977,
6366
+ "learning_rate": 2.223886925795053e-05,
6367
+ "loss": 0.7286,
6368
+ "step": 39800
6369
+ },
6370
+ {
6371
+ "epoch": 16.75,
6372
+ "eval_cer": 0.3679216690782527,
6373
+ "eval_loss": 1.6313321590423584,
6374
+ "eval_runtime": 398.1769,
6375
+ "eval_samples_per_second": 23.803,
6376
+ "eval_steps_per_second": 2.976,
6377
+ "step": 39800
6378
+ },
6379
+ {
6380
+ "epoch": 16.8,
6381
+ "grad_norm": 1.9248440265655518,
6382
+ "learning_rate": 2.2168197879858656e-05,
6383
+ "loss": 0.7211,
6384
+ "step": 39900
6385
+ },
6386
+ {
6387
+ "epoch": 16.8,
6388
+ "eval_cer": 0.365851452817645,
6389
+ "eval_loss": 1.687284231185913,
6390
+ "eval_runtime": 376.8233,
6391
+ "eval_samples_per_second": 25.152,
6392
+ "eval_steps_per_second": 3.145,
6393
+ "step": 39900
6394
  }
6395
  ],
6396
  "logging_steps": 100,
 
6398
  "num_input_tokens_seen": 0,
6399
  "num_train_epochs": 30,
6400
  "save_steps": 100,
6401
+ "total_flos": 4.3726588831730395e+20,
6402
  "train_batch_size": 8,
6403
  "trial_name": null,
6404
  "trial_params": null