ashanhr committed on
Commit
4c69a8b
·
verified ·
1 Parent(s): 1a37006

Training in progress, step 33700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebbf03b6a39b6b310c792fd4ce9a56b4feab22bc4d8aae7c67e0b23549ec3844
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bdec49f5ba937575c37201bec00db52b8c3e844821eecc385ba239a33663f10
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5322792e8472158c629b41c8ed853c14c036430212aa2395b1962b71dd6688c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:619c1c99184b876365ca973837576037ba7b39716899416a2f1019041043ea17
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41f6c73349fb0a4922b7c514625c8f319c04d8a3281a94dc80e47d5905a270b7
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0729b7a58599ea8921b314d9c9621e535f2d82f309864faa4b6d052417b8b263
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08b7a405079c06c2e1179546e5ade586e1754239d626d4047688ddad0abeac41
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44661c431a879e3226aadc89367134cd0b53d52f6af54472aaa115083d69b12e
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44c05bab47528958ca030cc3c2eb992ca93a53fcef5ee7de67d8809ec8b54d00
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a48819e7745c34bedd45ae62b08be3d10aa46039968cd3e6efddcf11d1d5426
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.102294253841297,
5
  "eval_steps": 100,
6
- "global_step": 33500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5367,6 +5367,38 @@
5367
  "eval_samples_per_second": 25.476,
5368
  "eval_steps_per_second": 3.185,
5369
  "step": 33500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5370
  }
5371
  ],
5372
  "logging_steps": 100,
@@ -5374,7 +5406,7 @@
5374
  "num_input_tokens_seen": 0,
5375
  "num_train_epochs": 30,
5376
  "save_steps": 100,
5377
- "total_flos": 3.670837467873283e+20,
5378
  "train_batch_size": 8,
5379
  "trial_name": null,
5380
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.186487055356768,
5
  "eval_steps": 100,
6
+ "global_step": 33700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5367
  "eval_samples_per_second": 25.476,
5368
  "eval_steps_per_second": 3.185,
5369
  "step": 33500
5370
+ },
5371
+ {
5372
+ "epoch": 14.14,
5373
+ "grad_norm": 2.114933729171753,
5374
+ "learning_rate": 2.6619787985865723e-05,
5375
+ "loss": 0.9445,
5376
+ "step": 33600
5377
+ },
5378
+ {
5379
+ "epoch": 14.14,
5380
+ "eval_cer": 0.3915470650346095,
5381
+ "eval_loss": 2.192039728164673,
5382
+ "eval_runtime": 401.3991,
5383
+ "eval_samples_per_second": 23.612,
5384
+ "eval_steps_per_second": 2.952,
5385
+ "step": 33600
5386
+ },
5387
+ {
5388
+ "epoch": 14.19,
5389
+ "grad_norm": 22.767732620239258,
5390
+ "learning_rate": 2.6549116607773855e-05,
5391
+ "loss": 0.9435,
5392
+ "step": 33700
5393
+ },
5394
+ {
5395
+ "epoch": 14.19,
5396
+ "eval_cer": 0.39616655586406474,
5397
+ "eval_loss": 2.0086452960968018,
5398
+ "eval_runtime": 374.9441,
5399
+ "eval_samples_per_second": 25.278,
5400
+ "eval_steps_per_second": 3.16,
5401
+ "step": 33700
5402
  }
5403
  ],
5404
  "logging_steps": 100,
 
5406
  "num_input_tokens_seen": 0,
5407
  "num_train_epochs": 30,
5408
  "save_steps": 100,
5409
+ "total_flos": 3.693186041855623e+20,
5410
  "train_batch_size": 8,
5411
  "trial_name": null,
5412
  "trial_params": null