ashanhr committed on
Commit
4552077
1 Parent(s): eae20ee

Training in progress, step 3400, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bf71b0c4b5b42be2c849af7ec687de0ddfb09171ee59dc0c9b9e95435acb0ae
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a09ae00ebc62ebcd9a090efa8346b0b355da09866b115ffafeb9415b7348a1f
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e23c99508fe49e7d270ef25d2b0bfddea927c361d4b4b369364b31eb5bfb76aa
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72774ec6409b39c81c1aa220704d91224c558c6ea5b86aa374ce4de5fd8e4117
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:794316e4b1b49164c3c1b912b03bc9e5c56a1bde3e75f18b56f92a8c3ef576cd
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d8a703e7b68d5ff0f9ab10e9d171bd7ce6ce70b6e4e532a37f2b2de5240614
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caa8f6a1dd5101e10414474b080f8f573bd80f1bc9e5c32a51cbba518e254664
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d8426d4aae6ad94244099d33cc22d6aea5fa1d82b7f3fc974721970e8c187bc
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9caf1980567e30be4f62497610fa1f8c611b6eb31004cde77b37b2a91c8900c9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cdf3746ad5565e79e6e1cb78e78a4ab2fa5d41524e5a44428479a58b87e840e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3470848242475268,
5
  "eval_steps": 100,
6
- "global_step": 3200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -519,6 +519,38 @@
519
  "eval_samples_per_second": 27.548,
520
  "eval_steps_per_second": 3.444,
521
  "step": 3200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  }
523
  ],
524
  "logging_steps": 100,
@@ -526,7 +558,7 @@
526
  "num_input_tokens_seen": 0,
527
  "num_train_epochs": 30,
528
  "save_steps": 100,
529
- "total_flos": 3.518283860575115e+19,
530
  "train_batch_size": 8,
531
  "trial_name": null,
532
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4312776257629973,
5
  "eval_steps": 100,
6
+ "global_step": 3400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
519
  "eval_samples_per_second": 27.548,
520
  "eval_steps_per_second": 3.444,
521
  "step": 3200
522
+ },
523
+ {
524
+ "epoch": 1.39,
525
+ "grad_norm": 3.094930648803711,
526
+ "learning_rate": 4.8026148409894e-05,
527
+ "loss": 2.2166,
528
+ "step": 3300
529
+ },
530
+ {
531
+ "epoch": 1.39,
532
+ "eval_cer": 0.5857514371749247,
533
+ "eval_loss": 3.2282423973083496,
534
+ "eval_runtime": 358.5567,
535
+ "eval_samples_per_second": 26.434,
536
+ "eval_steps_per_second": 3.305,
537
+ "step": 3300
538
+ },
539
+ {
540
+ "epoch": 1.43,
541
+ "grad_norm": 5.626856803894043,
542
+ "learning_rate": 4.795547703180212e-05,
543
+ "loss": 2.6222,
544
+ "step": 3400
545
+ },
546
+ {
547
+ "epoch": 1.43,
548
+ "eval_cer": 0.5495043213014743,
549
+ "eval_loss": 1.7686785459518433,
550
+ "eval_runtime": 342.362,
551
+ "eval_samples_per_second": 27.684,
552
+ "eval_steps_per_second": 3.461,
553
+ "step": 3400
554
  }
555
  ],
556
  "logging_steps": 100,
 
558
  "num_input_tokens_seen": 0,
559
  "num_train_epochs": 30,
560
  "save_steps": 100,
561
+ "total_flos": 3.7374716231040434e+19,
562
  "train_batch_size": 8,
563
  "trial_name": null,
564
  "trial_params": null