NairaRahim commited on
Commit
06ff273
·
verified ·
1 Parent(s): 444a3b7

Training in progress, epoch 78, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5ab75838ed7afe0c288b3631cba017faa548802aa5787f848080d63a4faa88d
3
  size 559424792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21c9f69a173a892667d6dee7aaad371f40e7baf8cb3e9d969f1b6dcde4a515f1
3
  size 559424792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b09965c1441b1cd1585c57f2d17e8d0e016910d7c90ad54c3a3afac8664c3bfd
3
  size 1118926970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b013c178fe9fdbc20f357d276fbc376ffeb5ae200d4656a25c0840d60905025
3
  size 1118926970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eddd447be293e64952cf96e8afe87fbb852cea08cd9ba63106be57df271d8556
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:325b7db62095a95388ccc4086fa74938caa1cf51efa7e26a65d55b88f9c20149
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc365eae5b467a6c47056a0e754f854a75cac7f154e6d27e155fce16f2e6ebd0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43652567e6245845fba84f7f23bb5bb43e6dfe5f4e79e8b80a87c4194eca0dd2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 12.716951370239258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
4
- "epoch": 77.0,
5
  "eval_steps": 500,
6
- "global_step": 100485,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7651,6 +7651,105 @@
7651
  "eval_samples_per_second": 29.542,
7652
  "eval_steps_per_second": 3.712,
7653
  "step": 100485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7654
  }
7655
  ],
7656
  "logging_steps": 100,
@@ -7665,7 +7764,7 @@
7665
  "early_stopping_threshold": 0.0
7666
  },
7667
  "attributes": {
7668
- "early_stopping_patience_counter": 9
7669
  }
7670
  },
7671
  "TrainerControl": {
@@ -7674,12 +7773,12 @@
7674
  "should_evaluate": false,
7675
  "should_log": false,
7676
  "should_save": true,
7677
- "should_training_stop": false
7678
  },
7679
  "attributes": {}
7680
  }
7681
  },
7682
- "total_flos": 4.681650983960218e+16,
7683
  "train_batch_size": 8,
7684
  "trial_name": null,
7685
  "trial_params": null
 
1
  {
2
  "best_metric": 12.716951370239258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
4
+ "epoch": 78.0,
5
  "eval_steps": 500,
6
+ "global_step": 101790,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7651
  "eval_samples_per_second": 29.542,
7652
  "eval_steps_per_second": 3.712,
7653
  "step": 100485
7654
+ },
7655
+ {
7656
+ "epoch": 77.01149425287356,
7657
+ "grad_norm": 1.5276216268539429,
7658
+ "learning_rate": 1.8869731800766285e-06,
7659
+ "loss": 12.0855,
7660
+ "step": 100500
7661
+ },
7662
+ {
7663
+ "epoch": 77.08812260536398,
7664
+ "grad_norm": 1.6150214672088623,
7665
+ "learning_rate": 1.839080459770115e-06,
7666
+ "loss": 12.0257,
7667
+ "step": 100600
7668
+ },
7669
+ {
7670
+ "epoch": 77.16475095785441,
7671
+ "grad_norm": 1.33854341506958,
7672
+ "learning_rate": 1.7911877394636015e-06,
7673
+ "loss": 11.8612,
7674
+ "step": 100700
7675
+ },
7676
+ {
7677
+ "epoch": 77.24137931034483,
7678
+ "grad_norm": 3.1214921474456787,
7679
+ "learning_rate": 1.7432950191570881e-06,
7680
+ "loss": 11.8497,
7681
+ "step": 100800
7682
+ },
7683
+ {
7684
+ "epoch": 77.31800766283524,
7685
+ "grad_norm": 1.6240577697753906,
7686
+ "learning_rate": 1.695402298850575e-06,
7687
+ "loss": 11.7641,
7688
+ "step": 100900
7689
+ },
7690
+ {
7691
+ "epoch": 77.39463601532567,
7692
+ "grad_norm": 1.1317625045776367,
7693
+ "learning_rate": 1.6475095785440615e-06,
7694
+ "loss": 11.846,
7695
+ "step": 101000
7696
+ },
7697
+ {
7698
+ "epoch": 77.47126436781609,
7699
+ "grad_norm": 1.294534683227539,
7700
+ "learning_rate": 1.599616858237548e-06,
7701
+ "loss": 11.7364,
7702
+ "step": 101100
7703
+ },
7704
+ {
7705
+ "epoch": 77.5478927203065,
7706
+ "grad_norm": 1.099564790725708,
7707
+ "learning_rate": 1.5517241379310346e-06,
7708
+ "loss": 11.8554,
7709
+ "step": 101200
7710
+ },
7711
+ {
7712
+ "epoch": 77.62452107279694,
7713
+ "grad_norm": 1.1482292413711548,
7714
+ "learning_rate": 1.503831417624521e-06,
7715
+ "loss": 11.9346,
7716
+ "step": 101300
7717
+ },
7718
+ {
7719
+ "epoch": 77.70114942528735,
7720
+ "grad_norm": 1.198670506477356,
7721
+ "learning_rate": 1.4559386973180077e-06,
7722
+ "loss": 11.8685,
7723
+ "step": 101400
7724
+ },
7725
+ {
7726
+ "epoch": 77.77777777777777,
7727
+ "grad_norm": 2.162407159805298,
7728
+ "learning_rate": 1.4080459770114944e-06,
7729
+ "loss": 11.8622,
7730
+ "step": 101500
7731
+ },
7732
+ {
7733
+ "epoch": 77.8544061302682,
7734
+ "grad_norm": 1.3662519454956055,
7735
+ "learning_rate": 1.3601532567049808e-06,
7736
+ "loss": 11.9364,
7737
+ "step": 101600
7738
+ },
7739
+ {
7740
+ "epoch": 77.93103448275862,
7741
+ "grad_norm": 1.1566288471221924,
7742
+ "learning_rate": 1.3122605363984675e-06,
7743
+ "loss": 11.7345,
7744
+ "step": 101700
7745
+ },
7746
+ {
7747
+ "epoch": 78.0,
7748
+ "eval_loss": 12.726273536682129,
7749
+ "eval_runtime": 44.1782,
7750
+ "eval_samples_per_second": 29.539,
7751
+ "eval_steps_per_second": 3.712,
7752
+ "step": 101790
7753
  }
7754
  ],
7755
  "logging_steps": 100,
 
7764
  "early_stopping_threshold": 0.0
7765
  },
7766
  "attributes": {
7767
+ "early_stopping_patience_counter": 10
7768
  }
7769
  },
7770
  "TrainerControl": {
 
7773
  "should_evaluate": false,
7774
  "should_log": false,
7775
  "should_save": true,
7776
+ "should_training_stop": true
7777
  },
7778
  "attributes": {}
7779
  }
7780
  },
7781
+ "total_flos": 4.742440753614336e+16,
7782
  "train_batch_size": 8,
7783
  "trial_name": null,
7784
  "trial_params": null