plip commited on
Commit
563786f
1 Parent(s): c38daa5

Training in progress, step 340000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b12d2d3c7e9c65fa1cdfdb5c2f4716c387d264ad1a9842602482f2263f5d461
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9697a4208e91543945338a4f4e4b0865355f4f56763ee61fbd92200c086baa3e
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe8e1879944525f3e680f804773ec89ad33764384906428a52a2c3c6871bf701
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724b6d72f0d9d61428cae11f63e8d73911644650c2b3a36828f03dde6fa5b8fd
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf9af55dec33596d4e4eab80f0d6e856e790d4e6b88e6dd783d976e786aca8d
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf9af55dec33596d4e4eab80f0d6e856e790d4e6b88e6dd783d976e786aca8d
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf9af55dec33596d4e4eab80f0d6e856e790d4e6b88e6dd783d976e786aca8d
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf9af55dec33596d4e4eab80f0d6e856e790d4e6b88e6dd783d976e786aca8d
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf9af55dec33596d4e4eab80f0d6e856e790d4e6b88e6dd783d976e786aca8d
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf9af55dec33596d4e4eab80f0d6e856e790d4e6b88e6dd783d976e786aca8d
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf9af55dec33596d4e4eab80f0d6e856e790d4e6b88e6dd783d976e786aca8d
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf9af55dec33596d4e4eab80f0d6e856e790d4e6b88e6dd783d976e786aca8d
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:636cb28fce30ad56f68aface20193360fd815697da4c2ec39f5ca647b5e6b45b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0f100ea6da4a9ff03fc2cc9b0cbbba13b42ccc41293a1d6143e5a081a97f70
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.688902350940143,
5
- "global_step": 330000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6606,11 +6606,211 @@
6606
  "eval_samples_per_second": 1519.716,
6607
  "eval_steps_per_second": 24.199,
6608
  "step": 330000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6609
  }
6610
  ],
6611
  "max_steps": 500000,
6612
  "num_train_epochs": 12,
6613
- "total_flos": 1.0543058100622153e+22,
6614
  "trial_name": null,
6615
  "trial_params": null
6616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.921899391877723,
5
+ "global_step": 340000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6606
  "eval_samples_per_second": 1519.716,
6607
  "eval_steps_per_second": 24.199,
6608
  "step": 330000
6609
+ },
6610
+ {
6611
+ "epoch": 7.7,
6612
+ "learning_rate": 9.196345158526793e-05,
6613
+ "loss": 0.2632,
6614
+ "step": 330500
6615
+ },
6616
+ {
6617
+ "epoch": 7.71,
6618
+ "learning_rate": 9.153197361721149e-05,
6619
+ "loss": 0.263,
6620
+ "step": 331000
6621
+ },
6622
+ {
6623
+ "epoch": 7.71,
6624
+ "eval_loss": 0.24357490241527557,
6625
+ "eval_runtime": 1.4413,
6626
+ "eval_samples_per_second": 1524.993,
6627
+ "eval_steps_per_second": 24.283,
6628
+ "step": 331000
6629
+ },
6630
+ {
6631
+ "epoch": 7.72,
6632
+ "learning_rate": 9.110118972532302e-05,
6633
+ "loss": 0.2627,
6634
+ "step": 331500
6635
+ },
6636
+ {
6637
+ "epoch": 7.74,
6638
+ "learning_rate": 9.067110462058634e-05,
6639
+ "loss": 0.2631,
6640
+ "step": 332000
6641
+ },
6642
+ {
6643
+ "epoch": 7.74,
6644
+ "eval_loss": 0.2456502616405487,
6645
+ "eval_runtime": 1.4523,
6646
+ "eval_samples_per_second": 1513.463,
6647
+ "eval_steps_per_second": 24.1,
6648
+ "step": 332000
6649
+ },
6650
+ {
6651
+ "epoch": 7.75,
6652
+ "learning_rate": 9.024172300634305e-05,
6653
+ "loss": 0.2629,
6654
+ "step": 332500
6655
+ },
6656
+ {
6657
+ "epoch": 7.76,
6658
+ "learning_rate": 8.981304957824182e-05,
6659
+ "loss": 0.2629,
6660
+ "step": 333000
6661
+ },
6662
+ {
6663
+ "epoch": 7.76,
6664
+ "eval_loss": 0.2470385730266571,
6665
+ "eval_runtime": 1.4527,
6666
+ "eval_samples_per_second": 1513.011,
6667
+ "eval_steps_per_second": 24.093,
6668
+ "step": 333000
6669
+ },
6670
+ {
6671
+ "epoch": 7.77,
6672
+ "learning_rate": 8.938508902418643e-05,
6673
+ "loss": 0.2623,
6674
+ "step": 333500
6675
+ },
6676
+ {
6677
+ "epoch": 7.78,
6678
+ "learning_rate": 8.89578460242851e-05,
6679
+ "loss": 0.2626,
6680
+ "step": 334000
6681
+ },
6682
+ {
6683
+ "epoch": 7.78,
6684
+ "eval_loss": 0.24880406260490417,
6685
+ "eval_runtime": 1.4245,
6686
+ "eval_samples_per_second": 1543.039,
6687
+ "eval_steps_per_second": 24.571,
6688
+ "step": 334000
6689
+ },
6690
+ {
6691
+ "epoch": 7.79,
6692
+ "learning_rate": 8.85313252507988e-05,
6693
+ "loss": 0.2621,
6694
+ "step": 334500
6695
+ },
6696
+ {
6697
+ "epoch": 7.81,
6698
+ "learning_rate": 8.810553136809027e-05,
6699
+ "loss": 0.2626,
6700
+ "step": 335000
6701
+ },
6702
+ {
6703
+ "epoch": 7.81,
6704
+ "eval_loss": 0.2465057224035263,
6705
+ "eval_runtime": 1.453,
6706
+ "eval_samples_per_second": 1512.782,
6707
+ "eval_steps_per_second": 24.089,
6708
+ "step": 335000
6709
+ },
6710
+ {
6711
+ "epoch": 7.82,
6712
+ "learning_rate": 8.76804690325733e-05,
6713
+ "loss": 0.2622,
6714
+ "step": 335500
6715
+ },
6716
+ {
6717
+ "epoch": 7.83,
6718
+ "learning_rate": 8.725614289266137e-05,
6719
+ "loss": 0.2623,
6720
+ "step": 336000
6721
+ },
6722
+ {
6723
+ "epoch": 7.83,
6724
+ "eval_loss": 0.24810338020324707,
6725
+ "eval_runtime": 1.4394,
6726
+ "eval_samples_per_second": 1526.974,
6727
+ "eval_steps_per_second": 24.315,
6728
+ "step": 336000
6729
+ },
6730
+ {
6731
+ "epoch": 7.84,
6732
+ "learning_rate": 8.683255758871734e-05,
6733
+ "loss": 0.2627,
6734
+ "step": 336500
6735
+ },
6736
+ {
6737
+ "epoch": 7.85,
6738
+ "learning_rate": 8.640971775300207e-05,
6739
+ "loss": 0.2624,
6740
+ "step": 337000
6741
+ },
6742
+ {
6743
+ "epoch": 7.85,
6744
+ "eval_loss": 0.24493196606636047,
6745
+ "eval_runtime": 1.4281,
6746
+ "eval_samples_per_second": 1539.133,
6747
+ "eval_steps_per_second": 24.508,
6748
+ "step": 337000
6749
+ },
6750
+ {
6751
+ "epoch": 7.86,
6752
+ "learning_rate": 8.598762800962431e-05,
6753
+ "loss": 0.2625,
6754
+ "step": 337500
6755
+ },
6756
+ {
6757
+ "epoch": 7.88,
6758
+ "learning_rate": 8.55662929744899e-05,
6759
+ "loss": 0.2623,
6760
+ "step": 338000
6761
+ },
6762
+ {
6763
+ "epoch": 7.88,
6764
+ "eval_loss": 0.2459965944290161,
6765
+ "eval_runtime": 1.463,
6766
+ "eval_samples_per_second": 1502.342,
6767
+ "eval_steps_per_second": 23.923,
6768
+ "step": 338000
6769
+ },
6770
+ {
6771
+ "epoch": 7.89,
6772
+ "learning_rate": 8.514571725525124e-05,
6773
+ "loss": 0.2626,
6774
+ "step": 338500
6775
+ },
6776
+ {
6777
+ "epoch": 7.9,
6778
+ "learning_rate": 8.47259054512571e-05,
6779
+ "loss": 0.2626,
6780
+ "step": 339000
6781
+ },
6782
+ {
6783
+ "epoch": 7.9,
6784
+ "eval_loss": 0.24472270905971527,
6785
+ "eval_runtime": 1.4079,
6786
+ "eval_samples_per_second": 1561.143,
6787
+ "eval_steps_per_second": 24.859,
6788
+ "step": 339000
6789
+ },
6790
+ {
6791
+ "epoch": 7.91,
6792
+ "learning_rate": 8.430686215350198e-05,
6793
+ "loss": 0.2624,
6794
+ "step": 339500
6795
+ },
6796
+ {
6797
+ "epoch": 7.92,
6798
+ "learning_rate": 8.388859194457636e-05,
6799
+ "loss": 0.262,
6800
+ "step": 340000
6801
+ },
6802
+ {
6803
+ "epoch": 7.92,
6804
+ "eval_loss": 0.24556826055049896,
6805
+ "eval_runtime": 1.3884,
6806
+ "eval_samples_per_second": 1583.125,
6807
+ "eval_steps_per_second": 25.209,
6808
+ "step": 340000
6809
  }
6810
  ],
6811
  "max_steps": 500000,
6812
  "num_train_epochs": 12,
6813
+ "total_flos": 1.0862545239396653e+22,
6814
  "trial_name": null,
6815
  "trial_params": null
6816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe8e1879944525f3e680f804773ec89ad33764384906428a52a2c3c6871bf701
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724b6d72f0d9d61428cae11f63e8d73911644650c2b3a36828f03dde6fa5b8fd
3
  size 102501541