mgh6 commited on
Commit
9b7d7e6
·
verified ·
1 Parent(s): 44374d9

Training in progress, epoch 9, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba163da7ac5c05dca7b23ba9ae84e43000a41894b62d4b0bd6c156263a09d667
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a664102d1b3ac5fecaadaaf9968da194c74be4d99b0e5648fc5ba6e5edbbd53
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:228620246956b9c20bcf1b4373421a28779315491724ca8b3e1ccf5acdfefaac
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d77595fb689b5c663dc6d92d024119a8b254d933443aff4cf5d2c4e893f1277
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d2d83df56a301a7a031ee1a2baca538bacda8ebe202ee2460e255209acd4857
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecdbbc81f1577c58564d520539f0ecd1e3c63b150d117eedae0016db0ec8a85c
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5bdf0f87d77b7de3138456a39226e05b8a47b0bad9a0f4dd8d726aafd08598b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6602c18a1ebe894c1d51ce5c9cea3744db091c466423f123d4fa8b7754d9378a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.998966111430212,
5
  "eval_steps": 50,
6
- "global_step": 2448,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -727,6 +727,96 @@
727
  "eval_samples_per_second": 41.601,
728
  "eval_steps_per_second": 20.801,
729
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  }
731
  ],
732
  "logging_steps": 50,
@@ -741,12 +831,12 @@
741
  "should_evaluate": false,
742
  "should_log": false,
743
  "should_save": true,
744
- "should_training_stop": false
745
  },
746
  "attributes": {}
747
  }
748
  },
749
- "total_flos": 6.316931268340613e+17,
750
  "train_batch_size": 2,
751
  "trial_name": null,
752
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.998851234922459,
5
  "eval_steps": 50,
6
+ "global_step": 2720,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
727
  "eval_samples_per_second": 41.601,
728
  "eval_steps_per_second": 20.801,
729
  "step": 2400
730
+ },
731
+ {
732
+ "epoch": 9.00631820792648,
733
+ "grad_norm": 69.84994506835938,
734
+ "learning_rate": 9.926470588235293e-06,
735
+ "loss": 0.8202,
736
+ "step": 2450
737
+ },
738
+ {
739
+ "epoch": 9.00631820792648,
740
+ "eval_loss": 1.2383702993392944,
741
+ "eval_runtime": 116.7345,
742
+ "eval_samples_per_second": 41.342,
743
+ "eval_steps_per_second": 20.671,
744
+ "step": 2450
745
+ },
746
+ {
747
+ "epoch": 9.190120620333142,
748
+ "grad_norm": 75.23961639404297,
749
+ "learning_rate": 8.088235294117648e-06,
750
+ "loss": 0.8143,
751
+ "step": 2500
752
+ },
753
+ {
754
+ "epoch": 9.190120620333142,
755
+ "eval_loss": 1.236066460609436,
756
+ "eval_runtime": 115.996,
757
+ "eval_samples_per_second": 41.605,
758
+ "eval_steps_per_second": 20.802,
759
+ "step": 2500
760
+ },
761
+ {
762
+ "epoch": 9.373923032739805,
763
+ "grad_norm": 62.9267692565918,
764
+ "learning_rate": 6.25e-06,
765
+ "loss": 0.8106,
766
+ "step": 2550
767
+ },
768
+ {
769
+ "epoch": 9.373923032739805,
770
+ "eval_loss": 1.23640775680542,
771
+ "eval_runtime": 115.848,
772
+ "eval_samples_per_second": 41.658,
773
+ "eval_steps_per_second": 20.829,
774
+ "step": 2550
775
+ },
776
+ {
777
+ "epoch": 9.557725445146467,
778
+ "grad_norm": 54.76566696166992,
779
+ "learning_rate": 4.411764705882353e-06,
780
+ "loss": 0.8144,
781
+ "step": 2600
782
+ },
783
+ {
784
+ "epoch": 9.557725445146467,
785
+ "eval_loss": 1.2419943809509277,
786
+ "eval_runtime": 116.0232,
787
+ "eval_samples_per_second": 41.595,
788
+ "eval_steps_per_second": 20.798,
789
+ "step": 2600
790
+ },
791
+ {
792
+ "epoch": 9.74152785755313,
793
+ "grad_norm": 51.20401382446289,
794
+ "learning_rate": 2.573529411764706e-06,
795
+ "loss": 0.8061,
796
+ "step": 2650
797
+ },
798
+ {
799
+ "epoch": 9.74152785755313,
800
+ "eval_loss": 1.2368206977844238,
801
+ "eval_runtime": 116.069,
802
+ "eval_samples_per_second": 41.579,
803
+ "eval_steps_per_second": 20.789,
804
+ "step": 2650
805
+ },
806
+ {
807
+ "epoch": 9.925330269959794,
808
+ "grad_norm": 52.51292419433594,
809
+ "learning_rate": 7.352941176470589e-07,
810
+ "loss": 0.8122,
811
+ "step": 2700
812
+ },
813
+ {
814
+ "epoch": 9.925330269959794,
815
+ "eval_loss": 1.2355531454086304,
816
+ "eval_runtime": 116.1057,
817
+ "eval_samples_per_second": 41.566,
818
+ "eval_steps_per_second": 20.783,
819
+ "step": 2700
820
  }
821
  ],
822
  "logging_steps": 50,
 
831
  "should_evaluate": false,
832
  "should_log": false,
833
  "should_save": true,
834
+ "should_training_stop": true
835
  },
836
  "attributes": {}
837
  }
838
  },
839
+ "total_flos": 7.018175725001769e+17,
840
  "train_batch_size": 2,
841
  "trial_name": null,
842
  "trial_params": null