mgh6 commited on
Commit
7465631
·
verified ·
1 Parent(s): 578c50e

Training in progress, epoch 9, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79ecb58c1fcba639498eb4bb8f9fd11485e8f410da635bffd7990f7d24a9ad84
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99e82e82f374c2673ef04a502ab788d5b3699ba02ae9cbb612822d23f1048aaa
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc51d878f8242e2efc63c0c0a3e6c6b8ebb1c5eedd276b9fd4ca5863d4b4c44c
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11a72e36fd2dbdba8586fcbf48397a69a66a780b5217bacb7a4c129bba516b9e
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11f1252e969592bce36bc2e2fc4eed6af06892f0a3f45eb582be003ac5046ad5
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2168e6be520a803e330b14854aa047c840fbbc36e1cd7f9a8956c981c5afc55f
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37acfbdf3414734e537adb979fbdbc4d04a389a43d3107d724270efe19fa191f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6602c18a1ebe894c1d51ce5c9cea3744db091c466423f123d4fa8b7754d9378a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.0,
5
  "eval_steps": 50,
6
- "global_step": 2457,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -742,6 +742,81 @@
742
  "eval_samples_per_second": 41.573,
743
  "eval_steps_per_second": 20.786,
744
  "step": 2450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745
  }
746
  ],
747
  "logging_steps": 50,
@@ -756,12 +831,12 @@
756
  "should_evaluate": false,
757
  "should_log": false,
758
  "should_save": true,
759
- "should_training_stop": false
760
  },
761
  "attributes": {}
762
  }
763
  },
764
- "total_flos": 6.316931282433475e+17,
765
  "train_batch_size": 2,
766
  "trial_name": null,
767
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.966800689259047,
5
  "eval_steps": 50,
6
+ "global_step": 2720,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
742
  "eval_samples_per_second": 41.573,
743
  "eval_steps_per_second": 20.786,
744
  "step": 2450
745
+ },
746
+ {
747
+ "epoch": 9.15807007466973,
748
+ "grad_norm": 55.69232940673828,
749
+ "learning_rate": 8.088235294117648e-06,
750
+ "loss": 0.7995,
751
+ "step": 2500
752
+ },
753
+ {
754
+ "epoch": 9.15807007466973,
755
+ "eval_loss": 1.2360199689865112,
756
+ "eval_runtime": 116.0847,
757
+ "eval_samples_per_second": 41.573,
758
+ "eval_steps_per_second": 20.787,
759
+ "step": 2500
760
+ },
761
+ {
762
+ "epoch": 9.341872487076392,
763
+ "grad_norm": 62.24937438964844,
764
+ "learning_rate": 6.25e-06,
765
+ "loss": 0.8149,
766
+ "step": 2550
767
+ },
768
+ {
769
+ "epoch": 9.341872487076392,
770
+ "eval_loss": 1.2363650798797607,
771
+ "eval_runtime": 116.0508,
772
+ "eval_samples_per_second": 41.585,
773
+ "eval_steps_per_second": 20.793,
774
+ "step": 2550
775
+ },
776
+ {
777
+ "epoch": 9.525674899483056,
778
+ "grad_norm": 50.01460266113281,
779
+ "learning_rate": 4.411764705882353e-06,
780
+ "loss": 0.8146,
781
+ "step": 2600
782
+ },
783
+ {
784
+ "epoch": 9.525674899483056,
785
+ "eval_loss": 1.2402119636535645,
786
+ "eval_runtime": 116.1442,
787
+ "eval_samples_per_second": 41.552,
788
+ "eval_steps_per_second": 20.776,
789
+ "step": 2600
790
+ },
791
+ {
792
+ "epoch": 9.709477311889719,
793
+ "grad_norm": 60.61581802368164,
794
+ "learning_rate": 2.573529411764706e-06,
795
+ "loss": 0.8075,
796
+ "step": 2650
797
+ },
798
+ {
799
+ "epoch": 9.709477311889719,
800
+ "eval_loss": 1.2341493368148804,
801
+ "eval_runtime": 116.3546,
802
+ "eval_samples_per_second": 41.477,
803
+ "eval_steps_per_second": 20.738,
804
+ "step": 2650
805
+ },
806
+ {
807
+ "epoch": 9.89327972429638,
808
+ "grad_norm": 54.73764419555664,
809
+ "learning_rate": 7.352941176470589e-07,
810
+ "loss": 0.8108,
811
+ "step": 2700
812
+ },
813
+ {
814
+ "epoch": 9.89327972429638,
815
+ "eval_loss": 1.2355538606643677,
816
+ "eval_runtime": 116.1282,
817
+ "eval_samples_per_second": 41.558,
818
+ "eval_steps_per_second": 20.779,
819
+ "step": 2700
820
  }
821
  ],
822
  "logging_steps": 50,
 
831
  "should_evaluate": false,
832
  "should_log": false,
833
  "should_save": true,
834
+ "should_training_stop": true
835
  },
836
  "attributes": {}
837
  }
838
  },
839
+ "total_flos": 6.99559007609684e+17,
840
  "train_batch_size": 2,
841
  "trial_name": null,
842
  "trial_params": null