Krisbiantoro commited on
Commit
de5d682
·
1 Parent(s): 1fa4da1

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. adapter_model.bin +1 -1
  2. optimizer.pt +2 -2
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +750 -2
  6. training_args.bin +1 -1
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fb41ed248b38a0d5c018c6a8583bdfba80b6d2fa606506f6813c43958a99bbf
3
  size 75641741
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af4c3a65b7a773b24b082fa3d2bdf3b889f0c61459c6c6dc53f6e339785bcd4
3
  size 75641741
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:446b771342b713e07f4acb65267ef0679ef125566f0de980a5ac69b427e4f799
3
- size 151224453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:461887c9ec08fe4b1692b9fa6def1741d6a60b64bb08c37d3fdc064aa795bd26
3
+ size 151222021
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:972139d83957a9cf2600cb6eeca17287d7a5377c33a53500ae7e13fe830ad36b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdda7482499b855a06935901db3ef5c0346fd12eb58a510bec30c9e4dab13b1e
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f03c5a9d9fd80414287e17e83b0d9b80cfdcf5ad7a4a9d63da800e7a44f10384
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:749e3338e97b1ab6783e7f614fd50b2475fd9e06f92f74c2d73b555d52907729
3
  size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.09665104141497124,
5
  "eval_steps": 200,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -739,6 +739,754 @@
739
  "rewards/margins": -0.20669928193092346,
740
  "rewards/rejected": -5.946280479431152,
741
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
742
  }
743
  ],
744
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.19331142470520007,
5
  "eval_steps": 200,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
739
  "rewards/margins": -0.20669928193092346,
740
  "rewards/rejected": -5.946280479431152,
741
  "step": 500
742
+ },
743
+ {
744
+ "epoch": 0.1,
745
+ "learning_rate": 0.0004777992277992278,
746
+ "logits/chosen": -2.692495584487915,
747
+ "logits/rejected": -2.6421055793762207,
748
+ "logps/chosen": -172.01806640625,
749
+ "logps/rejected": -165.9178466796875,
750
+ "loss": 1.2994,
751
+ "rewards/accuracies": 0.574999988079071,
752
+ "rewards/chosen": -4.898122787475586,
753
+ "rewards/margins": 0.04086846113204956,
754
+ "rewards/rejected": -4.938991546630859,
755
+ "step": 510
756
+ },
757
+ {
758
+ "epoch": 0.1,
759
+ "learning_rate": 0.0004874517374517375,
760
+ "logits/chosen": -2.7154297828674316,
761
+ "logits/rejected": -2.5865087509155273,
762
+ "logps/chosen": -134.430419921875,
763
+ "logps/rejected": -144.77151489257812,
764
+ "loss": 1.0245,
765
+ "rewards/accuracies": 0.574999988079071,
766
+ "rewards/chosen": -3.9665591716766357,
767
+ "rewards/margins": 0.17584654688835144,
768
+ "rewards/rejected": -4.1424055099487305,
769
+ "step": 520
770
+ },
771
+ {
772
+ "epoch": 0.1,
773
+ "learning_rate": 0.0004961389961389962,
774
+ "logits/chosen": -2.863107442855835,
775
+ "logits/rejected": -2.8306002616882324,
776
+ "logps/chosen": -269.8907165527344,
777
+ "logps/rejected": -273.8556823730469,
778
+ "loss": 3.3909,
779
+ "rewards/accuracies": 0.48750001192092896,
780
+ "rewards/chosen": -13.715133666992188,
781
+ "rewards/margins": 0.3205181956291199,
782
+ "rewards/rejected": -14.035652160644531,
783
+ "step": 530
784
+ },
785
+ {
786
+ "epoch": 0.1,
787
+ "learning_rate": 0.0004999979503849796,
788
+ "logits/chosen": -3.0216221809387207,
789
+ "logits/rejected": -3.014930248260498,
790
+ "logps/chosen": -194.19422912597656,
791
+ "logps/rejected": -201.08251953125,
792
+ "loss": 2.3037,
793
+ "rewards/accuracies": 0.6000000238418579,
794
+ "rewards/chosen": -7.459778785705566,
795
+ "rewards/margins": -0.22785942256450653,
796
+ "rewards/rejected": -7.231919288635254,
797
+ "step": 540
798
+ },
799
+ {
800
+ "epoch": 0.11,
801
+ "learning_rate": 0.0004999854250815602,
802
+ "logits/chosen": -2.967331647872925,
803
+ "logits/rejected": -2.933845043182373,
804
+ "logps/chosen": -215.81240844726562,
805
+ "logps/rejected": -204.9075927734375,
806
+ "loss": 2.1232,
807
+ "rewards/accuracies": 0.4749999940395355,
808
+ "rewards/chosen": -7.889649868011475,
809
+ "rewards/margins": 0.17769476771354675,
810
+ "rewards/rejected": -8.067344665527344,
811
+ "step": 550
812
+ },
813
+ {
814
+ "epoch": 0.11,
815
+ "learning_rate": 0.00049996151371953,
816
+ "logits/chosen": -3.052489757537842,
817
+ "logits/rejected": -3.0332350730895996,
818
+ "logps/chosen": -181.4810791015625,
819
+ "logps/rejected": -172.9596405029297,
820
+ "loss": 1.3367,
821
+ "rewards/accuracies": 0.5,
822
+ "rewards/chosen": -5.431277275085449,
823
+ "rewards/margins": 0.03274815157055855,
824
+ "rewards/rejected": -5.464025020599365,
825
+ "step": 560
826
+ },
827
+ {
828
+ "epoch": 0.11,
829
+ "learning_rate": 0.0004999262173879769,
830
+ "logits/chosen": -3.040531635284424,
831
+ "logits/rejected": -3.037515163421631,
832
+ "logps/chosen": -179.90150451660156,
833
+ "logps/rejected": -185.0644989013672,
834
+ "loss": 1.4866,
835
+ "rewards/accuracies": 0.574999988079071,
836
+ "rewards/chosen": -6.061680316925049,
837
+ "rewards/margins": 0.6980069279670715,
838
+ "rewards/rejected": -6.759686470031738,
839
+ "step": 570
840
+ },
841
+ {
842
+ "epoch": 0.11,
843
+ "learning_rate": 0.0004998795376945392,
844
+ "logits/chosen": -3.021232843399048,
845
+ "logits/rejected": -2.9936585426330566,
846
+ "logps/chosen": -175.13389587402344,
847
+ "logps/rejected": -156.2028350830078,
848
+ "loss": 1.7235,
849
+ "rewards/accuracies": 0.375,
850
+ "rewards/chosen": -5.4631123542785645,
851
+ "rewards/margins": -0.5610149502754211,
852
+ "rewards/rejected": -4.902098178863525,
853
+ "step": 580
854
+ },
855
+ {
856
+ "epoch": 0.11,
857
+ "learning_rate": 0.0004998214767653319,
858
+ "logits/chosen": -2.944594621658325,
859
+ "logits/rejected": -3.0027194023132324,
860
+ "logps/chosen": -189.1737823486328,
861
+ "logps/rejected": -175.7019500732422,
862
+ "loss": 1.9699,
863
+ "rewards/accuracies": 0.4749999940395355,
864
+ "rewards/chosen": -6.499063014984131,
865
+ "rewards/margins": -0.3209795355796814,
866
+ "rewards/rejected": -6.178082466125488,
867
+ "step": 590
868
+ },
869
+ {
870
+ "epoch": 0.12,
871
+ "learning_rate": 0.0004997520372448494,
872
+ "logits/chosen": -2.8185458183288574,
873
+ "logits/rejected": -2.798320770263672,
874
+ "logps/chosen": -262.44500732421875,
875
+ "logps/rejected": -245.83889770507812,
876
+ "loss": 3.8268,
877
+ "rewards/accuracies": 0.4749999940395355,
878
+ "rewards/chosen": -12.636558532714844,
879
+ "rewards/margins": -0.5035432577133179,
880
+ "rewards/rejected": -12.133015632629395,
881
+ "step": 600
882
+ },
883
+ {
884
+ "epoch": 0.12,
885
+ "eval_logits/chosen": -2.9935925006866455,
886
+ "eval_logits/rejected": -2.961137533187866,
887
+ "eval_logps/chosen": -184.5051727294922,
888
+ "eval_logps/rejected": -181.61184692382812,
889
+ "eval_loss": 1.3598365783691406,
890
+ "eval_rewards/accuracies": 0.5193312168121338,
891
+ "eval_rewards/chosen": -6.594781875610352,
892
+ "eval_rewards/margins": 0.12169010192155838,
893
+ "eval_rewards/rejected": -6.7164716720581055,
894
+ "eval_runtime": 1314.8357,
895
+ "eval_samples_per_second": 0.728,
896
+ "eval_steps_per_second": 0.728,
897
+ "step": 600
898
+ },
899
+ {
900
+ "epoch": 0.12,
901
+ "learning_rate": 0.0004996712222958462,
902
+ "logits/chosen": -2.9863028526306152,
903
+ "logits/rejected": -2.841834306716919,
904
+ "logps/chosen": -234.49893188476562,
905
+ "logps/rejected": -215.51123046875,
906
+ "loss": 3.4529,
907
+ "rewards/accuracies": 0.574999988079071,
908
+ "rewards/chosen": -11.542932510375977,
909
+ "rewards/margins": -1.085506796836853,
910
+ "rewards/rejected": -10.457425117492676,
911
+ "step": 610
912
+ },
913
+ {
914
+ "epoch": 0.12,
915
+ "learning_rate": 0.0004995790355991916,
916
+ "logits/chosen": -2.9887356758117676,
917
+ "logits/rejected": -2.887108564376831,
918
+ "logps/chosen": -202.21853637695312,
919
+ "logps/rejected": -198.64749145507812,
920
+ "loss": 1.6552,
921
+ "rewards/accuracies": 0.574999988079071,
922
+ "rewards/chosen": -7.725579261779785,
923
+ "rewards/margins": 0.2432982176542282,
924
+ "rewards/rejected": -7.96887731552124,
925
+ "step": 620
926
+ },
927
+ {
928
+ "epoch": 0.12,
929
+ "learning_rate": 0.0004994754813537031,
930
+ "logits/chosen": -3.116293430328369,
931
+ "logits/rejected": -3.126661539077759,
932
+ "logps/chosen": -201.38604736328125,
933
+ "logps/rejected": -192.84515380859375,
934
+ "loss": 1.9923,
935
+ "rewards/accuracies": 0.4749999940395355,
936
+ "rewards/chosen": -8.627758979797363,
937
+ "rewards/margins": -0.4443356990814209,
938
+ "rewards/rejected": -8.183423042297363,
939
+ "step": 630
940
+ },
941
+ {
942
+ "epoch": 0.12,
943
+ "learning_rate": 0.000499372567166064,
944
+ "logits/chosen": -3.480961561203003,
945
+ "logits/rejected": -3.476128339767456,
946
+ "logps/chosen": -226.2942657470703,
947
+ "logps/rejected": -198.20462036132812,
948
+ "loss": 3.5697,
949
+ "rewards/accuracies": 0.4749999940395355,
950
+ "rewards/chosen": -10.370187759399414,
951
+ "rewards/margins": -1.4912974834442139,
952
+ "rewards/rejected": -8.878890037536621,
953
+ "step": 640
954
+ },
955
+ {
956
+ "epoch": 0.13,
957
+ "learning_rate": 0.0004992474279997049,
958
+ "logits/chosen": -3.380039930343628,
959
+ "logits/rejected": -3.3540236949920654,
960
+ "logps/chosen": -183.65228271484375,
961
+ "logps/rejected": -192.43350219726562,
962
+ "loss": 2.0561,
963
+ "rewards/accuracies": 0.512499988079071,
964
+ "rewards/chosen": -6.808587551116943,
965
+ "rewards/margins": 1.2175410985946655,
966
+ "rewards/rejected": -8.026129722595215,
967
+ "step": 650
968
+ },
969
+ {
970
+ "epoch": 0.13,
971
+ "learning_rate": 0.0004991109363882065,
972
+ "logits/chosen": -2.5853641033172607,
973
+ "logits/rejected": -2.6103484630584717,
974
+ "logps/chosen": -464.68310546875,
975
+ "logps/rejected": -447.2250061035156,
976
+ "loss": 11.4738,
977
+ "rewards/accuracies": 0.5625,
978
+ "rewards/chosen": -34.34156036376953,
979
+ "rewards/margins": -1.95975661277771,
980
+ "rewards/rejected": -32.381797790527344,
981
+ "step": 660
982
+ },
983
+ {
984
+ "epoch": 0.13,
985
+ "learning_rate": 0.0004989630985483375,
986
+ "logits/chosen": -2.7174885272979736,
987
+ "logits/rejected": -2.707397937774658,
988
+ "logps/chosen": -447.7940979003906,
989
+ "logps/rejected": -407.99493408203125,
990
+ "loss": 12.9625,
991
+ "rewards/accuracies": 0.4124999940395355,
992
+ "rewards/chosen": -32.60967254638672,
993
+ "rewards/margins": -3.206895112991333,
994
+ "rewards/rejected": -29.40277671813965,
995
+ "step": 670
996
+ },
997
+ {
998
+ "epoch": 0.13,
999
+ "learning_rate": 0.0004988203490218075,
1000
+ "logits/chosen": -2.946742296218872,
1001
+ "logits/rejected": -2.8993031978607178,
1002
+ "logps/chosen": -445.21142578125,
1003
+ "logps/rejected": -432.021240234375,
1004
+ "loss": 10.7741,
1005
+ "rewards/accuracies": 0.48750001192092896,
1006
+ "rewards/chosen": -32.348819732666016,
1007
+ "rewards/margins": -1.2561819553375244,
1008
+ "rewards/rejected": -31.092632293701172,
1009
+ "step": 680
1010
+ },
1011
+ {
1012
+ "epoch": 0.13,
1013
+ "learning_rate": 0.0004986509723258511,
1014
+ "logits/chosen": -3.1020426750183105,
1015
+ "logits/rejected": -3.133068561553955,
1016
+ "logps/chosen": -427.81756591796875,
1017
+ "logps/rejected": -416.2047424316406,
1018
+ "loss": 11.4242,
1019
+ "rewards/accuracies": 0.4375,
1020
+ "rewards/chosen": -31.230037689208984,
1021
+ "rewards/margins": -0.655289351940155,
1022
+ "rewards/rejected": -30.57474708557129,
1023
+ "step": 690
1024
+ },
1025
+ {
1026
+ "epoch": 0.14,
1027
+ "learning_rate": 0.0004984702703514565,
1028
+ "logits/chosen": -3.0160446166992188,
1029
+ "logits/rejected": -3.0138049125671387,
1030
+ "logps/chosen": -433.4644470214844,
1031
+ "logps/rejected": -405.3623046875,
1032
+ "loss": 10.8165,
1033
+ "rewards/accuracies": 0.4749999940395355,
1034
+ "rewards/chosen": -30.836090087890625,
1035
+ "rewards/margins": -2.514590263366699,
1036
+ "rewards/rejected": -28.321496963500977,
1037
+ "step": 700
1038
+ },
1039
+ {
1040
+ "epoch": 0.14,
1041
+ "learning_rate": 0.0004982782513290365,
1042
+ "logits/chosen": -3.1978919506073,
1043
+ "logits/rejected": -3.197380542755127,
1044
+ "logps/chosen": -422.511962890625,
1045
+ "logps/rejected": -402.30938720703125,
1046
+ "loss": 12.8189,
1047
+ "rewards/accuracies": 0.4375,
1048
+ "rewards/chosen": -30.785781860351562,
1049
+ "rewards/margins": -2.3963069915771484,
1050
+ "rewards/rejected": -28.389474868774414,
1051
+ "step": 710
1052
+ },
1053
+ {
1054
+ "epoch": 0.14,
1055
+ "learning_rate": 0.0004980749240044603,
1056
+ "logits/chosen": -3.1342532634735107,
1057
+ "logits/rejected": -3.1338047981262207,
1058
+ "logps/chosen": -403.13494873046875,
1059
+ "logps/rejected": -357.790771484375,
1060
+ "loss": 11.5675,
1061
+ "rewards/accuracies": 0.4000000059604645,
1062
+ "rewards/chosen": -28.892202377319336,
1063
+ "rewards/margins": -3.723827362060547,
1064
+ "rewards/rejected": -25.168371200561523,
1065
+ "step": 720
1066
+ },
1067
+ {
1068
+ "epoch": 0.14,
1069
+ "learning_rate": 0.0004978602976386554,
1070
+ "logits/chosen": -3.0739312171936035,
1071
+ "logits/rejected": -3.0738184452056885,
1072
+ "logps/chosen": -381.2265625,
1073
+ "logps/rejected": -378.4680480957031,
1074
+ "loss": 11.5225,
1075
+ "rewards/accuracies": 0.5,
1076
+ "rewards/chosen": -27.621994018554688,
1077
+ "rewards/margins": -0.9019744992256165,
1078
+ "rewards/rejected": -26.720022201538086,
1079
+ "step": 730
1080
+ },
1081
+ {
1082
+ "epoch": 0.14,
1083
+ "learning_rate": 0.0004976343820071849,
1084
+ "logits/chosen": -3.166983127593994,
1085
+ "logits/rejected": -3.1671650409698486,
1086
+ "logps/chosen": -408.42071533203125,
1087
+ "logps/rejected": -387.2364196777344,
1088
+ "loss": 13.9818,
1089
+ "rewards/accuracies": 0.5,
1090
+ "rewards/chosen": -29.39678382873535,
1091
+ "rewards/margins": -2.448943614959717,
1092
+ "rewards/rejected": -26.94784164428711,
1093
+ "step": 740
1094
+ },
1095
+ {
1096
+ "epoch": 0.14,
1097
+ "learning_rate": 0.0004973971873998035,
1098
+ "logits/chosen": -3.0561656951904297,
1099
+ "logits/rejected": -3.0557007789611816,
1100
+ "logps/chosen": -417.0025329589844,
1101
+ "logps/rejected": -349.56463623046875,
1102
+ "loss": 12.3073,
1103
+ "rewards/accuracies": 0.4625000059604645,
1104
+ "rewards/chosen": -29.997058868408203,
1105
+ "rewards/margins": -5.650521278381348,
1106
+ "rewards/rejected": -24.346534729003906,
1107
+ "step": 750
1108
+ },
1109
+ {
1110
+ "epoch": 0.15,
1111
+ "learning_rate": 0.0004971487246199875,
1112
+ "logits/chosen": -3.0265376567840576,
1113
+ "logits/rejected": -3.0265283584594727,
1114
+ "logps/chosen": -434.55419921875,
1115
+ "logps/rejected": -381.22808837890625,
1116
+ "loss": 12.0398,
1117
+ "rewards/accuracies": 0.4749999940395355,
1118
+ "rewards/chosen": -30.918407440185547,
1119
+ "rewards/margins": -4.258307456970215,
1120
+ "rewards/rejected": -26.66009521484375,
1121
+ "step": 760
1122
+ },
1123
+ {
1124
+ "epoch": 0.15,
1125
+ "learning_rate": 0.000496889004984444,
1126
+ "logits/chosen": -2.8932366371154785,
1127
+ "logits/rejected": -2.895204544067383,
1128
+ "logps/chosen": -396.3167419433594,
1129
+ "logps/rejected": -428.53839111328125,
1130
+ "loss": 9.4104,
1131
+ "rewards/accuracies": 0.5625,
1132
+ "rewards/chosen": -28.4284610748291,
1133
+ "rewards/margins": 1.9928890466690063,
1134
+ "rewards/rejected": -30.42134666442871,
1135
+ "step": 770
1136
+ },
1137
+ {
1138
+ "epoch": 0.15,
1139
+ "learning_rate": 0.0004966180403225946,
1140
+ "logits/chosen": -2.895068407058716,
1141
+ "logits/rejected": -2.894937753677368,
1142
+ "logps/chosen": -395.245849609375,
1143
+ "logps/rejected": -378.4429626464844,
1144
+ "loss": 10.2846,
1145
+ "rewards/accuracies": 0.48750001192092896,
1146
+ "rewards/chosen": -28.237285614013672,
1147
+ "rewards/margins": -1.503316879272461,
1148
+ "rewards/rejected": -26.733972549438477,
1149
+ "step": 780
1150
+ },
1151
+ {
1152
+ "epoch": 0.15,
1153
+ "learning_rate": 0.0004963358429760368,
1154
+ "logits/chosen": -2.551323652267456,
1155
+ "logits/rejected": -2.5523290634155273,
1156
+ "logps/chosen": -477.17327880859375,
1157
+ "logps/rejected": -427.474853515625,
1158
+ "loss": 12.1626,
1159
+ "rewards/accuracies": 0.42500001192092896,
1160
+ "rewards/chosen": -35.30048370361328,
1161
+ "rewards/margins": -4.13530158996582,
1162
+ "rewards/rejected": -31.165185928344727,
1163
+ "step": 790
1164
+ },
1165
+ {
1166
+ "epoch": 0.15,
1167
+ "learning_rate": 0.0004960424257979822,
1168
+ "logits/chosen": -2.7914838790893555,
1169
+ "logits/rejected": -2.790367841720581,
1170
+ "logps/chosen": -478.9364318847656,
1171
+ "logps/rejected": -461.02655029296875,
1172
+ "loss": 10.3404,
1173
+ "rewards/accuracies": 0.5375000238418579,
1174
+ "rewards/chosen": -35.25132369995117,
1175
+ "rewards/margins": -1.6512939929962158,
1176
+ "rewards/rejected": -33.60003662109375,
1177
+ "step": 800
1178
+ },
1179
+ {
1180
+ "epoch": 0.15,
1181
+ "eval_logits/chosen": -3.0016679763793945,
1182
+ "eval_logits/rejected": -3.0014870166778564,
1183
+ "eval_logps/chosen": -441.87054443359375,
1184
+ "eval_logps/rejected": -399.2597961425781,
1185
+ "eval_loss": 11.33322811126709,
1186
+ "eval_rewards/accuracies": 0.4555903971195221,
1187
+ "eval_rewards/chosen": -32.331321716308594,
1188
+ "eval_rewards/margins": -3.850048303604126,
1189
+ "eval_rewards/rejected": -28.481277465820312,
1190
+ "eval_runtime": 1312.1317,
1191
+ "eval_samples_per_second": 0.729,
1192
+ "eval_steps_per_second": 0.729,
1193
+ "step": 800
1194
+ },
1195
+ {
1196
+ "epoch": 0.16,
1197
+ "learning_rate": 0.0004957378021526705,
1198
+ "logits/chosen": -2.9228155612945557,
1199
+ "logits/rejected": -2.925412654876709,
1200
+ "logps/chosen": -481.1300354003906,
1201
+ "logps/rejected": -465.1952209472656,
1202
+ "loss": 11.3707,
1203
+ "rewards/accuracies": 0.4375,
1204
+ "rewards/chosen": -35.0019416809082,
1205
+ "rewards/margins": -1.9142730236053467,
1206
+ "rewards/rejected": -33.08766555786133,
1207
+ "step": 810
1208
+ },
1209
+ {
1210
+ "epoch": 0.16,
1211
+ "learning_rate": 0.0004954219859147614,
1212
+ "logits/chosen": -3.0219039916992188,
1213
+ "logits/rejected": -3.0174221992492676,
1214
+ "logps/chosen": -364.8722839355469,
1215
+ "logps/rejected": -295.5256652832031,
1216
+ "loss": 12.3607,
1217
+ "rewards/accuracies": 0.38749998807907104,
1218
+ "rewards/chosen": -25.810903549194336,
1219
+ "rewards/margins": -5.978564262390137,
1220
+ "rewards/rejected": -19.832340240478516,
1221
+ "step": 820
1222
+ },
1223
+ {
1224
+ "epoch": 0.16,
1225
+ "learning_rate": 0.0004950949914687023,
1226
+ "logits/chosen": -3.118417263031006,
1227
+ "logits/rejected": -3.1218113899230957,
1228
+ "logps/chosen": -469.4906311035156,
1229
+ "logps/rejected": -412.5103454589844,
1230
+ "loss": 11.9496,
1231
+ "rewards/accuracies": 0.48750001192092896,
1232
+ "rewards/chosen": -33.402565002441406,
1233
+ "rewards/margins": -4.156603813171387,
1234
+ "rewards/rejected": -29.245960235595703,
1235
+ "step": 830
1236
+ },
1237
+ {
1238
+ "epoch": 0.16,
1239
+ "learning_rate": 0.0004947568337080732,
1240
+ "logits/chosen": -3.0231635570526123,
1241
+ "logits/rejected": -3.0243794918060303,
1242
+ "logps/chosen": -382.8542785644531,
1243
+ "logps/rejected": -346.9595031738281,
1244
+ "loss": 9.7701,
1245
+ "rewards/accuracies": 0.4375,
1246
+ "rewards/chosen": -27.092443466186523,
1247
+ "rewards/margins": -3.154633045196533,
1248
+ "rewards/rejected": -23.93781089782715,
1249
+ "step": 840
1250
+ },
1251
+ {
1252
+ "epoch": 0.16,
1253
+ "learning_rate": 0.0004944075280349084,
1254
+ "logits/chosen": -3.034963846206665,
1255
+ "logits/rejected": -3.0339653491973877,
1256
+ "logps/chosen": -385.3253173828125,
1257
+ "logps/rejected": -367.23638916015625,
1258
+ "loss": 9.2328,
1259
+ "rewards/accuracies": 0.4625000059604645,
1260
+ "rewards/chosen": -27.111583709716797,
1261
+ "rewards/margins": -1.6716159582138062,
1262
+ "rewards/rejected": -25.43996810913086,
1263
+ "step": 850
1264
+ },
1265
+ {
1266
+ "epoch": 0.17,
1267
+ "learning_rate": 0.0004940470903589948,
1268
+ "logits/chosen": -3.1586310863494873,
1269
+ "logits/rejected": -3.128281831741333,
1270
+ "logps/chosen": -487.345703125,
1271
+ "logps/rejected": -389.9637756347656,
1272
+ "loss": 12.7587,
1273
+ "rewards/accuracies": 0.32499998807907104,
1274
+ "rewards/chosen": -34.73926544189453,
1275
+ "rewards/margins": -6.996462821960449,
1276
+ "rewards/rejected": -27.7428035736084,
1277
+ "step": 860
1278
+ },
1279
+ {
1280
+ "epoch": 0.17,
1281
+ "learning_rate": 0.0004936755370971475,
1282
+ "logits/chosen": -2.9109790325164795,
1283
+ "logits/rejected": -2.888288974761963,
1284
+ "logps/chosen": -463.9207458496094,
1285
+ "logps/rejected": -376.2655334472656,
1286
+ "loss": 13.8618,
1287
+ "rewards/accuracies": 0.36250001192092896,
1288
+ "rewards/chosen": -34.083106994628906,
1289
+ "rewards/margins": -7.244679927825928,
1290
+ "rewards/rejected": -26.838430404663086,
1291
+ "step": 870
1292
+ },
1293
+ {
1294
+ "epoch": 0.17,
1295
+ "learning_rate": 0.0004932928851724621,
1296
+ "logits/chosen": -2.8432798385620117,
1297
+ "logits/rejected": -2.8495278358459473,
1298
+ "logps/chosen": -365.90679931640625,
1299
+ "logps/rejected": -361.8638000488281,
1300
+ "loss": 8.7379,
1301
+ "rewards/accuracies": 0.4749999940395355,
1302
+ "rewards/chosen": -26.055978775024414,
1303
+ "rewards/margins": -0.3134794235229492,
1304
+ "rewards/rejected": -25.742502212524414,
1305
+ "step": 880
1306
+ },
1307
+ {
1308
+ "epoch": 0.17,
1309
+ "learning_rate": 0.0004928991520135436,
1310
+ "logits/chosen": -2.6536898612976074,
1311
+ "logits/rejected": -2.6361289024353027,
1312
+ "logps/chosen": -519.7141723632812,
1313
+ "logps/rejected": -389.46575927734375,
1314
+ "loss": 15.3084,
1315
+ "rewards/accuracies": 0.38749998807907104,
1316
+ "rewards/chosen": -37.92496871948242,
1317
+ "rewards/margins": -10.107343673706055,
1318
+ "rewards/rejected": -27.817623138427734,
1319
+ "step": 890
1320
+ },
1321
+ {
1322
+ "epoch": 0.17,
1323
+ "learning_rate": 0.0004924943555537128,
1324
+ "logits/chosen": -3.1115312576293945,
1325
+ "logits/rejected": -3.0791449546813965,
1326
+ "logps/chosen": -469.66351318359375,
1327
+ "logps/rejected": -401.14508056640625,
1328
+ "loss": 13.3952,
1329
+ "rewards/accuracies": 0.4124999940395355,
1330
+ "rewards/chosen": -33.486183166503906,
1331
+ "rewards/margins": -5.449090480804443,
1332
+ "rewards/rejected": -28.037090301513672,
1333
+ "step": 900
1334
+ },
1335
+ {
1336
+ "epoch": 0.18,
1337
+ "learning_rate": 0.0004920785142301893,
1338
+ "logits/chosen": -2.454453945159912,
1339
+ "logits/rejected": -2.4585988521575928,
1340
+ "logps/chosen": -463.24517822265625,
1341
+ "logps/rejected": -402.23773193359375,
1342
+ "loss": 13.7904,
1343
+ "rewards/accuracies": 0.4000000059604645,
1344
+ "rewards/chosen": -34.2692985534668,
1345
+ "rewards/margins": -5.860762596130371,
1346
+ "rewards/rejected": -28.40853500366211,
1347
+ "step": 910
1348
+ },
1349
+ {
1350
+ "epoch": 0.18,
1351
+ "learning_rate": 0.0004916516469832524,
1352
+ "logits/chosen": -2.8716561794281006,
1353
+ "logits/rejected": -2.8634109497070312,
1354
+ "logps/chosen": -347.35198974609375,
1355
+ "logps/rejected": -340.46942138671875,
1356
+ "loss": 7.8634,
1357
+ "rewards/accuracies": 0.4749999940395355,
1358
+ "rewards/chosen": -24.902542114257812,
1359
+ "rewards/margins": -1.0167404413223267,
1360
+ "rewards/rejected": -23.885799407958984,
1361
+ "step": 920
1362
+ },
1363
+ {
1364
+ "epoch": 0.18,
1365
+ "learning_rate": 0.0004912137732553772,
1366
+ "logits/chosen": -3.2272415161132812,
1367
+ "logits/rejected": -3.2235121726989746,
1368
+ "logps/chosen": -450.973876953125,
1369
+ "logps/rejected": -462.0083923339844,
1370
+ "loss": 9.2953,
1371
+ "rewards/accuracies": 0.512499988079071,
1372
+ "rewards/chosen": -32.52794647216797,
1373
+ "rewards/margins": 0.38686689734458923,
1374
+ "rewards/rejected": -32.91481399536133,
1375
+ "step": 930
1376
+ },
1377
+ {
1378
+ "epoch": 0.18,
1379
+ "learning_rate": 0.0004907649129903504,
1380
+ "logits/chosen": -2.647204875946045,
1381
+ "logits/rejected": -2.6482150554656982,
1382
+ "logps/chosen": -373.737060546875,
1383
+ "logps/rejected": -383.5912780761719,
1384
+ "loss": 7.0717,
1385
+ "rewards/accuracies": 0.4625000059604645,
1386
+ "rewards/chosen": -26.5350284576416,
1387
+ "rewards/margins": 0.7499195337295532,
1388
+ "rewards/rejected": -27.284948348999023,
1389
+ "step": 940
1390
+ },
1391
+ {
1392
+ "epoch": 0.18,
1393
+ "learning_rate": 0.0004903050866323608,
1394
+ "logits/chosen": -3.079465627670288,
1395
+ "logits/rejected": -3.0794615745544434,
1396
+ "logps/chosen": -395.6184997558594,
1397
+ "logps/rejected": -406.3051452636719,
1398
+ "loss": 9.1411,
1399
+ "rewards/accuracies": 0.48750001192092896,
1400
+ "rewards/chosen": -29.57635498046875,
1401
+ "rewards/margins": 0.8788874745368958,
1402
+ "rewards/rejected": -30.45524024963379,
1403
+ "step": 950
1404
+ },
1405
+ {
1406
+ "epoch": 0.19,
1407
+ "learning_rate": 0.000489834315125069,
1408
+ "logits/chosen": -3.1981568336486816,
1409
+ "logits/rejected": -3.1923341751098633,
1410
+ "logps/chosen": -453.5596618652344,
1411
+ "logps/rejected": -425.4774475097656,
1412
+ "loss": 11.2943,
1413
+ "rewards/accuracies": 0.4749999940395355,
1414
+ "rewards/chosen": -32.61115646362305,
1415
+ "rewards/margins": -2.69136118888855,
1416
+ "rewards/rejected": -29.9197998046875,
1417
+ "step": 960
1418
+ },
1419
+ {
1420
+ "epoch": 0.19,
1421
+ "learning_rate": 0.0004893526199106531,
1422
+ "logits/chosen": -2.876206874847412,
1423
+ "logits/rejected": -2.881593942642212,
1424
+ "logps/chosen": -433.71636962890625,
1425
+ "logps/rejected": -391.15692138671875,
1426
+ "loss": 10.9992,
1427
+ "rewards/accuracies": 0.4124999940395355,
1428
+ "rewards/chosen": -31.739410400390625,
1429
+ "rewards/margins": -3.6809983253479004,
1430
+ "rewards/rejected": -28.058406829833984,
1431
+ "step": 970
1432
+ },
1433
+ {
1434
+ "epoch": 0.19,
1435
+ "learning_rate": 0.0004888600229288316,
1436
+ "logits/chosen": -2.865589141845703,
1437
+ "logits/rejected": -2.8664183616638184,
1438
+ "logps/chosen": -359.43023681640625,
1439
+ "logps/rejected": -321.70599365234375,
1440
+ "loss": 7.9915,
1441
+ "rewards/accuracies": 0.4749999940395355,
1442
+ "rewards/chosen": -25.906265258789062,
1443
+ "rewards/margins": -3.289003849029541,
1444
+ "rewards/rejected": -22.617259979248047,
1445
+ "step": 980
1446
+ },
1447
+ {
1448
+ "epoch": 0.19,
1449
+ "learning_rate": 0.0004883565466158652,
1450
+ "logits/chosen": -2.8116517066955566,
1451
+ "logits/rejected": -2.782489776611328,
1452
+ "logps/chosen": -494.1553649902344,
1453
+ "logps/rejected": -430.222412109375,
1454
+ "loss": 13.6529,
1455
+ "rewards/accuracies": 0.4000000059604645,
1456
+ "rewards/chosen": -36.09266662597656,
1457
+ "rewards/margins": -5.284867286682129,
1458
+ "rewards/rejected": -30.807796478271484,
1459
+ "step": 990
1460
+ },
1461
+ {
1462
+ "epoch": 0.19,
1463
+ "learning_rate": 0.0004878422139035341,
1464
+ "logits/chosen": -2.4114489555358887,
1465
+ "logits/rejected": -2.377622604370117,
1466
+ "logps/chosen": -482.96856689453125,
1467
+ "logps/rejected": -437.3125,
1468
+ "loss": 11.137,
1469
+ "rewards/accuracies": 0.5,
1470
+ "rewards/chosen": -34.935813903808594,
1471
+ "rewards/margins": -3.0001707077026367,
1472
+ "rewards/rejected": -31.93564224243164,
1473
+ "step": 1000
1474
+ },
1475
+ {
1476
+ "epoch": 0.19,
1477
+ "eval_logits/chosen": -2.2959094047546387,
1478
+ "eval_logits/rejected": -2.2838947772979736,
1479
+ "eval_logps/chosen": -438.8805236816406,
1480
+ "eval_logps/rejected": -399.5718688964844,
1481
+ "eval_loss": 10.402800559997559,
1482
+ "eval_rewards/accuracies": 0.45036572217941284,
1483
+ "eval_rewards/chosen": -32.032310485839844,
1484
+ "eval_rewards/margins": -3.519833564758301,
1485
+ "eval_rewards/rejected": -28.51247787475586,
1486
+ "eval_runtime": 1313.6418,
1487
+ "eval_samples_per_second": 0.729,
1488
+ "eval_steps_per_second": 0.729,
1489
+ "step": 1000
1490
  }
1491
  ],
1492
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2d24f52f3de130e83c614d71d9b7667bb85da1de04a98bd39d203115dbe7af7
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5958bf8104a088a450e0b4310623d9428611ebdb5fcbb92ab0f567ab6883bfbd
3
  size 4091