AmberYifan commited on
Commit
1f1e7bf
1 Parent(s): 5b33f92

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/global_step939/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff875785ab924ddd06a66be771594fe532d0d87d67059a754f87a79e011492f6
3
+ size 16060527788
last-checkpoint/global_step939/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57602bd05fa70544df76925acf63ee24ea4316d87843ebc84abd169f793452f1
3
+ size 16060527788
last-checkpoint/global_step939/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19747266673f3adc37eb34cd72e64194822c760661df80b1e4e1f14bd06413f0
3
+ size 16060527788
last-checkpoint/global_step939/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62ea89cf520034e1a0d86ed2f8efed6dbcec31fe835a5303f632c173b9ce209b
3
+ size 16060527788
last-checkpoint/global_step939/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bdb7dbee9c796f9aafc9eee12e5897c440ad5a648fd8df8520d6e2380a7c1a5
3
+ size 150693
last-checkpoint/global_step939/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c0406da72cdfc68a165458ddeef11d1da49d6d97bb21ad9504edf784cedb97f
3
+ size 150693
last-checkpoint/global_step939/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:332ccdbe01b4fa4b6a35acac97bc26143f603959f45d4d12c635ad4e6a2e8632
3
+ size 150693
last-checkpoint/global_step939/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f7528f0ef5669d4d46b8576941e18d57af6e08eb71acd5ffb084bdd17ea161d
3
+ size 150693
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step626
 
1
+ global_step939
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d54039dc041f3e015fb26a58181e18b15300073aa7f189906aa141eb163193c
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7e8fb1adccd2e7fe2bbeefbafa82a4565057fab3010779fd5e5ab4c409bb496
3
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be7408aaad853291659cca456977f64b9c12df36e72fcf70bfbd86190f59689d
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc8a42b274b60f2ed063dbc5a0db133fa0fba6e446796edd49203d49a11700aa
3
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94015a2bd29aadfc52d528f933601b3767e3e5d51650d43bc11706148be83fcb
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4395bbcc01e2ccad78f50643525aed5bf2d95e1b0f624507fdeacc3d0870011
3
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74954b42e041e5dce92411f4519e39cc59ac7fb94ec1eabe1f6cc6ac121815b9
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60fed5474de507e5cf5f93d70d0b84d7bf7848f88ce747be3d6fa3998258d84
3
  size 1168138808
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2307c03867cef25b5028feb9a23f80e784b9af9a615de13ddca560a6a90fb593
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7cf0d34d60dfda516cf9661904550e2e294e723edd07c25c738f05e8ba92d1
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50218cfaecdd818354e567b7167c13899e3b42297e7d8f58bd7e732cfa547800
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b183324e8227a51a9556d86b2ad893a8c4c52205ed4a737356c6611dac7353
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9814a66b49861f5495b06dae3be12ddf7185b88e2cae1fb808ca9efd99d5807f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac69e994090f4818cb1fa6f6cefa363178552c3c731c6507ff195bcb07fd5bef
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7be93040a65e0a29975f6c70b94418e1fdf88423a50c58aa572141d3c92fbfc
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68e61b63402f8afb1f69c960f7944965655dac11e3ccf29919c282f23931f86
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7725b44c69725321786f1e58dccd7ea4d3ae5794ea47bd49c0d4a139aec266d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe7d17fbcfa5bee1bf6cb5aedab3ff5a70436912c200d7301d173be443809d63
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 626,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -984,6 +984,487 @@
984
  "eval_samples_per_second": 16.066,
985
  "eval_steps_per_second": 0.562,
986
  "step": 626
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987
  }
988
  ],
989
  "logging_steps": 10,
@@ -998,7 +1479,7 @@
998
  "should_evaluate": false,
999
  "should_log": false,
1000
  "should_save": true,
1001
- "should_training_stop": false
1002
  },
1003
  "attributes": {}
1004
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 939,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
984
  "eval_samples_per_second": 16.066,
985
  "eval_steps_per_second": 0.562,
986
  "step": 626
987
+ },
988
+ {
989
+ "epoch": 2.012779552715655,
990
+ "grad_norm": 4.74652717171804,
991
+ "learning_rate": 1.8284023668639053e-07,
992
+ "logits/chosen": -0.875,
993
+ "logits/rejected": -0.96484375,
994
+ "logps/chosen": -40.75,
995
+ "logps/rejected": -56.0,
996
+ "loss": 0.4227,
997
+ "rewards/accuracies": 0.4625000059604645,
998
+ "rewards/chosen": -1.25,
999
+ "rewards/margins": 1.078125,
1000
+ "rewards/rejected": -2.328125,
1001
+ "step": 630
1002
+ },
1003
+ {
1004
+ "epoch": 2.0447284345047922,
1005
+ "grad_norm": 9.093805196751525,
1006
+ "learning_rate": 1.7692307692307693e-07,
1007
+ "logits/chosen": -0.95703125,
1008
+ "logits/rejected": -0.96484375,
1009
+ "logps/chosen": -46.25,
1010
+ "logps/rejected": -56.75,
1011
+ "loss": 0.4062,
1012
+ "rewards/accuracies": 0.5249999761581421,
1013
+ "rewards/chosen": -1.234375,
1014
+ "rewards/margins": 1.6640625,
1015
+ "rewards/rejected": -2.90625,
1016
+ "step": 640
1017
+ },
1018
+ {
1019
+ "epoch": 2.07667731629393,
1020
+ "grad_norm": 7.904913836224486,
1021
+ "learning_rate": 1.710059171597633e-07,
1022
+ "logits/chosen": -0.94140625,
1023
+ "logits/rejected": -0.84765625,
1024
+ "logps/chosen": -44.75,
1025
+ "logps/rejected": -53.75,
1026
+ "loss": 0.403,
1027
+ "rewards/accuracies": 0.4625000059604645,
1028
+ "rewards/chosen": -1.265625,
1029
+ "rewards/margins": 1.34375,
1030
+ "rewards/rejected": -2.609375,
1031
+ "step": 650
1032
+ },
1033
+ {
1034
+ "epoch": 2.108626198083067,
1035
+ "grad_norm": 9.216092905835959,
1036
+ "learning_rate": 1.650887573964497e-07,
1037
+ "logits/chosen": -0.85546875,
1038
+ "logits/rejected": -0.81640625,
1039
+ "logps/chosen": -42.5,
1040
+ "logps/rejected": -47.0,
1041
+ "loss": 0.4476,
1042
+ "rewards/accuracies": 0.4124999940395355,
1043
+ "rewards/chosen": -1.15625,
1044
+ "rewards/margins": 1.1796875,
1045
+ "rewards/rejected": -2.34375,
1046
+ "step": 660
1047
+ },
1048
+ {
1049
+ "epoch": 2.1405750798722045,
1050
+ "grad_norm": 8.710740799549797,
1051
+ "learning_rate": 1.591715976331361e-07,
1052
+ "logits/chosen": -1.0234375,
1053
+ "logits/rejected": -0.875,
1054
+ "logps/chosen": -44.5,
1055
+ "logps/rejected": -54.25,
1056
+ "loss": 0.4187,
1057
+ "rewards/accuracies": 0.512499988079071,
1058
+ "rewards/chosen": -0.92578125,
1059
+ "rewards/margins": 1.625,
1060
+ "rewards/rejected": -2.546875,
1061
+ "step": 670
1062
+ },
1063
+ {
1064
+ "epoch": 2.1725239616613417,
1065
+ "grad_norm": 8.094002644119932,
1066
+ "learning_rate": 1.5325443786982248e-07,
1067
+ "logits/chosen": -1.0703125,
1068
+ "logits/rejected": -0.8984375,
1069
+ "logps/chosen": -43.5,
1070
+ "logps/rejected": -59.0,
1071
+ "loss": 0.3973,
1072
+ "rewards/accuracies": 0.5375000238418579,
1073
+ "rewards/chosen": -0.7578125,
1074
+ "rewards/margins": 2.0,
1075
+ "rewards/rejected": -2.765625,
1076
+ "step": 680
1077
+ },
1078
+ {
1079
+ "epoch": 2.2044728434504792,
1080
+ "grad_norm": 8.249215092288534,
1081
+ "learning_rate": 1.4733727810650885e-07,
1082
+ "logits/chosen": -0.96875,
1083
+ "logits/rejected": -0.9765625,
1084
+ "logps/chosen": -41.75,
1085
+ "logps/rejected": -59.75,
1086
+ "loss": 0.3862,
1087
+ "rewards/accuracies": 0.5375000238418579,
1088
+ "rewards/chosen": -1.2109375,
1089
+ "rewards/margins": 1.59375,
1090
+ "rewards/rejected": -2.8125,
1091
+ "step": 690
1092
+ },
1093
+ {
1094
+ "epoch": 2.236421725239617,
1095
+ "grad_norm": 8.713919175156146,
1096
+ "learning_rate": 1.4142011834319526e-07,
1097
+ "logits/chosen": -1.015625,
1098
+ "logits/rejected": -0.81640625,
1099
+ "logps/chosen": -44.25,
1100
+ "logps/rejected": -52.25,
1101
+ "loss": 0.3898,
1102
+ "rewards/accuracies": 0.512499988079071,
1103
+ "rewards/chosen": -1.109375,
1104
+ "rewards/margins": 1.6875,
1105
+ "rewards/rejected": -2.796875,
1106
+ "step": 700
1107
+ },
1108
+ {
1109
+ "epoch": 2.268370607028754,
1110
+ "grad_norm": 2.4789298662571055,
1111
+ "learning_rate": 1.3550295857988164e-07,
1112
+ "logits/chosen": -1.0703125,
1113
+ "logits/rejected": -0.75,
1114
+ "logps/chosen": -49.0,
1115
+ "logps/rejected": -55.0,
1116
+ "loss": 0.387,
1117
+ "rewards/accuracies": 0.4625000059604645,
1118
+ "rewards/chosen": -1.421875,
1119
+ "rewards/margins": 1.6015625,
1120
+ "rewards/rejected": -3.03125,
1121
+ "step": 710
1122
+ },
1123
+ {
1124
+ "epoch": 2.3003194888178915,
1125
+ "grad_norm": 13.408171707422571,
1126
+ "learning_rate": 1.2958579881656802e-07,
1127
+ "logits/chosen": -0.81640625,
1128
+ "logits/rejected": -0.859375,
1129
+ "logps/chosen": -42.0,
1130
+ "logps/rejected": -54.5,
1131
+ "loss": 0.4224,
1132
+ "rewards/accuracies": 0.4625000059604645,
1133
+ "rewards/chosen": -1.546875,
1134
+ "rewards/margins": 1.3984375,
1135
+ "rewards/rejected": -2.953125,
1136
+ "step": 720
1137
+ },
1138
+ {
1139
+ "epoch": 2.3322683706070286,
1140
+ "grad_norm": 5.024401475671983,
1141
+ "learning_rate": 1.2366863905325443e-07,
1142
+ "logits/chosen": -0.95703125,
1143
+ "logits/rejected": -0.89453125,
1144
+ "logps/chosen": -47.0,
1145
+ "logps/rejected": -57.0,
1146
+ "loss": 0.393,
1147
+ "rewards/accuracies": 0.5,
1148
+ "rewards/chosen": -1.2421875,
1149
+ "rewards/margins": 1.671875,
1150
+ "rewards/rejected": -2.921875,
1151
+ "step": 730
1152
+ },
1153
+ {
1154
+ "epoch": 2.364217252396166,
1155
+ "grad_norm": 27.208339044008937,
1156
+ "learning_rate": 1.1775147928994082e-07,
1157
+ "logits/chosen": -0.97265625,
1158
+ "logits/rejected": -0.921875,
1159
+ "logps/chosen": -47.0,
1160
+ "logps/rejected": -63.0,
1161
+ "loss": 0.4028,
1162
+ "rewards/accuracies": 0.5,
1163
+ "rewards/chosen": -1.4375,
1164
+ "rewards/margins": 1.640625,
1165
+ "rewards/rejected": -3.078125,
1166
+ "step": 740
1167
+ },
1168
+ {
1169
+ "epoch": 2.3961661341853033,
1170
+ "grad_norm": 10.51991490071309,
1171
+ "learning_rate": 1.1183431952662721e-07,
1172
+ "logits/chosen": -0.98046875,
1173
+ "logits/rejected": -1.03125,
1174
+ "logps/chosen": -43.5,
1175
+ "logps/rejected": -69.0,
1176
+ "loss": 0.4102,
1177
+ "rewards/accuracies": 0.612500011920929,
1178
+ "rewards/chosen": -1.5546875,
1179
+ "rewards/margins": 2.125,
1180
+ "rewards/rejected": -3.671875,
1181
+ "step": 750
1182
+ },
1183
+ {
1184
+ "epoch": 2.428115015974441,
1185
+ "grad_norm": 11.046462734712232,
1186
+ "learning_rate": 1.059171597633136e-07,
1187
+ "logits/chosen": -0.9375,
1188
+ "logits/rejected": -0.77734375,
1189
+ "logps/chosen": -46.5,
1190
+ "logps/rejected": -53.5,
1191
+ "loss": 0.374,
1192
+ "rewards/accuracies": 0.512499988079071,
1193
+ "rewards/chosen": -1.265625,
1194
+ "rewards/margins": 1.78125,
1195
+ "rewards/rejected": -3.03125,
1196
+ "step": 760
1197
+ },
1198
+ {
1199
+ "epoch": 2.460063897763578,
1200
+ "grad_norm": 7.941784467499254,
1201
+ "learning_rate": 1e-07,
1202
+ "logits/chosen": -0.79296875,
1203
+ "logits/rejected": -0.921875,
1204
+ "logps/chosen": -40.5,
1205
+ "logps/rejected": -67.5,
1206
+ "loss": 0.3808,
1207
+ "rewards/accuracies": 0.5375000238418579,
1208
+ "rewards/chosen": -1.6328125,
1209
+ "rewards/margins": 1.96875,
1210
+ "rewards/rejected": -3.609375,
1211
+ "step": 770
1212
+ },
1213
+ {
1214
+ "epoch": 2.4920127795527156,
1215
+ "grad_norm": 18.383843736884476,
1216
+ "learning_rate": 9.408284023668639e-08,
1217
+ "logits/chosen": -0.953125,
1218
+ "logits/rejected": -0.953125,
1219
+ "logps/chosen": -44.5,
1220
+ "logps/rejected": -63.0,
1221
+ "loss": 0.3853,
1222
+ "rewards/accuracies": 0.44999998807907104,
1223
+ "rewards/chosen": -1.6015625,
1224
+ "rewards/margins": 1.8046875,
1225
+ "rewards/rejected": -3.390625,
1226
+ "step": 780
1227
+ },
1228
+ {
1229
+ "epoch": 2.523961661341853,
1230
+ "grad_norm": 12.735279104095845,
1231
+ "learning_rate": 8.816568047337278e-08,
1232
+ "logits/chosen": -0.95703125,
1233
+ "logits/rejected": -0.859375,
1234
+ "logps/chosen": -46.5,
1235
+ "logps/rejected": -53.75,
1236
+ "loss": 0.4527,
1237
+ "rewards/accuracies": 0.375,
1238
+ "rewards/chosen": -1.7421875,
1239
+ "rewards/margins": 1.2421875,
1240
+ "rewards/rejected": -2.984375,
1241
+ "step": 790
1242
+ },
1243
+ {
1244
+ "epoch": 2.5559105431309903,
1245
+ "grad_norm": 25.31092617144284,
1246
+ "learning_rate": 8.224852071005916e-08,
1247
+ "logits/chosen": -0.75,
1248
+ "logits/rejected": -1.03125,
1249
+ "logps/chosen": -44.25,
1250
+ "logps/rejected": -72.0,
1251
+ "loss": 0.3664,
1252
+ "rewards/accuracies": 0.5625,
1253
+ "rewards/chosen": -1.8671875,
1254
+ "rewards/margins": 1.890625,
1255
+ "rewards/rejected": -3.765625,
1256
+ "step": 800
1257
+ },
1258
+ {
1259
+ "epoch": 2.587859424920128,
1260
+ "grad_norm": 28.35128868207696,
1261
+ "learning_rate": 7.633136094674555e-08,
1262
+ "logits/chosen": -0.953125,
1263
+ "logits/rejected": -0.83203125,
1264
+ "logps/chosen": -50.5,
1265
+ "logps/rejected": -55.0,
1266
+ "loss": 0.4429,
1267
+ "rewards/accuracies": 0.42500001192092896,
1268
+ "rewards/chosen": -1.6796875,
1269
+ "rewards/margins": 1.5703125,
1270
+ "rewards/rejected": -3.25,
1271
+ "step": 810
1272
+ },
1273
+ {
1274
+ "epoch": 2.619808306709265,
1275
+ "grad_norm": 8.972843974408116,
1276
+ "learning_rate": 7.041420118343195e-08,
1277
+ "logits/chosen": -1.03125,
1278
+ "logits/rejected": -0.94140625,
1279
+ "logps/chosen": -51.0,
1280
+ "logps/rejected": -70.0,
1281
+ "loss": 0.3984,
1282
+ "rewards/accuracies": 0.5874999761581421,
1283
+ "rewards/chosen": -1.46875,
1284
+ "rewards/margins": 2.421875,
1285
+ "rewards/rejected": -3.890625,
1286
+ "step": 820
1287
+ },
1288
+ {
1289
+ "epoch": 2.6517571884984026,
1290
+ "grad_norm": 8.613351315510789,
1291
+ "learning_rate": 6.449704142011835e-08,
1292
+ "logits/chosen": -0.96875,
1293
+ "logits/rejected": -1.0546875,
1294
+ "logps/chosen": -50.25,
1295
+ "logps/rejected": -69.0,
1296
+ "loss": 0.4115,
1297
+ "rewards/accuracies": 0.512499988079071,
1298
+ "rewards/chosen": -1.46875,
1299
+ "rewards/margins": 2.03125,
1300
+ "rewards/rejected": -3.5,
1301
+ "step": 830
1302
+ },
1303
+ {
1304
+ "epoch": 2.68370607028754,
1305
+ "grad_norm": 9.70223823689821,
1306
+ "learning_rate": 5.857988165680473e-08,
1307
+ "logits/chosen": -1.03125,
1308
+ "logits/rejected": -0.75,
1309
+ "logps/chosen": -49.0,
1310
+ "logps/rejected": -53.5,
1311
+ "loss": 0.3931,
1312
+ "rewards/accuracies": 0.574999988079071,
1313
+ "rewards/chosen": -1.2890625,
1314
+ "rewards/margins": 1.890625,
1315
+ "rewards/rejected": -3.171875,
1316
+ "step": 840
1317
+ },
1318
+ {
1319
+ "epoch": 2.7156549520766773,
1320
+ "grad_norm": 14.809661385875646,
1321
+ "learning_rate": 5.266272189349112e-08,
1322
+ "logits/chosen": -0.96875,
1323
+ "logits/rejected": -0.953125,
1324
+ "logps/chosen": -47.25,
1325
+ "logps/rejected": -62.75,
1326
+ "loss": 0.4115,
1327
+ "rewards/accuracies": 0.4749999940395355,
1328
+ "rewards/chosen": -1.6484375,
1329
+ "rewards/margins": 1.7734375,
1330
+ "rewards/rejected": -3.421875,
1331
+ "step": 850
1332
+ },
1333
+ {
1334
+ "epoch": 2.747603833865815,
1335
+ "grad_norm": 5.380820798764086,
1336
+ "learning_rate": 4.674556213017751e-08,
1337
+ "logits/chosen": -0.98046875,
1338
+ "logits/rejected": -0.8828125,
1339
+ "logps/chosen": -47.0,
1340
+ "logps/rejected": -61.25,
1341
+ "loss": 0.3984,
1342
+ "rewards/accuracies": 0.4625000059604645,
1343
+ "rewards/chosen": -1.625,
1344
+ "rewards/margins": 1.703125,
1345
+ "rewards/rejected": -3.328125,
1346
+ "step": 860
1347
+ },
1348
+ {
1349
+ "epoch": 2.779552715654952,
1350
+ "grad_norm": 19.658075353925057,
1351
+ "learning_rate": 4.082840236686391e-08,
1352
+ "logits/chosen": -0.90625,
1353
+ "logits/rejected": -0.7578125,
1354
+ "logps/chosen": -49.25,
1355
+ "logps/rejected": -57.5,
1356
+ "loss": 0.3908,
1357
+ "rewards/accuracies": 0.4749999940395355,
1358
+ "rewards/chosen": -1.8984375,
1359
+ "rewards/margins": 1.578125,
1360
+ "rewards/rejected": -3.46875,
1361
+ "step": 870
1362
+ },
1363
+ {
1364
+ "epoch": 2.8115015974440896,
1365
+ "grad_norm": 10.442407160730754,
1366
+ "learning_rate": 3.4911242603550294e-08,
1367
+ "logits/chosen": -0.84375,
1368
+ "logits/rejected": -0.91796875,
1369
+ "logps/chosen": -41.75,
1370
+ "logps/rejected": -63.75,
1371
+ "loss": 0.4039,
1372
+ "rewards/accuracies": 0.48750001192092896,
1373
+ "rewards/chosen": -1.8125,
1374
+ "rewards/margins": 1.53125,
1375
+ "rewards/rejected": -3.34375,
1376
+ "step": 880
1377
+ },
1378
+ {
1379
+ "epoch": 2.8434504792332267,
1380
+ "grad_norm": 29.6132532828452,
1381
+ "learning_rate": 2.8994082840236687e-08,
1382
+ "logits/chosen": -0.95703125,
1383
+ "logits/rejected": -0.82421875,
1384
+ "logps/chosen": -46.75,
1385
+ "logps/rejected": -62.5,
1386
+ "loss": 0.3814,
1387
+ "rewards/accuracies": 0.574999988079071,
1388
+ "rewards/chosen": -1.5859375,
1389
+ "rewards/margins": 2.0625,
1390
+ "rewards/rejected": -3.640625,
1391
+ "step": 890
1392
+ },
1393
+ {
1394
+ "epoch": 2.8753993610223643,
1395
+ "grad_norm": 5.3338488869709035,
1396
+ "learning_rate": 2.3076923076923076e-08,
1397
+ "logits/chosen": -0.7578125,
1398
+ "logits/rejected": -0.81640625,
1399
+ "logps/chosen": -37.5,
1400
+ "logps/rejected": -55.0,
1401
+ "loss": 0.4095,
1402
+ "rewards/accuracies": 0.4124999940395355,
1403
+ "rewards/chosen": -1.671875,
1404
+ "rewards/margins": 1.34375,
1405
+ "rewards/rejected": -3.015625,
1406
+ "step": 900
1407
+ },
1408
+ {
1409
+ "epoch": 2.9073482428115014,
1410
+ "grad_norm": 8.959126987075347,
1411
+ "learning_rate": 1.7159763313609465e-08,
1412
+ "logits/chosen": -1.0078125,
1413
+ "logits/rejected": -0.84375,
1414
+ "logps/chosen": -45.75,
1415
+ "logps/rejected": -55.5,
1416
+ "loss": 0.3631,
1417
+ "rewards/accuracies": 0.5249999761581421,
1418
+ "rewards/chosen": -1.3984375,
1419
+ "rewards/margins": 1.6015625,
1420
+ "rewards/rejected": -3.0,
1421
+ "step": 910
1422
+ },
1423
+ {
1424
+ "epoch": 2.939297124600639,
1425
+ "grad_norm": 12.28972695659583,
1426
+ "learning_rate": 1.1242603550295858e-08,
1427
+ "logits/chosen": -1.015625,
1428
+ "logits/rejected": -0.9765625,
1429
+ "logps/chosen": -49.75,
1430
+ "logps/rejected": -69.0,
1431
+ "loss": 0.4118,
1432
+ "rewards/accuracies": 0.5625,
1433
+ "rewards/chosen": -1.703125,
1434
+ "rewards/margins": 2.09375,
1435
+ "rewards/rejected": -3.796875,
1436
+ "step": 920
1437
+ },
1438
+ {
1439
+ "epoch": 2.9712460063897765,
1440
+ "grad_norm": 7.913983074444077,
1441
+ "learning_rate": 5.325443786982248e-09,
1442
+ "logits/chosen": -0.984375,
1443
+ "logits/rejected": -0.7734375,
1444
+ "logps/chosen": -46.5,
1445
+ "logps/rejected": -49.0,
1446
+ "loss": 0.4176,
1447
+ "rewards/accuracies": 0.42500001192092896,
1448
+ "rewards/chosen": -1.6484375,
1449
+ "rewards/margins": 1.28125,
1450
+ "rewards/rejected": -2.921875,
1451
+ "step": 930
1452
+ },
1453
+ {
1454
+ "epoch": 3.0,
1455
+ "eval_logits/chosen": -1.078125,
1456
+ "eval_logits/rejected": -0.91796875,
1457
+ "eval_logps/chosen": -57.0,
1458
+ "eval_logps/rejected": -56.75,
1459
+ "eval_loss": 0.5467187762260437,
1460
+ "eval_rewards/accuracies": 0.4642857015132904,
1461
+ "eval_rewards/chosen": -1.828125,
1462
+ "eval_rewards/margins": 1.234375,
1463
+ "eval_rewards/rejected": -3.0625,
1464
+ "eval_runtime": 14.7769,
1465
+ "eval_samples_per_second": 13.535,
1466
+ "eval_steps_per_second": 0.474,
1467
+ "step": 939
1468
  }
1469
  ],
1470
  "logging_steps": 10,
 
1479
  "should_evaluate": false,
1480
  "should_log": false,
1481
  "should_save": true,
1482
+ "should_training_stop": true
1483
  },
1484
  "attributes": {}
1485
  }