amazingvince commited on
Commit
e0ad69a
1 Parent(s): 6272aa6

Upload folder using huggingface_hub

Browse files
latest CHANGED
@@ -1 +1 @@
1
- global_step800
 
1
+ global_step1000
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a86fef55663ceab77cfbc176dbdd7e4f081736340f277ec77498a59dc161cb9
3
  size 4944210912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c5dd58db09ef6c8d26f575bb1c52fc43bb2da069752a940c3c786d5296ab76
3
  size 4944210912
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4637578c863f7b6c9e7b90fae2c226a5d8ee9f85970a6c7b0d065458dc213a58
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fc780af17679805c0358ac7f2f9355f4108dcfad591c2b4c924313d821ed4f2
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cab56107817da51d8770ed50dc05a9d5ca28cfac0d5c0c8437e3afd0b6205b82
3
  size 4541564920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:780efe7446c48ba32e596540697fa9c2036e9ced694d2d25e4cba8622a7085b1
3
  size 4541564920
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.032494732299256174,
5
  "eval_steps": 400,
6
- "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -989,6 +989,246 @@
989
  "eval_samples_per_second": 17.143,
990
  "eval_steps_per_second": 2.862,
991
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
992
  }
993
  ],
994
  "logging_steps": 5,
@@ -996,7 +1236,7 @@
996
  "num_input_tokens_seen": 0,
997
  "num_train_epochs": 1,
998
  "save_steps": 200,
999
- "total_flos": 110960495632384.0,
1000
  "trial_name": null,
1001
  "trial_params": null
1002
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04061841537407022,
5
  "eval_steps": 400,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
989
  "eval_samples_per_second": 17.143,
990
  "eval_steps_per_second": 2.862,
991
  "step": 800
992
+ },
993
+ {
994
+ "epoch": 0.03,
995
+ "learning_rate": 1.997414356980759e-06,
996
+ "loss": 0.7896,
997
+ "step": 805
998
+ },
999
+ {
1000
+ "epoch": 0.03,
1001
+ "learning_rate": 1.9973678321159443e-06,
1002
+ "loss": 0.7029,
1003
+ "step": 810
1004
+ },
1005
+ {
1006
+ "epoch": 0.03,
1007
+ "learning_rate": 1.9973208929532063e-06,
1008
+ "loss": 0.7063,
1009
+ "step": 815
1010
+ },
1011
+ {
1012
+ "epoch": 0.03,
1013
+ "learning_rate": 1.9972735395120418e-06,
1014
+ "loss": 0.7171,
1015
+ "step": 820
1016
+ },
1017
+ {
1018
+ "epoch": 0.03,
1019
+ "learning_rate": 1.997225771812122e-06,
1020
+ "loss": 0.7217,
1021
+ "step": 825
1022
+ },
1023
+ {
1024
+ "epoch": 0.03,
1025
+ "learning_rate": 1.9971775898732893e-06,
1026
+ "loss": 0.725,
1027
+ "step": 830
1028
+ },
1029
+ {
1030
+ "epoch": 0.03,
1031
+ "learning_rate": 1.9971289937155577e-06,
1032
+ "loss": 0.7252,
1033
+ "step": 835
1034
+ },
1035
+ {
1036
+ "epoch": 0.03,
1037
+ "learning_rate": 1.997079983359113e-06,
1038
+ "loss": 0.7049,
1039
+ "step": 840
1040
+ },
1041
+ {
1042
+ "epoch": 0.03,
1043
+ "learning_rate": 1.9970305588243145e-06,
1044
+ "loss": 0.7056,
1045
+ "step": 845
1046
+ },
1047
+ {
1048
+ "epoch": 0.03,
1049
+ "learning_rate": 1.9969807201316925e-06,
1050
+ "loss": 0.6981,
1051
+ "step": 850
1052
+ },
1053
+ {
1054
+ "epoch": 0.03,
1055
+ "learning_rate": 1.9969304673019494e-06,
1056
+ "loss": 0.7154,
1057
+ "step": 855
1058
+ },
1059
+ {
1060
+ "epoch": 0.03,
1061
+ "learning_rate": 1.99687980035596e-06,
1062
+ "loss": 0.7277,
1063
+ "step": 860
1064
+ },
1065
+ {
1066
+ "epoch": 0.04,
1067
+ "learning_rate": 1.996828719314771e-06,
1068
+ "loss": 0.7181,
1069
+ "step": 865
1070
+ },
1071
+ {
1072
+ "epoch": 0.04,
1073
+ "learning_rate": 1.996777224199601e-06,
1074
+ "loss": 0.7028,
1075
+ "step": 870
1076
+ },
1077
+ {
1078
+ "epoch": 0.04,
1079
+ "learning_rate": 1.99672531503184e-06,
1080
+ "loss": 0.7336,
1081
+ "step": 875
1082
+ },
1083
+ {
1084
+ "epoch": 0.04,
1085
+ "learning_rate": 1.996672991833051e-06,
1086
+ "loss": 0.714,
1087
+ "step": 880
1088
+ },
1089
+ {
1090
+ "epoch": 0.04,
1091
+ "learning_rate": 1.996620254624969e-06,
1092
+ "loss": 0.713,
1093
+ "step": 885
1094
+ },
1095
+ {
1096
+ "epoch": 0.04,
1097
+ "learning_rate": 1.9965671034295e-06,
1098
+ "loss": 0.7295,
1099
+ "step": 890
1100
+ },
1101
+ {
1102
+ "epoch": 0.04,
1103
+ "learning_rate": 1.996513538268723e-06,
1104
+ "loss": 0.7798,
1105
+ "step": 895
1106
+ },
1107
+ {
1108
+ "epoch": 0.04,
1109
+ "learning_rate": 1.9964595591648883e-06,
1110
+ "loss": 0.7388,
1111
+ "step": 900
1112
+ },
1113
+ {
1114
+ "epoch": 0.04,
1115
+ "learning_rate": 1.9964051661404185e-06,
1116
+ "loss": 0.6813,
1117
+ "step": 905
1118
+ },
1119
+ {
1120
+ "epoch": 0.04,
1121
+ "learning_rate": 1.9963503592179078e-06,
1122
+ "loss": 0.7163,
1123
+ "step": 910
1124
+ },
1125
+ {
1126
+ "epoch": 0.04,
1127
+ "learning_rate": 1.996295138420122e-06,
1128
+ "loss": 0.7595,
1129
+ "step": 915
1130
+ },
1131
+ {
1132
+ "epoch": 0.04,
1133
+ "learning_rate": 1.9962395037700007e-06,
1134
+ "loss": 0.7457,
1135
+ "step": 920
1136
+ },
1137
+ {
1138
+ "epoch": 0.04,
1139
+ "learning_rate": 1.996183455290653e-06,
1140
+ "loss": 0.6897,
1141
+ "step": 925
1142
+ },
1143
+ {
1144
+ "epoch": 0.04,
1145
+ "learning_rate": 1.996126993005361e-06,
1146
+ "loss": 0.7031,
1147
+ "step": 930
1148
+ },
1149
+ {
1150
+ "epoch": 0.04,
1151
+ "learning_rate": 1.996070116937579e-06,
1152
+ "loss": 0.7177,
1153
+ "step": 935
1154
+ },
1155
+ {
1156
+ "epoch": 0.04,
1157
+ "learning_rate": 1.9960128271109326e-06,
1158
+ "loss": 0.6966,
1159
+ "step": 940
1160
+ },
1161
+ {
1162
+ "epoch": 0.04,
1163
+ "learning_rate": 1.9959551235492195e-06,
1164
+ "loss": 0.7391,
1165
+ "step": 945
1166
+ },
1167
+ {
1168
+ "epoch": 0.04,
1169
+ "learning_rate": 1.9958970062764095e-06,
1170
+ "loss": 0.7456,
1171
+ "step": 950
1172
+ },
1173
+ {
1174
+ "epoch": 0.04,
1175
+ "learning_rate": 1.9958384753166437e-06,
1176
+ "loss": 0.7072,
1177
+ "step": 955
1178
+ },
1179
+ {
1180
+ "epoch": 0.04,
1181
+ "learning_rate": 1.995779530694236e-06,
1182
+ "loss": 0.6894,
1183
+ "step": 960
1184
+ },
1185
+ {
1186
+ "epoch": 0.04,
1187
+ "learning_rate": 1.9957201724336704e-06,
1188
+ "loss": 0.704,
1189
+ "step": 965
1190
+ },
1191
+ {
1192
+ "epoch": 0.04,
1193
+ "learning_rate": 1.9956604005596043e-06,
1194
+ "loss": 0.6939,
1195
+ "step": 970
1196
+ },
1197
+ {
1198
+ "epoch": 0.04,
1199
+ "learning_rate": 1.9956002150968667e-06,
1200
+ "loss": 0.7045,
1201
+ "step": 975
1202
+ },
1203
+ {
1204
+ "epoch": 0.04,
1205
+ "learning_rate": 1.9955396160704582e-06,
1206
+ "loss": 0.6791,
1207
+ "step": 980
1208
+ },
1209
+ {
1210
+ "epoch": 0.04,
1211
+ "learning_rate": 1.99547860350555e-06,
1212
+ "loss": 0.675,
1213
+ "step": 985
1214
+ },
1215
+ {
1216
+ "epoch": 0.04,
1217
+ "learning_rate": 1.995417177427488e-06,
1218
+ "loss": 0.7163,
1219
+ "step": 990
1220
+ },
1221
+ {
1222
+ "epoch": 0.04,
1223
+ "learning_rate": 1.9953553378617866e-06,
1224
+ "loss": 0.6922,
1225
+ "step": 995
1226
+ },
1227
+ {
1228
+ "epoch": 0.04,
1229
+ "learning_rate": 1.995293084834134e-06,
1230
+ "loss": 0.7101,
1231
+ "step": 1000
1232
  }
1233
  ],
1234
  "logging_steps": 5,
 
1236
  "num_input_tokens_seen": 0,
1237
  "num_train_epochs": 1,
1238
  "save_steps": 200,
1239
+ "total_flos": 138834746810368.0,
1240
  "trial_name": null,
1241
  "trial_params": null
1242
  }