vasudevgupta committed
Commit 4daa96c
1 Parent(s): 9bda347

add ckpt-10500 / epoch-0

Files changed:
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- scheduler.pt +1 -1
- trainer_state.json +153 -3
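Together these four files make up a standard Hugging Face Trainer checkpoint: model weights (pytorch_model.bin), optimizer and LR-scheduler state (optimizer.pt, scheduler.pt), and the running training log (trainer_state.json). Below is a minimal sketch of inspecting them locally with PyTorch; the `checkpoint-10500` path is an assumption about where the repo is checked out, not something this commit specifies.

```python
import torch

# Assumed local path to the checked-out checkpoint; adjust as needed.
ckpt_dir = "checkpoint-10500"

# Model weights and optimizer/scheduler state are plain torch pickles.
state_dict = torch.load(f"{ckpt_dir}/pytorch_model.bin", map_location="cpu")
optimizer_state = torch.load(f"{ckpt_dir}/optimizer.pt", map_location="cpu")
scheduler_state = torch.load(f"{ckpt_dir}/scheduler.pt", map_location="cpu")

print(f"{len(state_dict)} model tensors")
print("optimizer keys:", list(optimizer_state.keys()))
print("scheduler keys:", list(scheduler_state.keys()))
```

In normal use these files are not loaded by hand: passing the checkpoint directory to `Trainer.train(resume_from_checkpoint=...)` restores all of them before training continues.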
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7700fcec392ea121226bbc3e9b325a706b1fee3fc0d121b37876c3782fd10133
 size 1057712183
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:12327197ea2594c4ffecc831bad5c1ede4a1b8cabecb84430e06b24108e2cd14
 size 528910843
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:112b6ad0d91eba88b0a5c6b34586dddeefe7213a047a2f6f299e9b87aa672c89
 size 623
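The three binaries above are tracked with Git LFS, so the diffs operate on small pointer files (version / oid sha256 / size) rather than the tensors themselves; only the oid changes, while the byte sizes stay identical. Below is a minimal sketch of reading such a pointer file, assuming the repo is cloned without LFS smudging so the pointer text is what sits on disk.

```python
def parse_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file (one 'key value' pair per line)."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

# e.g. {'version': 'https://git-lfs.github.com/spec/v1',
#       'oid': 'sha256:7700fcec...', 'size': '1057712183'}
ptr = parse_lfs_pointer("optimizer.pt")
print(ptr["oid"], int(ptr["size"]))
```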
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
-  "global_step":
+  "epoch": 0.9993337774816788,
+  "global_step": 10500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6156,11 +6156,161 @@
       "learning_rate": 6.769676331116133e-05,
       "loss": 1.4452,
       "step": 10250
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.766493746220682e-05,
+      "loss": 1.4993,
+      "step": 10260
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.763311161325229e-05,
+      "loss": 1.4982,
+      "step": 10270
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.760128576429776e-05,
+      "loss": 1.651,
+      "step": 10280
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.756945991534325e-05,
+      "loss": 1.6332,
+      "step": 10290
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.753763406638872e-05,
+      "loss": 1.4433,
+      "step": 10300
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.75058082174342e-05,
+      "loss": 1.485,
+      "step": 10310
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.747398236847968e-05,
+      "loss": 1.6712,
+      "step": 10320
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.744215651952516e-05,
+      "loss": 1.5196,
+      "step": 10330
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 6.741033067057064e-05,
+      "loss": 1.586,
+      "step": 10340
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.737850482161611e-05,
+      "loss": 1.4815,
+      "step": 10350
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.73466789726616e-05,
+      "loss": 1.7372,
+      "step": 10360
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.731485312370708e-05,
+      "loss": 1.2242,
+      "step": 10370
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.728302727475255e-05,
+      "loss": 1.8329,
+      "step": 10380
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.725120142579803e-05,
+      "loss": 1.6306,
+      "step": 10390
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.721937557684352e-05,
+      "loss": 1.6169,
+      "step": 10400
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.7187549727889e-05,
+      "loss": 1.8023,
+      "step": 10410
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.715572387893447e-05,
+      "loss": 1.4764,
+      "step": 10420
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.712389802997994e-05,
+      "loss": 1.718,
+      "step": 10430
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.709207218102544e-05,
+      "loss": 1.6362,
+      "step": 10440
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 6.706024633207091e-05,
+      "loss": 1.4818,
+      "step": 10450
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 6.70284204831164e-05,
+      "loss": 1.8302,
+      "step": 10460
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 6.699659463416186e-05,
+      "loss": 1.4897,
+      "step": 10470
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 6.696476878520735e-05,
+      "loss": 1.3032,
+      "step": 10480
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 6.693294293625283e-05,
+      "loss": 1.4236,
+      "step": 10490
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 6.69011170872983e-05,
+      "loss": 1.5428,
+      "step": 10500
     }
   ],
   "max_steps": 31521,
   "num_train_epochs": 3,
-  "total_flos": 9.
+  "total_flos": 9.955030912708116e+17,
   "trial_name": null,
   "trial_params": null
 }
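The added entries follow the Trainer's log_history schema, one record every 10 optimizer steps with epoch, learning_rate, loss and step, which makes this file convenient for plotting the training curve. A small sketch of pulling the loss trajectory out of it, assuming the file has been downloaded locally:

```python
import json

# Assumed local path to the trainer_state.json from this checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

print("epoch:", state["epoch"], "global_step:", state["global_step"])

# Keep only the periodic training-loss records from the log history.
records = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in records]
losses = [e["loss"] for e in records]
print("last logged loss:", losses[-1], "at step", steps[-1])  # 1.5428 at step 10500
```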