vasudevgupta committed on
Commit
4daa96c
1 Parent(s): 9bda347

add ckpt-10500 / epoch-0

Browse files
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +153 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7815c1c0189300400f56cd52928f5ff13a0a149601801a11869e3da9aec1c8c7
3
  size 1057712183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7700fcec392ea121226bbc3e9b325a706b1fee3fc0d121b37876c3782fd10133
3
  size 1057712183
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:366526d56f212d3b35c55cb08198a66a22edb85a2b36266515e46ea56eb57fec
3
  size 528910843
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12327197ea2594c4ffecc831bad5c1ede4a1b8cabecb84430e06b24108e2cd14
3
  size 528910843
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05340714cd922e69aa91d58d9e8df1033830fac0f368cbdfb60b74a91044a3b1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:112b6ad0d91eba88b0a5c6b34586dddeefe7213a047a2f6f299e9b87aa672c89
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9755401161130675,
5
- "global_step": 10250,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6156,11 +6156,161 @@
6156
  "learning_rate": 6.769676331116133e-05,
6157
  "loss": 1.4452,
6158
  "step": 10250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6159
  }
6160
  ],
6161
  "max_steps": 31521,
6162
  "num_train_epochs": 3,
6163
- "total_flos": 9.71805765981438e+17,
6164
  "trial_name": null,
6165
  "trial_params": null
6166
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9993337774816788,
5
+ "global_step": 10500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6156
  "learning_rate": 6.769676331116133e-05,
6157
  "loss": 1.4452,
6158
  "step": 10250
6159
+ },
6160
+ {
6161
+ "epoch": 0.98,
6162
+ "learning_rate": 6.766493746220682e-05,
6163
+ "loss": 1.4993,
6164
+ "step": 10260
6165
+ },
6166
+ {
6167
+ "epoch": 0.98,
6168
+ "learning_rate": 6.763311161325229e-05,
6169
+ "loss": 1.4982,
6170
+ "step": 10270
6171
+ },
6172
+ {
6173
+ "epoch": 0.98,
6174
+ "learning_rate": 6.760128576429776e-05,
6175
+ "loss": 1.651,
6176
+ "step": 10280
6177
+ },
6178
+ {
6179
+ "epoch": 0.98,
6180
+ "learning_rate": 6.756945991534325e-05,
6181
+ "loss": 1.6332,
6182
+ "step": 10290
6183
+ },
6184
+ {
6185
+ "epoch": 0.98,
6186
+ "learning_rate": 6.753763406638872e-05,
6187
+ "loss": 1.4433,
6188
+ "step": 10300
6189
+ },
6190
+ {
6191
+ "epoch": 0.98,
6192
+ "learning_rate": 6.75058082174342e-05,
6193
+ "loss": 1.485,
6194
+ "step": 10310
6195
+ },
6196
+ {
6197
+ "epoch": 0.98,
6198
+ "learning_rate": 6.747398236847968e-05,
6199
+ "loss": 1.6712,
6200
+ "step": 10320
6201
+ },
6202
+ {
6203
+ "epoch": 0.98,
6204
+ "learning_rate": 6.744215651952516e-05,
6205
+ "loss": 1.5196,
6206
+ "step": 10330
6207
+ },
6208
+ {
6209
+ "epoch": 0.98,
6210
+ "learning_rate": 6.741033067057064e-05,
6211
+ "loss": 1.586,
6212
+ "step": 10340
6213
+ },
6214
+ {
6215
+ "epoch": 0.99,
6216
+ "learning_rate": 6.737850482161611e-05,
6217
+ "loss": 1.4815,
6218
+ "step": 10350
6219
+ },
6220
+ {
6221
+ "epoch": 0.99,
6222
+ "learning_rate": 6.73466789726616e-05,
6223
+ "loss": 1.7372,
6224
+ "step": 10360
6225
+ },
6226
+ {
6227
+ "epoch": 0.99,
6228
+ "learning_rate": 6.731485312370708e-05,
6229
+ "loss": 1.2242,
6230
+ "step": 10370
6231
+ },
6232
+ {
6233
+ "epoch": 0.99,
6234
+ "learning_rate": 6.728302727475255e-05,
6235
+ "loss": 1.8329,
6236
+ "step": 10380
6237
+ },
6238
+ {
6239
+ "epoch": 0.99,
6240
+ "learning_rate": 6.725120142579803e-05,
6241
+ "loss": 1.6306,
6242
+ "step": 10390
6243
+ },
6244
+ {
6245
+ "epoch": 0.99,
6246
+ "learning_rate": 6.721937557684352e-05,
6247
+ "loss": 1.6169,
6248
+ "step": 10400
6249
+ },
6250
+ {
6251
+ "epoch": 0.99,
6252
+ "learning_rate": 6.7187549727889e-05,
6253
+ "loss": 1.8023,
6254
+ "step": 10410
6255
+ },
6256
+ {
6257
+ "epoch": 0.99,
6258
+ "learning_rate": 6.715572387893447e-05,
6259
+ "loss": 1.4764,
6260
+ "step": 10420
6261
+ },
6262
+ {
6263
+ "epoch": 0.99,
6264
+ "learning_rate": 6.712389802997994e-05,
6265
+ "loss": 1.718,
6266
+ "step": 10430
6267
+ },
6268
+ {
6269
+ "epoch": 0.99,
6270
+ "learning_rate": 6.709207218102544e-05,
6271
+ "loss": 1.6362,
6272
+ "step": 10440
6273
+ },
6274
+ {
6275
+ "epoch": 0.99,
6276
+ "learning_rate": 6.706024633207091e-05,
6277
+ "loss": 1.4818,
6278
+ "step": 10450
6279
+ },
6280
+ {
6281
+ "epoch": 1.0,
6282
+ "learning_rate": 6.70284204831164e-05,
6283
+ "loss": 1.8302,
6284
+ "step": 10460
6285
+ },
6286
+ {
6287
+ "epoch": 1.0,
6288
+ "learning_rate": 6.699659463416186e-05,
6289
+ "loss": 1.4897,
6290
+ "step": 10470
6291
+ },
6292
+ {
6293
+ "epoch": 1.0,
6294
+ "learning_rate": 6.696476878520735e-05,
6295
+ "loss": 1.3032,
6296
+ "step": 10480
6297
+ },
6298
+ {
6299
+ "epoch": 1.0,
6300
+ "learning_rate": 6.693294293625283e-05,
6301
+ "loss": 1.4236,
6302
+ "step": 10490
6303
+ },
6304
+ {
6305
+ "epoch": 1.0,
6306
+ "learning_rate": 6.69011170872983e-05,
6307
+ "loss": 1.5428,
6308
+ "step": 10500
6309
  }
6310
  ],
6311
  "max_steps": 31521,
6312
  "num_train_epochs": 3,
6313
+ "total_flos": 9.955030912708116e+17,
6314
  "trial_name": null,
6315
  "trial_params": null
6316
  }