plip committed on
Commit
3497578
1 Parent(s): f1e0679

Training in progress, step 420000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1baf6720e03a80bba42f160ede8ecbec1168817e98171665a299ded4d992a3a
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8b7da07a19b89f2cab9e7ae5ecfface2ed5a4207f59160b87fe1ed401ae453
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db26ab4777ecfb64dd27c707c9cfebcf74d431f5c02f5554c247500eee4a57f0
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5991ca4f8bc9f7681639a0a614f6fc9c7ec3e7b33dfe9e1afc9ff63491198511
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60abe9bac7f0fc7b8ccd2d34e905c6889ebea4f84a53f5f1e4a7b1ee92473794
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:498c95fdcab578849c69bf4a10f0532f5bf3ba8048fa37c1b416b3afd0ea5f33
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21fa0c13fc0e3348f6228394f5e318945295debe26ba21ec91b2c06a47593869
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.552878678440784,
5
- "global_step": 410000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8206,11 +8206,211 @@
8206
  "eval_samples_per_second": 1491.858,
8207
  "eval_steps_per_second": 23.756,
8208
  "step": 410000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8209
  }
8210
  ],
8211
  "max_steps": 500000,
8212
  "num_train_epochs": 12,
8213
- "total_flos": 1.309895021683634e+22,
8214
  "trial_name": null,
8215
  "trial_params": null
8216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.785875719378364,
5
+ "global_step": 420000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8206
  "eval_samples_per_second": 1491.858,
8207
  "eval_steps_per_second": 23.756,
8208
  "step": 410000
8209
+ },
8210
+ {
8211
+ "epoch": 9.56,
8212
+ "learning_rate": 3.4670510086189736e-05,
8213
+ "loss": 0.2551,
8214
+ "step": 410500
8215
+ },
8216
+ {
8217
+ "epoch": 9.58,
8218
+ "learning_rate": 3.440361659286563e-05,
8219
+ "loss": 0.2547,
8220
+ "step": 411000
8221
+ },
8222
+ {
8223
+ "epoch": 9.58,
8224
+ "eval_loss": 0.23947188258171082,
8225
+ "eval_runtime": 1.4381,
8226
+ "eval_samples_per_second": 1528.365,
8227
+ "eval_steps_per_second": 24.337,
8228
+ "step": 411000
8229
+ },
8230
+ {
8231
+ "epoch": 9.59,
8232
+ "learning_rate": 3.413804192226918e-05,
8233
+ "loss": 0.2547,
8234
+ "step": 411500
8235
+ },
8236
+ {
8237
+ "epoch": 9.6,
8238
+ "learning_rate": 3.387378897868246e-05,
8239
+ "loss": 0.2544,
8240
+ "step": 412000
8241
+ },
8242
+ {
8243
+ "epoch": 9.6,
8244
+ "eval_loss": 0.23671407997608185,
8245
+ "eval_runtime": 1.4569,
8246
+ "eval_samples_per_second": 1508.674,
8247
+ "eval_steps_per_second": 24.023,
8248
+ "step": 412000
8249
+ },
8250
+ {
8251
+ "epoch": 9.61,
8252
+ "learning_rate": 3.361086065193336e-05,
8253
+ "loss": 0.2549,
8254
+ "step": 412500
8255
+ },
8256
+ {
8257
+ "epoch": 9.62,
8258
+ "learning_rate": 3.334925981736389e-05,
8259
+ "loss": 0.2548,
8260
+ "step": 413000
8261
+ },
8262
+ {
8263
+ "epoch": 9.62,
8264
+ "eval_loss": 0.23614807426929474,
8265
+ "eval_runtime": 1.4517,
8266
+ "eval_samples_per_second": 1514.052,
8267
+ "eval_steps_per_second": 24.109,
8268
+ "step": 413000
8269
+ },
8270
+ {
8271
+ "epoch": 9.63,
8272
+ "learning_rate": 3.3088989335798925e-05,
8273
+ "loss": 0.2549,
8274
+ "step": 413500
8275
+ },
8276
+ {
8277
+ "epoch": 9.65,
8278
+ "learning_rate": 3.283005205351467e-05,
8279
+ "loss": 0.2545,
8280
+ "step": 414000
8281
+ },
8282
+ {
8283
+ "epoch": 9.65,
8284
+ "eval_loss": 0.2389170378446579,
8285
+ "eval_runtime": 1.4416,
8286
+ "eval_samples_per_second": 1524.726,
8287
+ "eval_steps_per_second": 24.279,
8288
+ "step": 414000
8289
+ },
8290
+ {
8291
+ "epoch": 9.66,
8292
+ "learning_rate": 3.2572450802207845e-05,
8293
+ "loss": 0.2546,
8294
+ "step": 414500
8295
+ },
8296
+ {
8297
+ "epoch": 9.67,
8298
+ "learning_rate": 3.2316188398964344e-05,
8299
+ "loss": 0.2547,
8300
+ "step": 415000
8301
+ },
8302
+ {
8303
+ "epoch": 9.67,
8304
+ "eval_loss": 0.24014273285865784,
8305
+ "eval_runtime": 1.4644,
8306
+ "eval_samples_per_second": 1500.945,
8307
+ "eval_steps_per_second": 23.9,
8308
+ "step": 415000
8309
+ },
8310
+ {
8311
+ "epoch": 9.68,
8312
+ "learning_rate": 3.206126764622888e-05,
8313
+ "loss": 0.2543,
8314
+ "step": 415500
8315
+ },
8316
+ {
8317
+ "epoch": 9.69,
8318
+ "learning_rate": 3.180769133177392e-05,
8319
+ "loss": 0.2545,
8320
+ "step": 416000
8321
+ },
8322
+ {
8323
+ "epoch": 9.69,
8324
+ "eval_loss": 0.2388986498117447,
8325
+ "eval_runtime": 1.5029,
8326
+ "eval_samples_per_second": 1462.487,
8327
+ "eval_steps_per_second": 23.288,
8328
+ "step": 416000
8329
+ },
8330
+ {
8331
+ "epoch": 9.7,
8332
+ "learning_rate": 3.155546222866939e-05,
8333
+ "loss": 0.2543,
8334
+ "step": 416500
8335
+ },
8336
+ {
8337
+ "epoch": 9.72,
8338
+ "learning_rate": 3.130458309525239e-05,
8339
+ "loss": 0.2543,
8340
+ "step": 417000
8341
+ },
8342
+ {
8343
+ "epoch": 9.72,
8344
+ "eval_loss": 0.2375379502773285,
8345
+ "eval_runtime": 1.4847,
8346
+ "eval_samples_per_second": 1480.402,
8347
+ "eval_steps_per_second": 23.573,
8348
+ "step": 417000
8349
+ },
8350
+ {
8351
+ "epoch": 9.73,
8352
+ "learning_rate": 3.1055056675096826e-05,
8353
+ "loss": 0.2544,
8354
+ "step": 417500
8355
+ },
8356
+ {
8357
+ "epoch": 9.74,
8358
+ "learning_rate": 3.0806885696983816e-05,
8359
+ "loss": 0.2543,
8360
+ "step": 418000
8361
+ },
8362
+ {
8363
+ "epoch": 9.74,
8364
+ "eval_loss": 0.23962582647800446,
8365
+ "eval_runtime": 1.4976,
8366
+ "eval_samples_per_second": 1467.703,
8367
+ "eval_steps_per_second": 23.371,
8368
+ "step": 418000
8369
+ },
8370
+ {
8371
+ "epoch": 9.75,
8372
+ "learning_rate": 3.056007287487128e-05,
8373
+ "loss": 0.2542,
8374
+ "step": 418500
8375
+ },
8376
+ {
8377
+ "epoch": 9.76,
8378
+ "learning_rate": 3.0314620907864744e-05,
8379
+ "loss": 0.2541,
8380
+ "step": 419000
8381
+ },
8382
+ {
8383
+ "epoch": 9.76,
8384
+ "eval_loss": 0.23849867284297943,
8385
+ "eval_runtime": 1.4273,
8386
+ "eval_samples_per_second": 1539.928,
8387
+ "eval_steps_per_second": 24.521,
8388
+ "step": 419000
8389
+ },
8390
+ {
8391
+ "epoch": 9.77,
8392
+ "learning_rate": 3.0070532480187637e-05,
8393
+ "loss": 0.2541,
8394
+ "step": 419500
8395
+ },
8396
+ {
8397
+ "epoch": 9.79,
8398
+ "learning_rate": 2.9827810261151784e-05,
8399
+ "loss": 0.2541,
8400
+ "step": 420000
8401
+ },
8402
+ {
8403
+ "epoch": 9.79,
8404
+ "eval_loss": 0.2357112467288971,
8405
+ "eval_runtime": 1.4438,
8406
+ "eval_samples_per_second": 1522.319,
8407
+ "eval_steps_per_second": 24.241,
8408
+ "step": 420000
8409
  }
8410
  ],
8411
  "max_steps": 500000,
8412
  "num_train_epochs": 12,
8413
+ "total_flos": 1.341843735561084e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db26ab4777ecfb64dd27c707c9cfebcf74d431f5c02f5554c247500eee4a57f0
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5991ca4f8bc9f7681639a0a614f6fc9c7ec3e7b33dfe9e1afc9ff63491198511
3
  size 102501541