plip committed on
Commit
c012647
1 Parent(s): 80774d1

Training in progress, step 470000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:583b1f1834e02218d89c3b697d47dd796ee6e2f54da76dbfa4b2b4a392308637
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:267d3df636a0162d33e130374595442a55c8d8e98b166e9f3f7d0083011ee911
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b1ac07a8fd5b7f18f94228f9f3e9b88d618a44073c792161d704c98526bced7
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f2373aeb4dd8a73f874cd12197ac54e4d9ac9c719856821eb66aa7befcac8c3
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241694889d5343c5f04a75e7c4db1bba90bea2ebc6296d61306df81c3305c3d0
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241694889d5343c5f04a75e7c4db1bba90bea2ebc6296d61306df81c3305c3d0
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241694889d5343c5f04a75e7c4db1bba90bea2ebc6296d61306df81c3305c3d0
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241694889d5343c5f04a75e7c4db1bba90bea2ebc6296d61306df81c3305c3d0
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241694889d5343c5f04a75e7c4db1bba90bea2ebc6296d61306df81c3305c3d0
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241694889d5343c5f04a75e7c4db1bba90bea2ebc6296d61306df81c3305c3d0
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241694889d5343c5f04a75e7c4db1bba90bea2ebc6296d61306df81c3305c3d0
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241694889d5343c5f04a75e7c4db1bba90bea2ebc6296d61306df81c3305c3d0
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d15e11a6de6abd55121a73bc214cc950fb971f927ae2b1d5067145da50de5d0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.717863883128684,
5
- "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9206,11 +9206,211 @@
9206
  "eval_samples_per_second": 1542.873,
9207
  "eval_steps_per_second": 24.568,
9208
  "step": 460000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9209
  }
9210
  ],
9211
  "max_steps": 500000,
9212
  "num_train_epochs": 12,
9213
- "total_flos": 1.4696383413717932e+22,
9214
  "trial_name": null,
9215
  "trial_params": null
9216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.950860924066264,
5
+ "global_step": 470000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9206
  "eval_samples_per_second": 1542.873,
9207
  "eval_steps_per_second": 24.568,
9208
  "step": 460000
9209
+ },
9210
+ {
9211
+ "epoch": 10.73,
9212
+ "learning_rate": 1.4920092237448903e-05,
9213
+ "loss": 0.2514,
9214
+ "step": 460500
9215
+ },
9216
+ {
9217
+ "epoch": 10.74,
9218
+ "learning_rate": 1.4797008356541874e-05,
9219
+ "loss": 0.2513,
9220
+ "step": 461000
9221
+ },
9222
+ {
9223
+ "epoch": 10.74,
9224
+ "eval_loss": 0.2357269674539566,
9225
+ "eval_runtime": 1.4097,
9226
+ "eval_samples_per_second": 1559.248,
9227
+ "eval_steps_per_second": 24.829,
9228
+ "step": 461000
9229
+ },
9230
+ {
9231
+ "epoch": 10.75,
9232
+ "learning_rate": 1.4675457713088947e-05,
9233
+ "loss": 0.2513,
9234
+ "step": 461500
9235
+ },
9236
+ {
9237
+ "epoch": 10.76,
9238
+ "learning_rate": 1.4555441636348494e-05,
9239
+ "loss": 0.2516,
9240
+ "step": 462000
9241
+ },
9242
+ {
9243
+ "epoch": 10.76,
9244
+ "eval_loss": 0.2352001965045929,
9245
+ "eval_runtime": 1.4391,
9246
+ "eval_samples_per_second": 1527.384,
9247
+ "eval_steps_per_second": 24.321,
9248
+ "step": 462000
9249
+ },
9250
+ {
9251
+ "epoch": 10.78,
9252
+ "learning_rate": 1.4436961438797095e-05,
9253
+ "loss": 0.2512,
9254
+ "step": 462500
9255
+ },
9256
+ {
9257
+ "epoch": 10.79,
9258
+ "learning_rate": 1.4320018416115206e-05,
9259
+ "loss": 0.2511,
9260
+ "step": 463000
9261
+ },
9262
+ {
9263
+ "epoch": 10.79,
9264
+ "eval_loss": 0.23254527151584625,
9265
+ "eval_runtime": 1.4508,
9266
+ "eval_samples_per_second": 1514.977,
9267
+ "eval_steps_per_second": 24.124,
9268
+ "step": 463000
9269
+ },
9270
+ {
9271
+ "epoch": 10.8,
9272
+ "learning_rate": 1.4204613847173003e-05,
9273
+ "loss": 0.2513,
9274
+ "step": 463500
9275
+ },
9276
+ {
9277
+ "epoch": 10.81,
9278
+ "learning_rate": 1.4090748994016354e-05,
9279
+ "loss": 0.2515,
9280
+ "step": 464000
9281
+ },
9282
+ {
9283
+ "epoch": 10.81,
9284
+ "eval_loss": 0.23643817007541656,
9285
+ "eval_runtime": 1.4271,
9286
+ "eval_samples_per_second": 1540.18,
9287
+ "eval_steps_per_second": 24.525,
9288
+ "step": 464000
9289
+ },
9290
+ {
9291
+ "epoch": 10.82,
9292
+ "learning_rate": 1.3978425101853049e-05,
9293
+ "loss": 0.2513,
9294
+ "step": 464500
9295
+ },
9296
+ {
9297
+ "epoch": 10.83,
9298
+ "learning_rate": 1.3867643399039165e-05,
9299
+ "loss": 0.2509,
9300
+ "step": 465000
9301
+ },
9302
+ {
9303
+ "epoch": 10.83,
9304
+ "eval_loss": 0.2379205971956253,
9305
+ "eval_runtime": 1.4615,
9306
+ "eval_samples_per_second": 1503.93,
9307
+ "eval_steps_per_second": 23.948,
9308
+ "step": 465000
9309
+ },
9310
+ {
9311
+ "epoch": 10.85,
9312
+ "learning_rate": 1.3758405097065648e-05,
9313
+ "loss": 0.2515,
9314
+ "step": 465500
9315
+ },
9316
+ {
9317
+ "epoch": 10.86,
9318
+ "learning_rate": 1.3650711390545131e-05,
9319
+ "loss": 0.2511,
9320
+ "step": 466000
9321
+ },
9322
+ {
9323
+ "epoch": 10.86,
9324
+ "eval_loss": 0.2360716015100479,
9325
+ "eval_runtime": 1.4528,
9326
+ "eval_samples_per_second": 1512.979,
9327
+ "eval_steps_per_second": 24.092,
9328
+ "step": 466000
9329
+ },
9330
+ {
9331
+ "epoch": 10.87,
9332
+ "learning_rate": 1.3544563457198657e-05,
9333
+ "loss": 0.2514,
9334
+ "step": 466500
9335
+ },
9336
+ {
9337
+ "epoch": 10.88,
9338
+ "learning_rate": 1.343996245784307e-05,
9339
+ "loss": 0.2513,
9340
+ "step": 467000
9341
+ },
9342
+ {
9343
+ "epoch": 10.88,
9344
+ "eval_loss": 0.2349902093410492,
9345
+ "eval_runtime": 1.4197,
9346
+ "eval_samples_per_second": 1548.226,
9347
+ "eval_steps_per_second": 24.653,
9348
+ "step": 467000
9349
+ },
9350
+ {
9351
+ "epoch": 10.89,
9352
+ "learning_rate": 1.3336909536378107e-05,
9353
+ "loss": 0.2511,
9354
+ "step": 467500
9355
+ },
9356
+ {
9357
+ "epoch": 10.9,
9358
+ "learning_rate": 1.3235405819774022e-05,
9359
+ "loss": 0.2509,
9360
+ "step": 468000
9361
+ },
9362
+ {
9363
+ "epoch": 10.9,
9364
+ "eval_loss": 0.23496830463409424,
9365
+ "eval_runtime": 1.4553,
9366
+ "eval_samples_per_second": 1510.368,
9367
+ "eval_steps_per_second": 24.05,
9368
+ "step": 468000
9369
+ },
9370
+ {
9371
+ "epoch": 10.92,
9372
+ "learning_rate": 1.3135452418059208e-05,
9373
+ "loss": 0.2513,
9374
+ "step": 468500
9375
+ },
9376
+ {
9377
+ "epoch": 10.93,
9378
+ "learning_rate": 1.3037050424308027e-05,
9379
+ "loss": 0.2512,
9380
+ "step": 469000
9381
+ },
9382
+ {
9383
+ "epoch": 10.93,
9384
+ "eval_loss": 0.23394964635372162,
9385
+ "eval_runtime": 1.4438,
9386
+ "eval_samples_per_second": 1522.345,
9387
+ "eval_steps_per_second": 24.241,
9388
+ "step": 469000
9389
+ },
9390
+ {
9391
+ "epoch": 10.94,
9392
+ "learning_rate": 1.2940200914628945e-05,
9393
+ "loss": 0.251,
9394
+ "step": 469500
9395
+ },
9396
+ {
9397
+ "epoch": 10.95,
9398
+ "learning_rate": 1.2844904948152644e-05,
9399
+ "loss": 0.2508,
9400
+ "step": 470000
9401
+ },
9402
+ {
9403
+ "epoch": 10.95,
9404
+ "eval_loss": 0.23325932025909424,
9405
+ "eval_runtime": 1.4563,
9406
+ "eval_samples_per_second": 1509.326,
9407
+ "eval_steps_per_second": 24.034,
9408
+ "step": 470000
9409
  }
9410
  ],
9411
  "max_steps": 500000,
9412
  "num_train_epochs": 12,
9413
+ "total_flos": 1.5015870552492432e+22,
9414
  "trial_name": null,
9415
  "trial_params": null
9416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b1ac07a8fd5b7f18f94228f9f3e9b88d618a44073c792161d704c98526bced7
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f2373aeb4dd8a73f874cd12197ac54e4d9ac9c719856821eb66aa7befcac8c3
3
  size 102501541