plip committed on
Commit
e59df61
1 Parent(s): 71b8c17

Training in progress, step 370000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3622d2c4bf20d15081328014fbc84571924f388ee37dce9d92056a095aeebdc
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df6d3d3f9674103740b8f59e2a1f3f36fbba555fa4f14347ef60833e8c7c8d0f
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ca3e8665bb839f68d9a5ff24783ce35763f85a0cda61cb52ae923170ae03041
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10781209da6498c21422e9093158ac9bd1d4e98d2f25e9bf1fc4805ab12c7d1
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525c3bf31d263794683cccd40e43f9684bf527b10941501bd249dbeba9fd1806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580594f4768ecd1bd92e87dca92e874365a397305161c1f8781b79f1f0b613ba
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.387893473752884,
5
- "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7206,11 +7206,211 @@
7206
  "eval_samples_per_second": 1504.148,
7207
  "eval_steps_per_second": 23.951,
7208
  "step": 360000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7209
  }
7210
  ],
7211
  "max_steps": 500000,
7212
  "num_train_epochs": 12,
7213
- "total_flos": 1.1501517019954746e+22,
7214
  "trial_name": null,
7215
  "trial_params": null
7216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.620890514690464,
5
+ "global_step": 370000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7206
  "eval_samples_per_second": 1504.148,
7207
  "eval_steps_per_second": 23.951,
7208
  "step": 360000
7209
+ },
7210
+ {
7211
+ "epoch": 8.4,
7212
+ "learning_rate": 6.746049527150238e-05,
7213
+ "loss": 0.2596,
7214
+ "step": 360500
7215
+ },
7216
+ {
7217
+ "epoch": 8.41,
7218
+ "learning_rate": 6.707871421704209e-05,
7219
+ "loss": 0.2596,
7220
+ "step": 361000
7221
+ },
7222
+ {
7223
+ "epoch": 8.41,
7224
+ "eval_loss": 0.24351955950260162,
7225
+ "eval_runtime": 1.4786,
7226
+ "eval_samples_per_second": 1486.571,
7227
+ "eval_steps_per_second": 23.672,
7228
+ "step": 361000
7229
+ },
7230
+ {
7231
+ "epoch": 8.42,
7232
+ "learning_rate": 6.669789465567683e-05,
7233
+ "loss": 0.2593,
7234
+ "step": 361500
7235
+ },
7236
+ {
7237
+ "epoch": 8.43,
7238
+ "learning_rate": 6.631804075198838e-05,
7239
+ "loss": 0.2595,
7240
+ "step": 362000
7241
+ },
7242
+ {
7243
+ "epoch": 8.43,
7244
+ "eval_loss": 0.2438839077949524,
7245
+ "eval_runtime": 1.4636,
7246
+ "eval_samples_per_second": 1501.733,
7247
+ "eval_steps_per_second": 23.913,
7248
+ "step": 362000
7249
+ },
7250
+ {
7251
+ "epoch": 8.45,
7252
+ "learning_rate": 6.593915665999816e-05,
7253
+ "loss": 0.2593,
7254
+ "step": 362500
7255
+ },
7256
+ {
7257
+ "epoch": 8.46,
7258
+ "learning_rate": 6.55612465231219e-05,
7259
+ "loss": 0.2593,
7260
+ "step": 363000
7261
+ },
7262
+ {
7263
+ "epoch": 8.46,
7264
+ "eval_loss": 0.24523594975471497,
7265
+ "eval_runtime": 1.4077,
7266
+ "eval_samples_per_second": 1561.38,
7267
+ "eval_steps_per_second": 24.863,
7268
+ "step": 363000
7269
+ },
7270
+ {
7271
+ "epoch": 8.47,
7272
+ "learning_rate": 6.518431447412434e-05,
7273
+ "loss": 0.2593,
7274
+ "step": 363500
7275
+ },
7276
+ {
7277
+ "epoch": 8.48,
7278
+ "learning_rate": 6.480836463507392e-05,
7279
+ "loss": 0.2593,
7280
+ "step": 364000
7281
+ },
7282
+ {
7283
+ "epoch": 8.48,
7284
+ "eval_loss": 0.24123093485832214,
7285
+ "eval_runtime": 1.4526,
7286
+ "eval_samples_per_second": 1513.098,
7287
+ "eval_steps_per_second": 24.094,
7288
+ "step": 364000
7289
+ },
7290
+ {
7291
+ "epoch": 8.49,
7292
+ "learning_rate": 6.443340111729786e-05,
7293
+ "loss": 0.259,
7294
+ "step": 364500
7295
+ },
7296
+ {
7297
+ "epoch": 8.5,
7298
+ "learning_rate": 6.405942802133713e-05,
7299
+ "loss": 0.2592,
7300
+ "step": 365000
7301
+ },
7302
+ {
7303
+ "epoch": 8.5,
7304
+ "eval_loss": 0.24300608038902283,
7305
+ "eval_runtime": 1.5063,
7306
+ "eval_samples_per_second": 1459.248,
7307
+ "eval_steps_per_second": 23.236,
7308
+ "step": 365000
7309
+ },
7310
+ {
7311
+ "epoch": 8.52,
7312
+ "learning_rate": 6.36864494369016e-05,
7313
+ "loss": 0.259,
7314
+ "step": 365500
7315
+ },
7316
+ {
7317
+ "epoch": 8.53,
7318
+ "learning_rate": 6.331446944282534e-05,
7319
+ "loss": 0.2587,
7320
+ "step": 366000
7321
+ },
7322
+ {
7323
+ "epoch": 8.53,
7324
+ "eval_loss": 0.24051520228385925,
7325
+ "eval_runtime": 1.4527,
7326
+ "eval_samples_per_second": 1513.01,
7327
+ "eval_steps_per_second": 24.093,
7328
+ "step": 366000
7329
+ },
7330
+ {
7331
+ "epoch": 8.54,
7332
+ "learning_rate": 6.294349210702188e-05,
7333
+ "loss": 0.2589,
7334
+ "step": 366500
7335
+ },
7336
+ {
7337
+ "epoch": 8.55,
7338
+ "learning_rate": 6.257352148643998e-05,
7339
+ "loss": 0.2591,
7340
+ "step": 367000
7341
+ },
7342
+ {
7343
+ "epoch": 8.55,
7344
+ "eval_loss": 0.2446797788143158,
7345
+ "eval_runtime": 1.4184,
7346
+ "eval_samples_per_second": 1549.604,
7347
+ "eval_steps_per_second": 24.675,
7348
+ "step": 367000
7349
+ },
7350
+ {
7351
+ "epoch": 8.56,
7352
+ "learning_rate": 6.220456162701908e-05,
7353
+ "loss": 0.2589,
7354
+ "step": 367500
7355
+ },
7356
+ {
7357
+ "epoch": 8.57,
7358
+ "learning_rate": 6.183661656364515e-05,
7359
+ "loss": 0.2586,
7360
+ "step": 368000
7361
+ },
7362
+ {
7363
+ "epoch": 8.57,
7364
+ "eval_loss": 0.24278691411018372,
7365
+ "eval_runtime": 1.4508,
7366
+ "eval_samples_per_second": 1515.055,
7367
+ "eval_steps_per_second": 24.125,
7368
+ "step": 368000
7369
+ },
7370
+ {
7371
+ "epoch": 8.59,
7372
+ "learning_rate": 6.146969032010631e-05,
7373
+ "loss": 0.2589,
7374
+ "step": 368500
7375
+ },
7376
+ {
7377
+ "epoch": 8.6,
7378
+ "learning_rate": 6.110378690904928e-05,
7379
+ "loss": 0.259,
7380
+ "step": 369000
7381
+ },
7382
+ {
7383
+ "epoch": 8.6,
7384
+ "eval_loss": 0.24149444699287415,
7385
+ "eval_runtime": 1.4509,
7386
+ "eval_samples_per_second": 1514.912,
7387
+ "eval_steps_per_second": 24.123,
7388
+ "step": 369000
7389
+ },
7390
+ {
7391
+ "epoch": 8.61,
7392
+ "learning_rate": 6.073891033193507e-05,
7393
+ "loss": 0.259,
7394
+ "step": 369500
7395
+ },
7396
+ {
7397
+ "epoch": 8.62,
7398
+ "learning_rate": 6.037506457899553e-05,
7399
+ "loss": 0.2588,
7400
+ "step": 370000
7401
+ },
7402
+ {
7403
+ "epoch": 8.62,
7404
+ "eval_loss": 0.23884567618370056,
7405
+ "eval_runtime": 1.4545,
7406
+ "eval_samples_per_second": 1511.147,
7407
+ "eval_steps_per_second": 24.063,
7408
+ "step": 370000
7409
  }
7410
  ],
7411
  "max_steps": 500000,
7412
  "num_train_epochs": 12,
7413
+ "total_flos": 1.1821004158729246e+22,
7414
  "trial_name": null,
7415
  "trial_params": null
7416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ca3e8665bb839f68d9a5ff24783ce35763f85a0cda61cb52ae923170ae03041
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10781209da6498c21422e9093158ac9bd1d4e98d2f25e9bf1fc4805ab12c7d1
3
  size 102501541