plip committed on
Commit
ba21848
1 Parent(s): 326b0df

Training in progress, step 270000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:406ff60c9fbc96f08fa188658defda9ad6b5e381b6cf799e20366bdf81afbd2a
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db59304ac09b28bcb3c65afafd5ee1982c926ae9e5b3be0a3041e16925dc09d
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e0c29fd1ad93ebeb1e95f532ec268fb56806f7c232e1c94e4622474be46744f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6481ae1099357d8bcf88c484382e1abee67e04204a9282cec0553dec29988327
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b23f32e817f69e70978a71b81655fa6e8769bb16cbe73bed041ad33f8df64bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b7fe86b6bf62db9f7989d6e264b9b70447a29a8d4bbea419af77ab1989ca356
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb61348726887e329b19406ea4e3e39ac391edeec6dfd8508b3cb524aa33e28
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.057923064377082,
5
- "global_step": 260000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5206,11 +5206,211 @@
5206
  "eval_samples_per_second": 1533.386,
5207
  "eval_steps_per_second": 24.417,
5208
  "step": 260000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5209
  }
5210
  ],
5211
  "max_steps": 500000,
5212
  "num_train_epochs": 12,
5213
- "total_flos": 8.30665062619156e+21,
5214
  "trial_name": null,
5215
  "trial_params": null
5216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.290920105314663,
5
+ "global_step": 270000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5206
  "eval_samples_per_second": 1533.386,
5207
  "eval_steps_per_second": 24.417,
5208
  "step": 260000
5209
+ },
5210
+ {
5211
+ "epoch": 6.07,
5212
+ "learning_rate": 0.00015691796905504187,
5213
+ "loss": 0.2712,
5214
+ "step": 260500
5215
+ },
5216
+ {
5217
+ "epoch": 6.08,
5218
+ "learning_rate": 0.00015643849514435944,
5219
+ "loss": 0.2709,
5220
+ "step": 261000
5221
+ },
5222
+ {
5223
+ "epoch": 6.08,
5224
+ "eval_loss": 0.2548723816871643,
5225
+ "eval_runtime": 1.4314,
5226
+ "eval_samples_per_second": 1535.607,
5227
+ "eval_steps_per_second": 24.452,
5228
+ "step": 261000
5229
+ },
5230
+ {
5231
+ "epoch": 6.09,
5232
+ "learning_rate": 0.00015595900550252463,
5233
+ "loss": 0.2707,
5234
+ "step": 261500
5235
+ },
5236
+ {
5237
+ "epoch": 6.1,
5238
+ "learning_rate": 0.00015547950537315926,
5239
+ "loss": 0.2708,
5240
+ "step": 262000
5241
+ },
5242
+ {
5243
+ "epoch": 6.1,
5244
+ "eval_loss": 0.2543224096298218,
5245
+ "eval_runtime": 1.4239,
5246
+ "eval_samples_per_second": 1543.639,
5247
+ "eval_steps_per_second": 24.58,
5248
+ "step": 262000
5249
+ },
5250
+ {
5251
+ "epoch": 6.12,
5252
+ "learning_rate": 0.00015499999999999997,
5253
+ "loss": 0.2708,
5254
+ "step": 262500
5255
+ },
5256
+ {
5257
+ "epoch": 6.13,
5258
+ "learning_rate": 0.00015452049462684068,
5259
+ "loss": 0.2719,
5260
+ "step": 263000
5261
+ },
5262
+ {
5263
+ "epoch": 6.13,
5264
+ "eval_loss": 0.25397995114326477,
5265
+ "eval_runtime": 1.4586,
5266
+ "eval_samples_per_second": 1506.925,
5267
+ "eval_steps_per_second": 23.996,
5268
+ "step": 263000
5269
+ },
5270
+ {
5271
+ "epoch": 6.14,
5272
+ "learning_rate": 0.00015404099449747535,
5273
+ "loss": 0.2712,
5274
+ "step": 263500
5275
+ },
5276
+ {
5277
+ "epoch": 6.15,
5278
+ "learning_rate": 0.0001535615048556405,
5279
+ "loss": 0.2706,
5280
+ "step": 264000
5281
+ },
5282
+ {
5283
+ "epoch": 6.15,
5284
+ "eval_loss": 0.2522125244140625,
5285
+ "eval_runtime": 1.474,
5286
+ "eval_samples_per_second": 1491.135,
5287
+ "eval_steps_per_second": 23.744,
5288
+ "step": 264000
5289
+ },
5290
+ {
5291
+ "epoch": 6.16,
5292
+ "learning_rate": 0.0001530820309449581,
5293
+ "loss": 0.2705,
5294
+ "step": 264500
5295
+ },
5296
+ {
5297
+ "epoch": 6.17,
5298
+ "learning_rate": 0.00015260257800887798,
5299
+ "loss": 0.2705,
5300
+ "step": 265000
5301
+ },
5302
+ {
5303
+ "epoch": 6.17,
5304
+ "eval_loss": 0.25516122579574585,
5305
+ "eval_runtime": 1.3844,
5306
+ "eval_samples_per_second": 1587.692,
5307
+ "eval_steps_per_second": 25.282,
5308
+ "step": 265000
5309
+ },
5310
+ {
5311
+ "epoch": 6.19,
5312
+ "learning_rate": 0.0001521231512906207,
5313
+ "loss": 0.2709,
5314
+ "step": 265500
5315
+ },
5316
+ {
5317
+ "epoch": 6.2,
5318
+ "learning_rate": 0.00015164375603311998,
5319
+ "loss": 0.2708,
5320
+ "step": 266000
5321
+ },
5322
+ {
5323
+ "epoch": 6.2,
5324
+ "eval_loss": 0.25247690081596375,
5325
+ "eval_runtime": 1.4156,
5326
+ "eval_samples_per_second": 1552.724,
5327
+ "eval_steps_per_second": 24.725,
5328
+ "step": 266000
5329
+ },
5330
+ {
5331
+ "epoch": 6.21,
5332
+ "learning_rate": 0.00015116439747896553,
5333
+ "loss": 0.271,
5334
+ "step": 266500
5335
+ },
5336
+ {
5337
+ "epoch": 6.22,
5338
+ "learning_rate": 0.00015068508087034578,
5339
+ "loss": 0.2704,
5340
+ "step": 267000
5341
+ },
5342
+ {
5343
+ "epoch": 6.22,
5344
+ "eval_loss": 0.25168919563293457,
5345
+ "eval_runtime": 1.4279,
5346
+ "eval_samples_per_second": 1539.31,
5347
+ "eval_steps_per_second": 24.511,
5348
+ "step": 267000
5349
+ },
5350
+ {
5351
+ "epoch": 6.23,
5352
+ "learning_rate": 0.00015020581144899027,
5353
+ "loss": 0.2702,
5354
+ "step": 267500
5355
+ },
5356
+ {
5357
+ "epoch": 6.24,
5358
+ "learning_rate": 0.0001497265944561127,
5359
+ "loss": 0.2701,
5360
+ "step": 268000
5361
+ },
5362
+ {
5363
+ "epoch": 6.24,
5364
+ "eval_loss": 0.2535783052444458,
5365
+ "eval_runtime": 1.4078,
5366
+ "eval_samples_per_second": 1561.345,
5367
+ "eval_steps_per_second": 24.862,
5368
+ "step": 268000
5369
+ },
5370
+ {
5371
+ "epoch": 6.26,
5372
+ "learning_rate": 0.00014924743513235327,
5373
+ "loss": 0.2703,
5374
+ "step": 268500
5375
+ },
5376
+ {
5377
+ "epoch": 6.27,
5378
+ "learning_rate": 0.0001487683387177216,
5379
+ "loss": 0.2704,
5380
+ "step": 269000
5381
+ },
5382
+ {
5383
+ "epoch": 6.27,
5384
+ "eval_loss": 0.25389179587364197,
5385
+ "eval_runtime": 1.4483,
5386
+ "eval_samples_per_second": 1517.622,
5387
+ "eval_steps_per_second": 24.166,
5388
+ "step": 269000
5389
+ },
5390
+ {
5391
+ "epoch": 6.28,
5392
+ "learning_rate": 0.00014828931045153928,
5393
+ "loss": 0.2703,
5394
+ "step": 269500
5395
+ },
5396
+ {
5397
+ "epoch": 6.29,
5398
+ "learning_rate": 0.00014781035557238272,
5399
+ "loss": 0.2702,
5400
+ "step": 270000
5401
+ },
5402
+ {
5403
+ "epoch": 6.29,
5404
+ "eval_loss": 0.2548398971557617,
5405
+ "eval_runtime": 1.4524,
5406
+ "eval_samples_per_second": 1513.389,
5407
+ "eval_steps_per_second": 24.099,
5408
+ "step": 270000
5409
  }
5410
  ],
5411
  "max_steps": 500000,
5412
  "num_train_epochs": 12,
5413
+ "total_flos": 8.626137764966059e+21,
5414
  "trial_name": null,
5415
  "trial_params": null
5416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e0c29fd1ad93ebeb1e95f532ec268fb56806f7c232e1c94e4622474be46744f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6481ae1099357d8bcf88c484382e1abee67e04204a9282cec0553dec29988327
3
  size 102501541