plip commited on
Commit
994b100
1 Parent(s): 637091b

Training in progress, step 220000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20fdb368922f36248fd6b214b7b78335fe829358b5b671adc377a7586ad5707d
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a4927510127367e9500a6855c39ccf67e69ae9696103dd6206ca76c8228c9e7
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb8ed6173253524a7574f7166f4857b303d1b69af9c36814e1d9440f006d9566
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb407692499c91ceb95468636179875a29017b0607fee237df69b7c6f035183b
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf717114abf74d765a1dd76652ea701e4d472d694acc3289369476dcf9946f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf717114abf74d765a1dd76652ea701e4d472d694acc3289369476dcf9946f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf717114abf74d765a1dd76652ea701e4d472d694acc3289369476dcf9946f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf717114abf74d765a1dd76652ea701e4d472d694acc3289369476dcf9946f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf717114abf74d765a1dd76652ea701e4d472d694acc3289369476dcf9946f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf717114abf74d765a1dd76652ea701e4d472d694acc3289369476dcf9946f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf717114abf74d765a1dd76652ea701e4d472d694acc3289369476dcf9946f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf717114abf74d765a1dd76652ea701e4d472d694acc3289369476dcf9946f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de6aa60c12b8dbcda06c27a65ac30e45d66753f2960bee09a7c0ea364ae7e103
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05d01a7b2f868e6f0e645a01ef2a1ba3ad5ea16b1bff8e9cf0cf653b106d64f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.892937859689182,
5
- "global_step": 210000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4206,11 +4206,211 @@
4206
  "eval_samples_per_second": 1564.263,
4207
  "eval_steps_per_second": 24.909,
4208
  "step": 210000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4209
  }
4210
  ],
4211
  "max_steps": 500000,
4212
  "num_train_epochs": 12,
4213
- "total_flos": 6.709219926300873e+21,
4214
  "trial_name": null,
4215
  "trial_params": null
4216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.125934900626762,
5
+ "global_step": 220000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4206
  "eval_samples_per_second": 1564.263,
4207
  "eval_steps_per_second": 24.909,
4208
  "step": 210000
4209
+ },
4210
+ {
4211
+ "epoch": 4.9,
4212
+ "learning_rate": 0.00020389135229309803,
4213
+ "loss": 0.2781,
4214
+ "step": 210500
4215
+ },
4216
+ {
4217
+ "epoch": 4.92,
4218
+ "learning_rate": 0.00020343965962416229,
4219
+ "loss": 0.2783,
4220
+ "step": 211000
4221
+ },
4222
+ {
4223
+ "epoch": 4.92,
4224
+ "eval_loss": 0.26213955879211426,
4225
+ "eval_runtime": 1.4602,
4226
+ "eval_samples_per_second": 1505.253,
4227
+ "eval_steps_per_second": 23.969,
4228
+ "step": 211000
4229
+ },
4230
+ {
4231
+ "epoch": 4.93,
4232
+ "learning_rate": 0.00020298743722686958,
4233
+ "loss": 0.2778,
4234
+ "step": 211500
4235
+ },
4236
+ {
4237
+ "epoch": 4.94,
4238
+ "learning_rate": 0.0002025346900466516,
4239
+ "loss": 0.2781,
4240
+ "step": 212000
4241
+ },
4242
+ {
4243
+ "epoch": 4.94,
4244
+ "eval_loss": 0.2614113390445709,
4245
+ "eval_runtime": 1.4246,
4246
+ "eval_samples_per_second": 1542.853,
4247
+ "eval_steps_per_second": 24.568,
4248
+ "step": 212000
4249
+ },
4250
+ {
4251
+ "epoch": 4.95,
4252
+ "learning_rate": 0.0002020814230346791,
4253
+ "loss": 0.2778,
4254
+ "step": 212500
4255
+ },
4256
+ {
4257
+ "epoch": 4.96,
4258
+ "learning_rate": 0.00020162764114780733,
4259
+ "loss": 0.2779,
4260
+ "step": 213000
4261
+ },
4262
+ {
4263
+ "epoch": 4.96,
4264
+ "eval_loss": 0.2597576975822449,
4265
+ "eval_runtime": 1.4338,
4266
+ "eval_samples_per_second": 1532.976,
4267
+ "eval_steps_per_second": 24.41,
4268
+ "step": 213000
4269
+ },
4270
+ {
4271
+ "epoch": 4.97,
4272
+ "learning_rate": 0.0002011733493485224,
4273
+ "loss": 0.2778,
4274
+ "step": 213500
4275
+ },
4276
+ {
4277
+ "epoch": 4.99,
4278
+ "learning_rate": 0.00020071855260488664,
4279
+ "loss": 0.2776,
4280
+ "step": 214000
4281
+ },
4282
+ {
4283
+ "epoch": 4.99,
4284
+ "eval_loss": 0.2594500780105591,
4285
+ "eval_runtime": 1.4614,
4286
+ "eval_samples_per_second": 1504.034,
4287
+ "eval_steps_per_second": 23.95,
4288
+ "step": 214000
4289
+ },
4290
+ {
4291
+ "epoch": 5.0,
4292
+ "learning_rate": 0.0002002632558904843,
4293
+ "loss": 0.2777,
4294
+ "step": 214500
4295
+ },
4296
+ {
4297
+ "epoch": 5.01,
4298
+ "learning_rate": 0.00019980746418436736,
4299
+ "loss": 0.2774,
4300
+ "step": 215000
4301
+ },
4302
+ {
4303
+ "epoch": 5.01,
4304
+ "eval_loss": 0.2608456611633301,
4305
+ "eval_runtime": 1.4098,
4306
+ "eval_samples_per_second": 1559.139,
4307
+ "eval_steps_per_second": 24.827,
4308
+ "step": 215000
4309
+ },
4310
+ {
4311
+ "epoch": 5.02,
4312
+ "learning_rate": 0.00019935118247100088,
4313
+ "loss": 0.2777,
4314
+ "step": 215500
4315
+ },
4316
+ {
4317
+ "epoch": 5.03,
4318
+ "learning_rate": 0.00019889441574020864,
4319
+ "loss": 0.2782,
4320
+ "step": 216000
4321
+ },
4322
+ {
4323
+ "epoch": 5.03,
4324
+ "eval_loss": 0.26146388053894043,
4325
+ "eval_runtime": 1.4335,
4326
+ "eval_samples_per_second": 1533.337,
4327
+ "eval_steps_per_second": 24.416,
4328
+ "step": 216000
4329
+ },
4330
+ {
4331
+ "epoch": 5.04,
4332
+ "learning_rate": 0.0001984371689871183,
4333
+ "loss": 0.2775,
4334
+ "step": 216500
4335
+ },
4336
+ {
4337
+ "epoch": 5.06,
4338
+ "learning_rate": 0.00019797944721210725,
4339
+ "loss": 0.2779,
4340
+ "step": 217000
4341
+ },
4342
+ {
4343
+ "epoch": 5.06,
4344
+ "eval_loss": 0.26257631182670593,
4345
+ "eval_runtime": 1.4244,
4346
+ "eval_samples_per_second": 1543.156,
4347
+ "eval_steps_per_second": 24.573,
4348
+ "step": 217000
4349
+ },
4350
+ {
4351
+ "epoch": 5.07,
4352
+ "learning_rate": 0.00019752125542074736,
4353
+ "loss": 0.2785,
4354
+ "step": 217500
4355
+ },
4356
+ {
4357
+ "epoch": 5.08,
4358
+ "learning_rate": 0.00019706259862375074,
4359
+ "loss": 0.2768,
4360
+ "step": 218000
4361
+ },
4362
+ {
4363
+ "epoch": 5.08,
4364
+ "eval_loss": 0.25935330986976624,
4365
+ "eval_runtime": 1.4288,
4366
+ "eval_samples_per_second": 1538.396,
4367
+ "eval_steps_per_second": 24.497,
4368
+ "step": 218000
4369
+ },
4370
+ {
4371
+ "epoch": 5.09,
4372
+ "learning_rate": 0.00019660348183691453,
4373
+ "loss": 0.277,
4374
+ "step": 218500
4375
+ },
4376
+ {
4377
+ "epoch": 5.1,
4378
+ "learning_rate": 0.0001961439100810664,
4379
+ "loss": 0.2774,
4380
+ "step": 219000
4381
+ },
4382
+ {
4383
+ "epoch": 5.1,
4384
+ "eval_loss": 0.2606256604194641,
4385
+ "eval_runtime": 1.4284,
4386
+ "eval_samples_per_second": 1538.738,
4387
+ "eval_steps_per_second": 24.502,
4388
+ "step": 219000
4389
+ },
4390
+ {
4391
+ "epoch": 5.11,
4392
+ "learning_rate": 0.00019568388838200952,
4393
+ "loss": 0.277,
4394
+ "step": 219500
4395
+ },
4396
+ {
4397
+ "epoch": 5.13,
4398
+ "learning_rate": 0.00019522342177046744,
4399
+ "loss": 0.2767,
4400
+ "step": 220000
4401
+ },
4402
+ {
4403
+ "epoch": 5.13,
4404
+ "eval_loss": 0.2605719566345215,
4405
+ "eval_runtime": 1.4163,
4406
+ "eval_samples_per_second": 1551.973,
4407
+ "eval_steps_per_second": 24.713,
4408
+ "step": 220000
4409
  }
4410
  ],
4411
  "max_steps": 500000,
4412
  "num_train_epochs": 12,
4413
+ "total_flos": 7.028704568084466e+21,
4414
  "trial_name": null,
4415
  "trial_params": null
4416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb8ed6173253524a7574f7166f4857b303d1b69af9c36814e1d9440f006d9566
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb407692499c91ceb95468636179875a29017b0607fee237df69b7c6f035183b
3
  size 102501541