plip committed on
Commit
5345174
1 Parent(s): 994b100

Training in progress, step 230000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a4927510127367e9500a6855c39ccf67e69ae9696103dd6206ca76c8228c9e7
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:133edef03c1d84d7f47c379bb8627e715102779ac7fbfbbc81cf76fdb3857138
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb407692499c91ceb95468636179875a29017b0607fee237df69b7c6f035183b
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef3e57cb5c11d79b27478df38200399f2d7e69d1e9da7282bcbb17ee802e3b1d
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a2409314cc97cd170cf372a0918ec1a71b59dc692c1f5814e6639647692d18c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a05d01a7b2f868e6f0e645a01ef2a1ba3ad5ea16b1bff8e9cf0cf653b106d64f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a2bb37359d05bc7917d91ab1261ba8c4d8f00648cd8cb2d11c677b6c91ddb27
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.125934900626762,
5
- "global_step": 220000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4406,11 +4406,211 @@
4406
  "eval_samples_per_second": 1551.973,
4407
  "eval_steps_per_second": 24.713,
4408
  "step": 220000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4409
  }
4410
  ],
4411
  "max_steps": 500000,
4412
  "num_train_epochs": 12,
4413
- "total_flos": 7.028704568084466e+21,
4414
  "trial_name": null,
4415
  "trial_params": null
4416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.358931941564342,
5
+ "global_step": 230000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4406
  "eval_samples_per_second": 1551.973,
4407
  "eval_steps_per_second": 24.713,
4408
  "step": 220000
4409
+ },
4410
+ {
4411
+ "epoch": 5.14,
4412
+ "learning_rate": 0.00019476251528202922,
4413
+ "loss": 0.2773,
4414
+ "step": 220500
4415
+ },
4416
+ {
4417
+ "epoch": 5.15,
4418
+ "learning_rate": 0.0001943011739570944,
4419
+ "loss": 0.2763,
4420
+ "step": 221000
4421
+ },
4422
+ {
4423
+ "epoch": 5.15,
4424
+ "eval_loss": 0.26054272055625916,
4425
+ "eval_runtime": 1.4233,
4426
+ "eval_samples_per_second": 1544.255,
4427
+ "eval_steps_per_second": 24.59,
4428
+ "step": 221000
4429
+ },
4430
+ {
4431
+ "epoch": 5.16,
4432
+ "learning_rate": 0.00019383940284081774,
4433
+ "loss": 0.2766,
4434
+ "step": 221500
4435
+ },
4436
+ {
4437
+ "epoch": 5.17,
4438
+ "learning_rate": 0.00019337720698305431,
4439
+ "loss": 0.2766,
4440
+ "step": 222000
4441
+ },
4442
+ {
4443
+ "epoch": 5.17,
4444
+ "eval_loss": 0.25992023944854736,
4445
+ "eval_runtime": 1.4053,
4446
+ "eval_samples_per_second": 1564.073,
4447
+ "eval_steps_per_second": 24.906,
4448
+ "step": 222000
4449
+ },
4450
+ {
4451
+ "epoch": 5.18,
4452
+ "learning_rate": 0.0001929145914383038,
4453
+ "loss": 0.2763,
4454
+ "step": 222500
4455
+ },
4456
+ {
4457
+ "epoch": 5.2,
4458
+ "learning_rate": 0.00019245156126565586,
4459
+ "loss": 0.2763,
4460
+ "step": 223000
4461
+ },
4462
+ {
4463
+ "epoch": 5.2,
4464
+ "eval_loss": 0.2582224905490875,
4465
+ "eval_runtime": 1.4422,
4466
+ "eval_samples_per_second": 1524.087,
4467
+ "eval_steps_per_second": 24.269,
4468
+ "step": 223000
4469
+ },
4470
+ {
4471
+ "epoch": 5.21,
4472
+ "learning_rate": 0.00019198812152873416,
4473
+ "loss": 0.2762,
4474
+ "step": 223500
4475
+ },
4476
+ {
4477
+ "epoch": 5.22,
4478
+ "learning_rate": 0.00019152427729564144,
4479
+ "loss": 0.2764,
4480
+ "step": 224000
4481
+ },
4482
+ {
4483
+ "epoch": 5.22,
4484
+ "eval_loss": 0.26058298349380493,
4485
+ "eval_runtime": 1.4571,
4486
+ "eval_samples_per_second": 1508.477,
4487
+ "eval_steps_per_second": 24.02,
4488
+ "step": 224000
4489
+ },
4490
+ {
4491
+ "epoch": 5.23,
4492
+ "learning_rate": 0.00019106003363890395,
4493
+ "loss": 0.2761,
4494
+ "step": 224500
4495
+ },
4496
+ {
4497
+ "epoch": 5.24,
4498
+ "learning_rate": 0.00019059539563541584,
4499
+ "loss": 0.276,
4500
+ "step": 225000
4501
+ },
4502
+ {
4503
+ "epoch": 5.24,
4504
+ "eval_loss": 0.25854378938674927,
4505
+ "eval_runtime": 1.4732,
4506
+ "eval_samples_per_second": 1491.985,
4507
+ "eval_steps_per_second": 23.758,
4508
+ "step": 225000
4509
+ },
4510
+ {
4511
+ "epoch": 5.25,
4512
+ "learning_rate": 0.000190130368366384,
4513
+ "loss": 0.2761,
4514
+ "step": 225500
4515
+ },
4516
+ {
4517
+ "epoch": 5.27,
4518
+ "learning_rate": 0.00018966495691727207,
4519
+ "loss": 0.2758,
4520
+ "step": 226000
4521
+ },
4522
+ {
4523
+ "epoch": 5.27,
4524
+ "eval_loss": 0.26082664728164673,
4525
+ "eval_runtime": 1.3852,
4526
+ "eval_samples_per_second": 1586.815,
4527
+ "eval_steps_per_second": 25.268,
4528
+ "step": 226000
4529
+ },
4530
+ {
4531
+ "epoch": 5.28,
4532
+ "learning_rate": 0.0001891991663777451,
4533
+ "loss": 0.2759,
4534
+ "step": 226500
4535
+ },
4536
+ {
4537
+ "epoch": 5.29,
4538
+ "learning_rate": 0.00018873300184161387,
4539
+ "loss": 0.2761,
4540
+ "step": 227000
4541
+ },
4542
+ {
4543
+ "epoch": 5.29,
4544
+ "eval_loss": 0.2606270909309387,
4545
+ "eval_runtime": 1.4133,
4546
+ "eval_samples_per_second": 1555.208,
4547
+ "eval_steps_per_second": 24.764,
4548
+ "step": 227000
4549
+ },
4550
+ {
4551
+ "epoch": 5.3,
4552
+ "learning_rate": 0.00018826646840677894,
4553
+ "loss": 0.2764,
4554
+ "step": 227500
4555
+ },
4556
+ {
4557
+ "epoch": 5.31,
4558
+ "learning_rate": 0.00018779957117517532,
4559
+ "loss": 0.2759,
4560
+ "step": 228000
4561
+ },
4562
+ {
4563
+ "epoch": 5.31,
4564
+ "eval_loss": 0.25820019841194153,
4565
+ "eval_runtime": 1.4725,
4566
+ "eval_samples_per_second": 1492.708,
4567
+ "eval_steps_per_second": 23.769,
4568
+ "step": 228000
4569
+ },
4570
+ {
4571
+ "epoch": 5.32,
4572
+ "learning_rate": 0.00018733231525271625,
4573
+ "loss": 0.2754,
4574
+ "step": 228500
4575
+ },
4576
+ {
4577
+ "epoch": 5.34,
4578
+ "learning_rate": 0.00018686470574923766,
4579
+ "loss": 0.2754,
4580
+ "step": 229000
4581
+ },
4582
+ {
4583
+ "epoch": 5.34,
4584
+ "eval_loss": 0.25702887773513794,
4585
+ "eval_runtime": 1.4623,
4586
+ "eval_samples_per_second": 1503.12,
4587
+ "eval_steps_per_second": 23.935,
4588
+ "step": 229000
4589
+ },
4590
+ {
4591
+ "epoch": 5.35,
4592
+ "learning_rate": 0.00018639674777844224,
4593
+ "loss": 0.2754,
4594
+ "step": 229500
4595
+ },
4596
+ {
4597
+ "epoch": 5.36,
4598
+ "learning_rate": 0.00018592844645784327,
4599
+ "loss": 0.2753,
4600
+ "step": 230000
4601
+ },
4602
+ {
4603
+ "epoch": 5.36,
4604
+ "eval_loss": 0.25963670015335083,
4605
+ "eval_runtime": 1.4406,
4606
+ "eval_samples_per_second": 1525.752,
4607
+ "eval_steps_per_second": 24.295,
4608
+ "step": 230000
4609
  }
4610
  ],
4611
  "max_steps": 500000,
4612
  "num_train_epochs": 12,
4613
+ "total_flos": 7.348191706858966e+21,
4614
  "trial_name": null,
4615
  "trial_params": null
4616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb407692499c91ceb95468636179875a29017b0607fee237df69b7c6f035183b
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef3e57cb5c11d79b27478df38200399f2d7e69d1e9da7282bcbb17ee802e3b1d
3
  size 102501541