plip commited on
Commit
c38daa5
1 Parent(s): bba2600

Training in progress, step 330000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8164585c0cf8e0435f85a1a07a5b437860c2cdc001a4018d6d0b9d01b7eba98
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b12d2d3c7e9c65fa1cdfdb5c2f4716c387d264ad1a9842602482f2263f5d461
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:962905691e3e61901278bd35bae6a9e6802f21882ed0e962dd2ab116fea9b46f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe8e1879944525f3e680f804773ec89ad33764384906428a52a2c3c6871bf701
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bcb5dadf85017bb8728dac44f541535faed64c5ae748a5ae3b2a144ca0708
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5673377a057c7734bd1a0ee14d972f6f3bfc67bb8208ac49ae618347d18d616b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:636cb28fce30ad56f68aface20193360fd815697da4c2ec39f5ca647b5e6b45b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.455905310002563,
5
- "global_step": 320000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6406,11 +6406,211 @@
6406
  "eval_samples_per_second": 1502.721,
6407
  "eval_steps_per_second": 23.929,
6408
  "step": 320000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6409
  }
6410
  ],
6411
  "max_steps": 500000,
6412
  "num_train_epochs": 12,
6413
- "total_flos": 1.0223570961847653e+22,
6414
  "trial_name": null,
6415
  "trial_params": null
6416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.688902350940143,
5
+ "global_step": 330000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6406
  "eval_samples_per_second": 1502.721,
6407
  "eval_steps_per_second": 23.929,
6408
  "step": 320000
6409
+ },
6410
+ {
6411
+ "epoch": 7.47,
6412
+ "learning_rate": 0.00010073144655927253,
6413
+ "loss": 0.2641,
6414
+ "step": 320500
6415
+ },
6416
+ {
6417
+ "epoch": 7.48,
6418
+ "learning_rate": 0.0001002870876838929,
6419
+ "loss": 0.2643,
6420
+ "step": 321000
6421
+ },
6422
+ {
6423
+ "epoch": 7.48,
6424
+ "eval_loss": 0.24826975166797638,
6425
+ "eval_runtime": 1.437,
6426
+ "eval_samples_per_second": 1529.624,
6427
+ "eval_steps_per_second": 24.357,
6428
+ "step": 321000
6429
+ },
6430
+ {
6431
+ "epoch": 7.49,
6432
+ "learning_rate": 9.984332714015662e-05,
6433
+ "loss": 0.264,
6434
+ "step": 321500
6435
+ },
6436
+ {
6437
+ "epoch": 7.5,
6438
+ "learning_rate": 9.94001697809578e-05,
6439
+ "loss": 0.2643,
6440
+ "step": 322000
6441
+ },
6442
+ {
6443
+ "epoch": 7.5,
6444
+ "eval_loss": 0.2487555295228958,
6445
+ "eval_runtime": 1.4486,
6446
+ "eval_samples_per_second": 1517.36,
6447
+ "eval_steps_per_second": 24.162,
6448
+ "step": 322000
6449
+ },
6450
+ {
6451
+ "epoch": 7.51,
6452
+ "learning_rate": 9.895762045259445e-05,
6453
+ "loss": 0.2639,
6454
+ "step": 322500
6455
+ },
6456
+ {
6457
+ "epoch": 7.53,
6458
+ "learning_rate": 9.851568399471498e-05,
6459
+ "loss": 0.2638,
6460
+ "step": 323000
6461
+ },
6462
+ {
6463
+ "epoch": 7.53,
6464
+ "eval_loss": 0.2484089434146881,
6465
+ "eval_runtime": 1.4155,
6466
+ "eval_samples_per_second": 1552.76,
6467
+ "eval_steps_per_second": 24.725,
6468
+ "step": 323000
6469
+ },
6470
+ {
6471
+ "epoch": 7.54,
6472
+ "learning_rate": 9.807436524026574e-05,
6473
+ "loss": 0.2639,
6474
+ "step": 323500
6475
+ },
6476
+ {
6477
+ "epoch": 7.55,
6478
+ "learning_rate": 9.763366901543801e-05,
6479
+ "loss": 0.2636,
6480
+ "step": 324000
6481
+ },
6482
+ {
6483
+ "epoch": 7.55,
6484
+ "eval_loss": 0.24783042073249817,
6485
+ "eval_runtime": 1.4248,
6486
+ "eval_samples_per_second": 1542.655,
6487
+ "eval_steps_per_second": 24.565,
6488
+ "step": 324000
6489
+ },
6490
+ {
6491
+ "epoch": 7.56,
6492
+ "learning_rate": 9.719360013961495e-05,
6493
+ "loss": 0.2634,
6494
+ "step": 324500
6495
+ },
6496
+ {
6497
+ "epoch": 7.57,
6498
+ "learning_rate": 9.675416342531944e-05,
6499
+ "loss": 0.2633,
6500
+ "step": 325000
6501
+ },
6502
+ {
6503
+ "epoch": 7.57,
6504
+ "eval_loss": 0.24799397587776184,
6505
+ "eval_runtime": 1.4677,
6506
+ "eval_samples_per_second": 1497.557,
6507
+ "eval_steps_per_second": 23.846,
6508
+ "step": 325000
6509
+ },
6510
+ {
6511
+ "epoch": 7.58,
6512
+ "learning_rate": 9.631536367816086e-05,
6513
+ "loss": 0.2629,
6514
+ "step": 325500
6515
+ },
6516
+ {
6517
+ "epoch": 7.6,
6518
+ "learning_rate": 9.587720569678299e-05,
6519
+ "loss": 0.2633,
6520
+ "step": 326000
6521
+ },
6522
+ {
6523
+ "epoch": 7.6,
6524
+ "eval_loss": 0.24824275076389313,
6525
+ "eval_runtime": 1.4202,
6526
+ "eval_samples_per_second": 1547.658,
6527
+ "eval_steps_per_second": 24.644,
6528
+ "step": 326000
6529
+ },
6530
+ {
6531
+ "epoch": 7.61,
6532
+ "learning_rate": 9.543969427281131e-05,
6533
+ "loss": 0.2637,
6534
+ "step": 326500
6535
+ },
6536
+ {
6537
+ "epoch": 7.62,
6538
+ "learning_rate": 9.500283419080062e-05,
6539
+ "loss": 0.2637,
6540
+ "step": 327000
6541
+ },
6542
+ {
6543
+ "epoch": 7.62,
6544
+ "eval_loss": 0.24855847656726837,
6545
+ "eval_runtime": 1.3917,
6546
+ "eval_samples_per_second": 1579.351,
6547
+ "eval_steps_per_second": 25.149,
6548
+ "step": 327000
6549
+ },
6550
+ {
6551
+ "epoch": 7.63,
6552
+ "learning_rate": 9.45666302281829e-05,
6553
+ "loss": 0.264,
6554
+ "step": 327500
6555
+ },
6556
+ {
6557
+ "epoch": 7.64,
6558
+ "learning_rate": 9.413108715521467e-05,
6559
+ "loss": 0.2634,
6560
+ "step": 328000
6561
+ },
6562
+ {
6563
+ "epoch": 7.64,
6564
+ "eval_loss": 0.24637793004512787,
6565
+ "eval_runtime": 1.4458,
6566
+ "eval_samples_per_second": 1520.249,
6567
+ "eval_steps_per_second": 24.208,
6568
+ "step": 328000
6569
+ },
6570
+ {
6571
+ "epoch": 7.65,
6572
+ "learning_rate": 9.369620973492525e-05,
6573
+ "loss": 0.2638,
6574
+ "step": 328500
6575
+ },
6576
+ {
6577
+ "epoch": 7.67,
6578
+ "learning_rate": 9.326200272306445e-05,
6579
+ "loss": 0.2631,
6580
+ "step": 329000
6581
+ },
6582
+ {
6583
+ "epoch": 7.67,
6584
+ "eval_loss": 0.24531778693199158,
6585
+ "eval_runtime": 1.4556,
6586
+ "eval_samples_per_second": 1510.044,
6587
+ "eval_steps_per_second": 24.045,
6588
+ "step": 329000
6589
+ },
6590
+ {
6591
+ "epoch": 7.68,
6592
+ "learning_rate": 9.282847086805059e-05,
6593
+ "loss": 0.2636,
6594
+ "step": 329500
6595
+ },
6596
+ {
6597
+ "epoch": 7.69,
6598
+ "learning_rate": 9.239561891091853e-05,
6599
+ "loss": 0.2631,
6600
+ "step": 330000
6601
+ },
6602
+ {
6603
+ "epoch": 7.69,
6604
+ "eval_loss": 0.24822315573692322,
6605
+ "eval_runtime": 1.4463,
6606
+ "eval_samples_per_second": 1519.716,
6607
+ "eval_steps_per_second": 24.199,
6608
+ "step": 330000
6609
  }
6610
  ],
6611
  "max_steps": 500000,
6612
  "num_train_epochs": 12,
6613
+ "total_flos": 1.0543058100622153e+22,
6614
  "trial_name": null,
6615
  "trial_params": null
6616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:962905691e3e61901278bd35bae6a9e6802f21882ed0e962dd2ab116fea9b46f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe8e1879944525f3e680f804773ec89ad33764384906428a52a2c3c6871bf701
3
  size 102501541