plip commited on
Commit
da42e3b
1 Parent(s): e59df61

Training in progress, step 380000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df6d3d3f9674103740b8f59e2a1f3f36fbba555fa4f14347ef60833e8c7c8d0f
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8f4969533102642963b0b2227bcca65da739e93c9ba93ada329fad00247349b
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e10781209da6498c21422e9093158ac9bd1d4e98d2f25e9bf1fc4805ab12c7d1
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31402a0bcc9e6eb51374c53180861ce7a4f03142fd97638776d46c6dc480c809
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:580594f4768ecd1bd92e87dca92e874365a397305161c1f8781b79f1f0b613ba
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2047b5f47fc3de929bb0738f7fbdd248300ab063f6fd4eddcabc29f5482852
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.620890514690464,
5
- "global_step": 370000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7406,11 +7406,211 @@
7406
  "eval_samples_per_second": 1511.147,
7407
  "eval_steps_per_second": 24.063,
7408
  "step": 370000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7409
  }
7410
  ],
7411
  "max_steps": 500000,
7412
  "num_train_epochs": 12,
7413
- "total_flos": 1.1821004158729246e+22,
7414
  "trial_name": null,
7415
  "trial_params": null
7416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.853887555628043,
5
+ "global_step": 380000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7406
  "eval_samples_per_second": 1511.147,
7407
  "eval_steps_per_second": 24.063,
7408
  "step": 370000
7409
+ },
7410
+ {
7411
+ "epoch": 8.63,
7412
+ "learning_rate": 6.0012253629189544e-05,
7413
+ "loss": 0.2584,
7414
+ "step": 370500
7415
+ },
7416
+ {
7417
+ "epoch": 8.64,
7418
+ "learning_rate": 5.965048145015944e-05,
7419
+ "loss": 0.2585,
7420
+ "step": 371000
7421
+ },
7422
+ {
7423
+ "epoch": 8.64,
7424
+ "eval_loss": 0.24364076554775238,
7425
+ "eval_runtime": 1.4292,
7426
+ "eval_samples_per_second": 1537.961,
7427
+ "eval_steps_per_second": 24.49,
7428
+ "step": 371000
7429
+ },
7430
+ {
7431
+ "epoch": 8.66,
7432
+ "learning_rate": 5.928975199818785e-05,
7433
+ "loss": 0.2583,
7434
+ "step": 371500
7435
+ },
7436
+ {
7437
+ "epoch": 8.67,
7438
+ "learning_rate": 5.893006921815428e-05,
7439
+ "loss": 0.2582,
7440
+ "step": 372000
7441
+ },
7442
+ {
7443
+ "epoch": 8.67,
7444
+ "eval_loss": 0.24061721563339233,
7445
+ "eval_runtime": 1.4626,
7446
+ "eval_samples_per_second": 1502.828,
7447
+ "eval_steps_per_second": 23.93,
7448
+ "step": 372000
7449
+ },
7450
+ {
7451
+ "epoch": 8.68,
7452
+ "learning_rate": 5.857143704349198e-05,
7453
+ "loss": 0.2584,
7454
+ "step": 372500
7455
+ },
7456
+ {
7457
+ "epoch": 8.69,
7458
+ "learning_rate": 5.8213859396144986e-05,
7459
+ "loss": 0.2583,
7460
+ "step": 373000
7461
+ },
7462
+ {
7463
+ "epoch": 8.69,
7464
+ "eval_loss": 0.24275849759578705,
7465
+ "eval_runtime": 1.4296,
7466
+ "eval_samples_per_second": 1537.531,
7467
+ "eval_steps_per_second": 24.483,
7468
+ "step": 373000
7469
+ },
7470
+ {
7471
+ "epoch": 8.7,
7472
+ "learning_rate": 5.785734018652507e-05,
7473
+ "loss": 0.2584,
7474
+ "step": 373500
7475
+ },
7476
+ {
7477
+ "epoch": 8.71,
7478
+ "learning_rate": 5.750188331346927e-05,
7479
+ "loss": 0.2585,
7480
+ "step": 374000
7481
+ },
7482
+ {
7483
+ "epoch": 8.71,
7484
+ "eval_loss": 0.24333250522613525,
7485
+ "eval_runtime": 1.4555,
7486
+ "eval_samples_per_second": 1510.18,
7487
+ "eval_steps_per_second": 24.047,
7488
+ "step": 374000
7489
+ },
7490
+ {
7491
+ "epoch": 8.73,
7492
+ "learning_rate": 5.714749266419695e-05,
7493
+ "loss": 0.2584,
7494
+ "step": 374500
7495
+ },
7496
+ {
7497
+ "epoch": 8.74,
7498
+ "learning_rate": 5.6794172114267566e-05,
7499
+ "loss": 0.2578,
7500
+ "step": 375000
7501
+ },
7502
+ {
7503
+ "epoch": 8.74,
7504
+ "eval_loss": 0.24110642075538635,
7505
+ "eval_runtime": 1.4471,
7506
+ "eval_samples_per_second": 1518.91,
7507
+ "eval_steps_per_second": 24.186,
7508
+ "step": 375000
7509
+ },
7510
+ {
7511
+ "epoch": 8.75,
7512
+ "learning_rate": 5.6441925527537914e-05,
7513
+ "loss": 0.2578,
7514
+ "step": 375500
7515
+ },
7516
+ {
7517
+ "epoch": 8.76,
7518
+ "learning_rate": 5.60907567561203e-05,
7519
+ "loss": 0.2582,
7520
+ "step": 376000
7521
+ },
7522
+ {
7523
+ "epoch": 8.76,
7524
+ "eval_loss": 0.242658793926239,
7525
+ "eval_runtime": 1.4703,
7526
+ "eval_samples_per_second": 1494.982,
7527
+ "eval_steps_per_second": 23.805,
7528
+ "step": 376000
7529
+ },
7530
+ {
7531
+ "epoch": 8.77,
7532
+ "learning_rate": 5.574066964034012e-05,
7533
+ "loss": 0.2581,
7534
+ "step": 376500
7535
+ },
7536
+ {
7537
+ "epoch": 8.78,
7538
+ "learning_rate": 5.539166800869402e-05,
7539
+ "loss": 0.258,
7540
+ "step": 377000
7541
+ },
7542
+ {
7543
+ "epoch": 8.78,
7544
+ "eval_loss": 0.2416759431362152,
7545
+ "eval_runtime": 1.4756,
7546
+ "eval_samples_per_second": 1489.53,
7547
+ "eval_steps_per_second": 23.719,
7548
+ "step": 377000
7549
+ },
7550
+ {
7551
+ "epoch": 8.8,
7552
+ "learning_rate": 5.5043755677807955e-05,
7553
+ "loss": 0.2578,
7554
+ "step": 377500
7555
+ },
7556
+ {
7557
+ "epoch": 8.81,
7558
+ "learning_rate": 5.4696936452395344e-05,
7559
+ "loss": 0.2576,
7560
+ "step": 378000
7561
+ },
7562
+ {
7563
+ "epoch": 8.81,
7564
+ "eval_loss": 0.23985832929611206,
7565
+ "eval_runtime": 1.4569,
7566
+ "eval_samples_per_second": 1508.675,
7567
+ "eval_steps_per_second": 24.023,
7568
+ "step": 378000
7569
+ },
7570
+ {
7571
+ "epoch": 8.82,
7572
+ "learning_rate": 5.435121412521576e-05,
7573
+ "loss": 0.2579,
7574
+ "step": 378500
7575
+ },
7576
+ {
7577
+ "epoch": 8.83,
7578
+ "learning_rate": 5.400659247703307e-05,
7579
+ "loss": 0.2574,
7580
+ "step": 379000
7581
+ },
7582
+ {
7583
+ "epoch": 8.83,
7584
+ "eval_loss": 0.24152863025665283,
7585
+ "eval_runtime": 1.4464,
7586
+ "eval_samples_per_second": 1519.59,
7587
+ "eval_steps_per_second": 24.197,
7588
+ "step": 379000
7589
+ },
7590
+ {
7591
+ "epoch": 8.84,
7592
+ "learning_rate": 5.36630752765745e-05,
7593
+ "loss": 0.2576,
7594
+ "step": 379500
7595
+ },
7596
+ {
7597
+ "epoch": 8.85,
7598
+ "learning_rate": 5.3320666280489146e-05,
7599
+ "loss": 0.2579,
7600
+ "step": 380000
7601
+ },
7602
+ {
7603
+ "epoch": 8.85,
7604
+ "eval_loss": 0.24166275560855865,
7605
+ "eval_runtime": 1.4454,
7606
+ "eval_samples_per_second": 1520.737,
7607
+ "eval_steps_per_second": 24.216,
7608
+ "step": 380000
7609
  }
7610
  ],
7611
  "max_steps": 500000,
7612
  "num_train_epochs": 12,
7613
+ "total_flos": 1.2140491297503746e+22,
7614
  "trial_name": null,
7615
  "trial_params": null
7616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e10781209da6498c21422e9093158ac9bd1d4e98d2f25e9bf1fc4805ab12c7d1
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31402a0bcc9e6eb51374c53180861ce7a4f03142fd97638776d46c6dc480c809
3
  size 102501541