plip commited on
Commit
cb55cd5
1 Parent(s): 3497578

Training in progress, step 430000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb8b7da07a19b89f2cab9e7ae5ecfface2ed5a4207f59160b87fe1ed401ae453
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:415b6b8c1b4d1a9fcb0ed4ef3046c0ad8d1ace085712e9a63b9c3ad1cdd80c3b
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5991ca4f8bc9f7681639a0a614f6fc9c7ec3e7b33dfe9e1afc9ff63491198511
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c87e28b6a6f33ab4f0a8c8ae59e95a04117be20f20068b2e46c87ef398e799
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddd7f4bf85191d8a4c9627be9d4681a42609a7a71851d99a5960c2f6b9c0253a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b57b9eb19bd7ea54b7780f07359bc75fb7601e89f1aa7c6ff7720338af2926
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21fa0c13fc0e3348f6228394f5e318945295debe26ba21ec91b2c06a47593869
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f427c751ea4b109969727e0c5f2ef9ef6fd7587de8192ab50fc2201ab4ba3ed9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.785875719378364,
5
- "global_step": 420000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8406,11 +8406,211 @@
8406
  "eval_samples_per_second": 1522.319,
8407
  "eval_steps_per_second": 24.241,
8408
  "step": 420000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8409
  }
8410
  ],
8411
  "max_steps": 500000,
8412
  "num_train_epochs": 12,
8413
- "total_flos": 1.341843735561084e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.018872760315944,
5
+ "global_step": 430000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8406
  "eval_samples_per_second": 1522.319,
8407
  "eval_steps_per_second": 24.241,
8408
  "step": 420000
8409
+ },
8410
+ {
8411
+ "epoch": 9.8,
8412
+ "learning_rate": 2.9586456905128618e-05,
8413
+ "loss": 0.2539,
8414
+ "step": 420500
8415
+ },
8416
+ {
8417
+ "epoch": 9.81,
8418
+ "learning_rate": 2.9346475051519687e-05,
8419
+ "loss": 0.2539,
8420
+ "step": 421000
8421
+ },
8422
+ {
8423
+ "epoch": 9.81,
8424
+ "eval_loss": 0.23778527975082397,
8425
+ "eval_runtime": 1.4672,
8426
+ "eval_samples_per_second": 1498.116,
8427
+ "eval_steps_per_second": 23.855,
8428
+ "step": 421000
8429
+ },
8430
+ {
8431
+ "epoch": 9.82,
8432
+ "learning_rate": 2.910786732472815e-05,
8433
+ "loss": 0.2538,
8434
+ "step": 421500
8435
+ },
8436
+ {
8437
+ "epoch": 9.83,
8438
+ "learning_rate": 2.887063633412981e-05,
8439
+ "loss": 0.254,
8440
+ "step": 422000
8441
+ },
8442
+ {
8443
+ "epoch": 9.83,
8444
+ "eval_loss": 0.23553605377674103,
8445
+ "eval_runtime": 1.4917,
8446
+ "eval_samples_per_second": 1473.465,
8447
+ "eval_steps_per_second": 23.463,
8448
+ "step": 422000
8449
+ },
8450
+ {
8451
+ "epoch": 9.84,
8452
+ "learning_rate": 2.863478467404478e-05,
8453
+ "loss": 0.2538,
8454
+ "step": 422500
8455
+ },
8456
+ {
8457
+ "epoch": 9.86,
8458
+ "learning_rate": 2.8400314923709112e-05,
8459
+ "loss": 0.2537,
8460
+ "step": 423000
8461
+ },
8462
+ {
8463
+ "epoch": 9.86,
8464
+ "eval_loss": 0.23579563200473785,
8465
+ "eval_runtime": 1.4277,
8466
+ "eval_samples_per_second": 1539.579,
8467
+ "eval_steps_per_second": 24.516,
8468
+ "step": 423000
8469
+ },
8470
+ {
8471
+ "epoch": 9.87,
8472
+ "learning_rate": 2.816722964724636e-05,
8473
+ "loss": 0.2537,
8474
+ "step": 423500
8475
+ },
8476
+ {
8477
+ "epoch": 9.88,
8478
+ "learning_rate": 2.793553139363981e-05,
8479
+ "loss": 0.2536,
8480
+ "step": 424000
8481
+ },
8482
+ {
8483
+ "epoch": 9.88,
8484
+ "eval_loss": 0.23610466718673706,
8485
+ "eval_runtime": 1.4604,
8486
+ "eval_samples_per_second": 1505.035,
8487
+ "eval_steps_per_second": 23.966,
8488
+ "step": 424000
8489
+ },
8490
+ {
8491
+ "epoch": 9.89,
8492
+ "learning_rate": 2.7705222696704366e-05,
8493
+ "loss": 0.2536,
8494
+ "step": 424500
8495
+ },
8496
+ {
8497
+ "epoch": 9.9,
8498
+ "learning_rate": 2.7476306075059096e-05,
8499
+ "loss": 0.2534,
8500
+ "step": 425000
8501
+ },
8502
+ {
8503
+ "epoch": 9.9,
8504
+ "eval_loss": 0.2384994477033615,
8505
+ "eval_runtime": 1.4421,
8506
+ "eval_samples_per_second": 1524.185,
8507
+ "eval_steps_per_second": 24.27,
8508
+ "step": 425000
8509
+ },
8510
+ {
8511
+ "epoch": 9.91,
8512
+ "learning_rate": 2.7248784032099478e-05,
8513
+ "loss": 0.2536,
8514
+ "step": 425500
8515
+ },
8516
+ {
8517
+ "epoch": 9.93,
8518
+ "learning_rate": 2.7022659055970144e-05,
8519
+ "loss": 0.2534,
8520
+ "step": 426000
8521
+ },
8522
+ {
8523
+ "epoch": 9.93,
8524
+ "eval_loss": 0.23720860481262207,
8525
+ "eval_runtime": 1.4451,
8526
+ "eval_samples_per_second": 1521.023,
8527
+ "eval_steps_per_second": 24.22,
8528
+ "step": 426000
8529
+ },
8530
+ {
8531
+ "epoch": 9.94,
8532
+ "learning_rate": 2.6797933619537604e-05,
8533
+ "loss": 0.2534,
8534
+ "step": 426500
8535
+ },
8536
+ {
8537
+ "epoch": 9.95,
8538
+ "learning_rate": 2.6574610180363166e-05,
8539
+ "loss": 0.2535,
8540
+ "step": 427000
8541
+ },
8542
+ {
8543
+ "epoch": 9.95,
8544
+ "eval_loss": 0.23618370294570923,
8545
+ "eval_runtime": 1.4698,
8546
+ "eval_samples_per_second": 1495.473,
8547
+ "eval_steps_per_second": 23.813,
8548
+ "step": 427000
8549
+ },
8550
+ {
8551
+ "epoch": 9.96,
8552
+ "learning_rate": 2.6352691180676286e-05,
8553
+ "loss": 0.2532,
8554
+ "step": 427500
8555
+ },
8556
+ {
8557
+ "epoch": 9.97,
8558
+ "learning_rate": 2.6132179047347505e-05,
8559
+ "loss": 0.2535,
8560
+ "step": 428000
8561
+ },
8562
+ {
8563
+ "epoch": 9.97,
8564
+ "eval_loss": 0.23612357676029205,
8565
+ "eval_runtime": 1.4655,
8566
+ "eval_samples_per_second": 1499.84,
8567
+ "eval_steps_per_second": 23.883,
8568
+ "step": 428000
8569
+ },
8570
+ {
8571
+ "epoch": 9.98,
8572
+ "learning_rate": 2.5913076191862238e-05,
8573
+ "loss": 0.2534,
8574
+ "step": 428500
8575
+ },
8576
+ {
8577
+ "epoch": 10.0,
8578
+ "learning_rate": 2.5695385010294165e-05,
8579
+ "loss": 0.2532,
8580
+ "step": 429000
8581
+ },
8582
+ {
8583
+ "epoch": 10.0,
8584
+ "eval_loss": 0.23688232898712158,
8585
+ "eval_runtime": 1.4807,
8586
+ "eval_samples_per_second": 1484.441,
8587
+ "eval_steps_per_second": 23.638,
8588
+ "step": 429000
8589
+ },
8590
+ {
8591
+ "epoch": 10.01,
8592
+ "learning_rate": 2.5479107883279144e-05,
8593
+ "loss": 0.2533,
8594
+ "step": 429500
8595
+ },
8596
+ {
8597
+ "epoch": 10.02,
8598
+ "learning_rate": 2.5264247175989292e-05,
8599
+ "loss": 0.2533,
8600
+ "step": 430000
8601
+ },
8602
+ {
8603
+ "epoch": 10.02,
8604
+ "eval_loss": 0.2365807592868805,
8605
+ "eval_runtime": 1.4314,
8606
+ "eval_samples_per_second": 1535.576,
8607
+ "eval_steps_per_second": 24.452,
8608
+ "step": 430000
8609
  }
8610
  ],
8611
  "max_steps": 500000,
8612
  "num_train_epochs": 12,
8613
+ "total_flos": 1.3737921997394432e+22,
8614
  "trial_name": null,
8615
  "trial_params": null
8616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5991ca4f8bc9f7681639a0a614f6fc9c7ec3e7b33dfe9e1afc9ff63491198511
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c87e28b6a6f33ab4f0a8c8ae59e95a04117be20f20068b2e46c87ef398e799
3
  size 102501541