rdemorais commited on
Commit
408e46a
1 Parent(s): 922bf2e

Training in progress, step 24400

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:582d50a5a8f019141e250e840870fee5833dcefaefb338a10e0a481e3b50a51c
3
  size 2226478553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210a128933aae1983894b72b165841d8274c0d0117d155aea16bf708d2227228
3
  size 2226478553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
3
  size 1113252715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4015cb96f359b6403e2646b4472afdbae87d2268f8843bd956ec7adf9182921
3
  size 1113252715
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88a6c9c9d9a0e90d25aefe3b1eec75c6a8d9602de8536842eb2cffe609f70cab
3
  size 17563
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60e36185845bc18a656a31db5076cb45fb810dd43869bde58c051cae5a0e36a7
3
  size 17563
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23bf027f9216fbe8b17c91852878854c7eb1a9b37ceb42d5492b73cb3332b194
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c8213aaa5f90a2b250062d1c60bbdb54ae5dfd572df54e04a4bade191c0a7c5
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4216a163f98df4253a3d2dda8c01570c7d3175dacb0f7d8217e0ca3a27141a6
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a748bb88559c27203523064ee7efe8a457e4bd76047f9c37d15baf1de97eaec9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9824590147147672,
5
- "global_step": 24200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -14526,11 +14526,131 @@
14526
  "learning_rate": 9.353378320662852e-07,
14527
  "loss": 1.0992,
14528
  "step": 24200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14529
  }
14530
  ],
14531
  "max_steps": 24414,
14532
  "num_train_epochs": 2,
14533
- "total_flos": 3.269571533476485e+18,
14534
  "trial_name": null,
14535
  "trial_params": null
14536
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9988428888866134,
5
+ "global_step": 24400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
14526
  "learning_rate": 9.353378320662852e-07,
14527
  "loss": 1.0992,
14528
  "step": 24200
14529
+ },
14530
+ {
14531
+ "epoch": 1.98,
14532
+ "learning_rate": 8.968992910224651e-07,
14533
+ "loss": 1.0964,
14534
+ "step": 24210
14535
+ },
14536
+ {
14537
+ "epoch": 1.98,
14538
+ "learning_rate": 8.541898009737764e-07,
14539
+ "loss": 1.0921,
14540
+ "step": 24220
14541
+ },
14542
+ {
14543
+ "epoch": 1.98,
14544
+ "learning_rate": 8.114803109250876e-07,
14545
+ "loss": 1.0858,
14546
+ "step": 24230
14547
+ },
14548
+ {
14549
+ "epoch": 1.99,
14550
+ "learning_rate": 7.687708208763988e-07,
14551
+ "loss": 1.0917,
14552
+ "step": 24240
14553
+ },
14554
+ {
14555
+ "epoch": 1.99,
14556
+ "learning_rate": 7.260613308277099e-07,
14557
+ "loss": 1.0885,
14558
+ "step": 24250
14559
+ },
14560
+ {
14561
+ "epoch": 1.99,
14562
+ "learning_rate": 6.833518407790211e-07,
14563
+ "loss": 1.0898,
14564
+ "step": 24260
14565
+ },
14566
+ {
14567
+ "epoch": 1.99,
14568
+ "learning_rate": 6.406423507303323e-07,
14569
+ "loss": 1.0852,
14570
+ "step": 24270
14571
+ },
14572
+ {
14573
+ "epoch": 1.99,
14574
+ "learning_rate": 5.979328606816435e-07,
14575
+ "loss": 1.0954,
14576
+ "step": 24280
14577
+ },
14578
+ {
14579
+ "epoch": 1.99,
14580
+ "learning_rate": 5.552233706329546e-07,
14581
+ "loss": 1.1051,
14582
+ "step": 24290
14583
+ },
14584
+ {
14585
+ "epoch": 1.99,
14586
+ "learning_rate": 5.125138805842659e-07,
14587
+ "loss": 1.0875,
14588
+ "step": 24300
14589
+ },
14590
+ {
14591
+ "epoch": 1.99,
14592
+ "learning_rate": 4.6980439053557705e-07,
14593
+ "loss": 1.0967,
14594
+ "step": 24310
14595
+ },
14596
+ {
14597
+ "epoch": 1.99,
14598
+ "learning_rate": 4.270949004868882e-07,
14599
+ "loss": 1.099,
14600
+ "step": 24320
14601
+ },
14602
+ {
14603
+ "epoch": 1.99,
14604
+ "learning_rate": 3.843854104381994e-07,
14605
+ "loss": 1.0952,
14606
+ "step": 24330
14607
+ },
14608
+ {
14609
+ "epoch": 1.99,
14610
+ "learning_rate": 3.4167592038951053e-07,
14611
+ "loss": 1.0911,
14612
+ "step": 24340
14613
+ },
14614
+ {
14615
+ "epoch": 1.99,
14616
+ "learning_rate": 2.9896643034082176e-07,
14617
+ "loss": 1.0909,
14618
+ "step": 24350
14619
+ },
14620
+ {
14621
+ "epoch": 2.0,
14622
+ "learning_rate": 2.5625694029213294e-07,
14623
+ "loss": 1.0973,
14624
+ "step": 24360
14625
+ },
14626
+ {
14627
+ "epoch": 2.0,
14628
+ "learning_rate": 2.135474502434441e-07,
14629
+ "loss": 1.0926,
14630
+ "step": 24370
14631
+ },
14632
+ {
14633
+ "epoch": 2.0,
14634
+ "learning_rate": 1.7083796019475527e-07,
14635
+ "loss": 1.0924,
14636
+ "step": 24380
14637
+ },
14638
+ {
14639
+ "epoch": 2.0,
14640
+ "learning_rate": 1.2812847014606647e-07,
14641
+ "loss": 1.0848,
14642
+ "step": 24390
14643
+ },
14644
+ {
14645
+ "epoch": 2.0,
14646
+ "learning_rate": 8.541898009737763e-08,
14647
+ "loss": 1.1018,
14648
+ "step": 24400
14649
  }
14650
  ],
14651
  "max_steps": 24414,
14652
  "num_train_epochs": 2,
14653
+ "total_flos": 3.296592748745349e+18,
14654
  "trial_name": null,
14655
  "trial_params": null
14656
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
3
  size 1113252715
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4015cb96f359b6403e2646b4472afdbae87d2268f8843bd956ec7adf9182921
3
  size 1113252715