cwaud committed
Commit ee9da08 · verified · 1 Parent(s): f68aaa2

Training in progress, step 650, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:598a1b646f1aaa55b38295907727506f4b6ee062fd31bb083e764b11b088b72c
+oid sha256:bc321b67befde9d32d0f759ae44d1d80ee068c965b067e8f9195d58612be3ea0
 size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6041c64d8b414c290dc3a63c572f714a182041e338c003c2e0760e215a3dbe07
+oid sha256:0c9e732f66f7d9824044594dce52a639a7c4361adf5450f030c8624a9c1f4741
 size 194840426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a80e9535bb120d1c83c5be04c75e7223dedcf669ec017da7928133c96ac473a
+oid sha256:c8c5cab54e7970b8ccc67a7f8c2fe78f7bf63d36c0111ad86163a4833d9fb630
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b59900807a53206c18ba823bc025a095d3f72d389a777af59a68a05145efc8d7
+oid sha256:aa213f8b4f645aefc7b7ed34404a704afe52702d2abc5afd1fcf6555c25785dd
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.1112220287323,
-  "best_model_checkpoint": "miner_id_24/checkpoint-600",
-  "epoch": 0.1712527315702627,
+  "best_metric": 2.107257604598999,
+  "best_model_checkpoint": "miner_id_24/checkpoint-650",
+  "epoch": 0.18552379253445123,
   "eval_steps": 25,
-  "global_step": 600,
+  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4407,6 +4407,372 @@
       "eval_samples_per_second": 7.475,
       "eval_steps_per_second": 7.475,
       "step": 600
+    },
+    {
+      "epoch": 0.17153815278954646,
+      "grad_norm": 0.4549245536327362,
+      "learning_rate": 6.576765361421342e-05,
+      "loss": 2.5277,
+      "step": 601
+    },
+    {
+      "epoch": 0.17182357400883022,
+      "grad_norm": 0.4663224220275879,
+      "learning_rate": 6.54792795378143e-05,
+      "loss": 2.3604,
+      "step": 602
+    },
+    {
+      "epoch": 0.172108995228114,
+      "grad_norm": 0.5545827746391296,
+      "learning_rate": 6.519150935911562e-05,
+      "loss": 2.3525,
+      "step": 603
+    },
+    {
+      "epoch": 0.17239441644739775,
+      "grad_norm": 0.557524561882019,
+      "learning_rate": 6.490434698155264e-05,
+      "loss": 2.1753,
+      "step": 604
+    },
+    {
+      "epoch": 0.17267983766668155,
+      "grad_norm": 0.5588095784187317,
+      "learning_rate": 6.46177963003161e-05,
+      "loss": 2.3271,
+      "step": 605
+    },
+    {
+      "epoch": 0.1729652588859653,
+      "grad_norm": 0.5639928579330444,
+      "learning_rate": 6.43318612022995e-05,
+      "loss": 2.3426,
+      "step": 606
+    },
+    {
+      "epoch": 0.17325068010524908,
+      "grad_norm": 0.5503498315811157,
+      "learning_rate": 6.404654556604627e-05,
+      "loss": 2.1217,
+      "step": 607
+    },
+    {
+      "epoch": 0.17353610132453284,
+      "grad_norm": 0.5461228489875793,
+      "learning_rate": 6.376185326169721e-05,
+      "loss": 1.8886,
+      "step": 608
+    },
+    {
+      "epoch": 0.1738215225438166,
+      "grad_norm": 0.4937599003314972,
+      "learning_rate": 6.347778815093796e-05,
+      "loss": 2.1941,
+      "step": 609
+    },
+    {
+      "epoch": 0.17410694376310037,
+      "grad_norm": 0.5086988806724548,
+      "learning_rate": 6.31943540869467e-05,
+      "loss": 1.9679,
+      "step": 610
+    },
+    {
+      "epoch": 0.17439236498238417,
+      "grad_norm": 0.5381971001625061,
+      "learning_rate": 6.291155491434175e-05,
+      "loss": 2.0835,
+      "step": 611
+    },
+    {
+      "epoch": 0.17467778620166793,
+      "grad_norm": 0.554766833782196,
+      "learning_rate": 6.262939446912959e-05,
+      "loss": 1.8391,
+      "step": 612
+    },
+    {
+      "epoch": 0.1749632074209517,
+      "grad_norm": 0.5361195206642151,
+      "learning_rate": 6.234787657865266e-05,
+      "loss": 2.1932,
+      "step": 613
+    },
+    {
+      "epoch": 0.17524862864023547,
+      "grad_norm": 0.5431385040283203,
+      "learning_rate": 6.206700506153755e-05,
+      "loss": 2.089,
+      "step": 614
+    },
+    {
+      "epoch": 0.17553404985951923,
+      "grad_norm": 0.5752652287483215,
+      "learning_rate": 6.178678372764315e-05,
+      "loss": 2.0846,
+      "step": 615
+    },
+    {
+      "epoch": 0.17581947107880302,
+      "grad_norm": 0.5880393981933594,
+      "learning_rate": 6.150721637800901e-05,
+      "loss": 2.0762,
+      "step": 616
+    },
+    {
+      "epoch": 0.1761048922980868,
+      "grad_norm": 0.5371225476264954,
+      "learning_rate": 6.122830680480383e-05,
+      "loss": 2.1387,
+      "step": 617
+    },
+    {
+      "epoch": 0.17639031351737056,
+      "grad_norm": 0.5906757712364197,
+      "learning_rate": 6.095005879127379e-05,
+      "loss": 2.2487,
+      "step": 618
+    },
+    {
+      "epoch": 0.17667573473665432,
+      "grad_norm": 0.5833765268325806,
+      "learning_rate": 6.067247611169153e-05,
+      "loss": 2.0569,
+      "step": 619
+    },
+    {
+      "epoch": 0.1769611559559381,
+      "grad_norm": 0.5555745959281921,
+      "learning_rate": 6.039556253130481e-05,
+      "loss": 2.1929,
+      "step": 620
+    },
+    {
+      "epoch": 0.17724657717522188,
+      "grad_norm": 0.5676312446594238,
+      "learning_rate": 6.0119321806285366e-05,
+      "loss": 2.0863,
+      "step": 621
+    },
+    {
+      "epoch": 0.17753199839450565,
+      "grad_norm": 0.5911099910736084,
+      "learning_rate": 5.984375768367816e-05,
+      "loss": 1.9681,
+      "step": 622
+    },
+    {
+      "epoch": 0.1778174196137894,
+      "grad_norm": 0.6491994857788086,
+      "learning_rate": 5.9568873901350295e-05,
+      "loss": 2.1629,
+      "step": 623
+    },
+    {
+      "epoch": 0.17810284083307318,
+      "grad_norm": 0.5989968776702881,
+      "learning_rate": 5.9294674187940635e-05,
+      "loss": 1.817,
+      "step": 624
+    },
+    {
+      "epoch": 0.17838826205235694,
+      "grad_norm": 0.5644077658653259,
+      "learning_rate": 5.902116226280887e-05,
+      "loss": 2.2051,
+      "step": 625
+    },
+    {
+      "epoch": 0.17838826205235694,
+      "eval_loss": 2.110799789428711,
+      "eval_runtime": 6.6942,
+      "eval_samples_per_second": 7.469,
+      "eval_steps_per_second": 7.469,
+      "step": 625
+    },
+    {
+      "epoch": 0.17867368327164074,
+      "grad_norm": 0.5663304924964905,
+      "learning_rate": 5.874834183598541e-05,
+      "loss": 2.2308,
+      "step": 626
+    },
+    {
+      "epoch": 0.1789591044909245,
+      "grad_norm": 0.6335240602493286,
+      "learning_rate": 5.847621660812067e-05,
+      "loss": 2.3482,
+      "step": 627
+    },
+    {
+      "epoch": 0.17924452571020827,
+      "grad_norm": 0.5667268633842468,
+      "learning_rate": 5.820479027043531e-05,
+      "loss": 2.3666,
+      "step": 628
+    },
+    {
+      "epoch": 0.17952994692949203,
+      "grad_norm": 0.5804107189178467,
+      "learning_rate": 5.793406650466982e-05,
+      "loss": 1.9951,
+      "step": 629
+    },
+    {
+      "epoch": 0.1798153681487758,
+      "grad_norm": 0.6026704907417297,
+      "learning_rate": 5.7664048983034746e-05,
+      "loss": 1.9337,
+      "step": 630
+    },
+    {
+      "epoch": 0.1801007893680596,
+      "grad_norm": 0.6197707653045654,
+      "learning_rate": 5.739474136816092e-05,
+      "loss": 2.1738,
+      "step": 631
+    },
+    {
+      "epoch": 0.18038621058734336,
+      "grad_norm": 0.60870361328125,
+      "learning_rate": 5.712614731304959e-05,
+      "loss": 2.0636,
+      "step": 632
+    },
+    {
+      "epoch": 0.18067163180662713,
+      "grad_norm": 0.6289341449737549,
+      "learning_rate": 5.6858270461023034e-05,
+      "loss": 2.2225,
+      "step": 633
+    },
+    {
+      "epoch": 0.1809570530259109,
+      "grad_norm": 0.6064093708992004,
+      "learning_rate": 5.6591114445675065e-05,
+      "loss": 2.014,
+      "step": 634
+    },
+    {
+      "epoch": 0.18124247424519466,
+      "grad_norm": 0.6756201386451721,
+      "learning_rate": 5.632468289082184e-05,
+      "loss": 2.0107,
+      "step": 635
+    },
+    {
+      "epoch": 0.18152789546447845,
+      "grad_norm": 0.6307650804519653,
+      "learning_rate": 5.6058979410452436e-05,
+      "loss": 1.9966,
+      "step": 636
+    },
+    {
+      "epoch": 0.18181331668376222,
+      "grad_norm": 0.6745278239250183,
+      "learning_rate": 5.5794007608680235e-05,
+      "loss": 2.075,
+      "step": 637
+    },
+    {
+      "epoch": 0.18209873790304598,
+      "grad_norm": 0.7290084958076477,
+      "learning_rate": 5.552977107969375e-05,
+      "loss": 2.2774,
+      "step": 638
+    },
+    {
+      "epoch": 0.18238415912232975,
+      "grad_norm": 0.649732232093811,
+      "learning_rate": 5.5266273407707934e-05,
+      "loss": 2.2732,
+      "step": 639
+    },
+    {
+      "epoch": 0.1826695803416135,
+      "grad_norm": 0.7042523622512817,
+      "learning_rate": 5.500351816691556e-05,
+      "loss": 2.2515,
+      "step": 640
+    },
+    {
+      "epoch": 0.1829550015608973,
+      "grad_norm": 0.6616380214691162,
+      "learning_rate": 5.474150892143879e-05,
+      "loss": 1.8062,
+      "step": 641
+    },
+    {
+      "epoch": 0.18324042278018107,
+      "grad_norm": 0.6693776845932007,
+      "learning_rate": 5.448024922528079e-05,
+      "loss": 1.988,
+      "step": 642
+    },
+    {
+      "epoch": 0.18352584399946484,
+      "grad_norm": 0.658955991268158,
+      "learning_rate": 5.421974262227747e-05,
+      "loss": 2.1457,
+      "step": 643
+    },
+    {
+      "epoch": 0.1838112652187486,
+      "grad_norm": 0.7057983875274658,
+      "learning_rate": 5.395999264604956e-05,
+      "loss": 2.051,
+      "step": 644
+    },
+    {
+      "epoch": 0.18409668643803237,
+      "grad_norm": 0.6857619881629944,
+      "learning_rate": 5.370100281995445e-05,
+      "loss": 1.8961,
+      "step": 645
+    },
+    {
+      "epoch": 0.18438210765731614,
+      "grad_norm": 0.7401403188705444,
+      "learning_rate": 5.344277665703872e-05,
+      "loss": 2.1315,
+      "step": 646
+    },
+    {
+      "epoch": 0.18466752887659993,
+      "grad_norm": 0.7094218134880066,
+      "learning_rate": 5.318531765999015e-05,
+      "loss": 2.0304,
+      "step": 647
+    },
+    {
+      "epoch": 0.1849529500958837,
+      "grad_norm": 1.1581752300262451,
+      "learning_rate": 5.292862932109044e-05,
+      "loss": 2.3675,
+      "step": 648
+    },
+    {
+      "epoch": 0.18523837131516746,
+      "grad_norm": 0.8512890934944153,
+      "learning_rate": 5.267271512216774e-05,
+      "loss": 2.1611,
+      "step": 649
+    },
+    {
+      "epoch": 0.18552379253445123,
+      "grad_norm": 1.1400418281555176,
+      "learning_rate": 5.2417578534549406e-05,
+      "loss": 2.0556,
+      "step": 650
+    },
+    {
+      "epoch": 0.18552379253445123,
+      "eval_loss": 2.107257604598999,
+      "eval_runtime": 6.7052,
+      "eval_samples_per_second": 7.457,
+      "eval_steps_per_second": 7.457,
+      "step": 650
     }
   ],
   "logging_steps": 1,
@@ -4435,7 +4801,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.707972385527562e+17,
+  "total_flos": 7.266940971374346e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null