besimray committed on
Commit
97f1c8e
·
verified ·
1 Parent(s): 912f30a

Training in progress, step 90, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c77bd252404343d59fa47e11d87b9d7494bdc0ce74fb6444c47871aca36d1f6
3
  size 90207248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e566ffe60098290503a9ade5d78567948f2deaadfc78dc21f93166a51762f317
3
  size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65cf1d6e092f1f2385667e8efd41a0118af57063735c60905ff65cddd11c6b83
3
  size 46057082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:570d269b8daf52937ab82de45e6ccea0b05f7c7f282daecde87024f41199f500
3
  size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:597c452dbb6d2e2055a44445aa9e337aa4085eacd11fcbc7213a5eb6f6055867
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63c9cc0f3c04b2c00adeda1f5d60d230e61e99e3404c96ea1e859f8d4e96fa90
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23b27ab0ae2b9af6f3d4c84cdaf8b0fc887acf71f8f726b270a3bce2845000a9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.617271900177002,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-80",
4
- "epoch": 0.022590893046240734,
5
  "eval_steps": 10,
6
- "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -639,6 +639,84 @@
639
  "eval_samples_per_second": 5.528,
640
  "eval_steps_per_second": 5.528,
641
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
  }
643
  ],
644
  "logging_steps": 1,
@@ -653,7 +731,7 @@
653
  "early_stopping_threshold": 0.0
654
  },
655
  "attributes": {
656
- "early_stopping_patience_counter": 0
657
  }
658
  },
659
  "TrainerControl": {
@@ -667,7 +745,7 @@
667
  "attributes": {}
668
  }
669
  },
670
- "total_flos": 7830428681502720.0,
671
  "train_batch_size": 1,
672
  "trial_name": null,
673
  "trial_params": null
 
1
  {
2
  "best_metric": 1.617271900177002,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-80",
4
+ "epoch": 0.025414754677020826,
5
  "eval_steps": 10,
6
+ "global_step": 90,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
639
  "eval_samples_per_second": 5.528,
640
  "eval_steps_per_second": 5.528,
641
  "step": 80
642
+ },
643
+ {
644
+ "epoch": 0.022873279209318743,
645
+ "grad_norm": 0.908437192440033,
646
+ "learning_rate": 0.0001898168561213419,
647
+ "loss": 1.9605,
648
+ "step": 81
649
+ },
650
+ {
651
+ "epoch": 0.02315566537239675,
652
+ "grad_norm": 0.8091934323310852,
653
+ "learning_rate": 0.0001895331334332753,
654
+ "loss": 1.5024,
655
+ "step": 82
656
+ },
657
+ {
658
+ "epoch": 0.023438051535474763,
659
+ "grad_norm": 1.5132545232772827,
660
+ "learning_rate": 0.0001892457303887706,
661
+ "loss": 1.0696,
662
+ "step": 83
663
+ },
664
+ {
665
+ "epoch": 0.02372043769855277,
666
+ "grad_norm": 1.2116718292236328,
667
+ "learning_rate": 0.0001889546588018412,
668
+ "loss": 1.7115,
669
+ "step": 84
670
+ },
671
+ {
672
+ "epoch": 0.02400282386163078,
673
+ "grad_norm": 0.8074257969856262,
674
+ "learning_rate": 0.00018865993063730004,
675
+ "loss": 2.7291,
676
+ "step": 85
677
+ },
678
+ {
679
+ "epoch": 0.02428521002470879,
680
+ "grad_norm": 1.1341824531555176,
681
+ "learning_rate": 0.00018836155801026753,
682
+ "loss": 1.8977,
683
+ "step": 86
684
+ },
685
+ {
686
+ "epoch": 0.024567596187786797,
687
+ "grad_norm": 1.9230746030807495,
688
+ "learning_rate": 0.0001880595531856738,
689
+ "loss": 1.5919,
690
+ "step": 87
691
+ },
692
+ {
693
+ "epoch": 0.02484998235086481,
694
+ "grad_norm": 0.9849350452423096,
695
+ "learning_rate": 0.00018775392857775432,
696
+ "loss": 2.8404,
697
+ "step": 88
698
+ },
699
+ {
700
+ "epoch": 0.025132368513942818,
701
+ "grad_norm": 2.4642324447631836,
702
+ "learning_rate": 0.00018744469674953956,
703
+ "loss": 1.2864,
704
+ "step": 89
705
+ },
706
+ {
707
+ "epoch": 0.025414754677020826,
708
+ "grad_norm": 1.7967941761016846,
709
+ "learning_rate": 0.00018713187041233896,
710
+ "loss": 1.8635,
711
+ "step": 90
712
+ },
713
+ {
714
+ "epoch": 0.025414754677020826,
715
+ "eval_loss": 1.6206018924713135,
716
+ "eval_runtime": 134.6331,
717
+ "eval_samples_per_second": 5.541,
718
+ "eval_steps_per_second": 5.541,
719
+ "step": 90
720
  }
721
  ],
722
  "logging_steps": 1,
 
731
  "early_stopping_threshold": 0.0
732
  },
733
  "attributes": {
734
+ "early_stopping_patience_counter": 1
735
  }
736
  },
737
  "TrainerControl": {
 
745
  "attributes": {}
746
  }
747
  },
748
+ "total_flos": 8809232266690560.0,
749
  "train_batch_size": 1,
750
  "trial_name": null,
751
  "trial_params": null