AlekseyKorshuk committed on
Commit
53f49cf
·
1 Parent(s): 6e67db2

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/50-cent")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1awg3ygb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 50 Cent's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/ld8pvc1j) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/ld8pvc1j/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/50-cent")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2iekmvbo/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 50 Cent's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1zrybuom) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1zrybuom/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "huggingartists/50-cent",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
 
21
  "resid_pdrop": 0.1,
 
22
  "scale_attn_weights": true,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.10.2",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
1
  {
2
+ "_name_or_path": "50-cent",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
18
  "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
21
+ "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
  "scale_attn_weights": true,
25
  "summary_activation": null,
26
  "summary_first_dropout": 0.1,
 
37
  }
38
  },
39
  "torch_dtype": "float32",
40
+ "transformers_version": "4.15.0",
41
  "use_cache": true,
42
  "vocab_size": 50257
43
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 3.3637726306915283, "eval_runtime": 16.4999, "eval_samples_per_second": 20.727, "eval_steps_per_second": 2.606, "epoch": 2.0}
 
1
+ {"eval_loss": 3.1829922199249268, "eval_runtime": 18.7316, "eval_samples_per_second": 20.82, "eval_steps_per_second": 2.616, "epoch": 3.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:358a6f342b56f9de0136e070ea234ddaa8e96700df6535caddb90f83533ad73a
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc7b1e66dd4804ccfb9dc1714f49153dd394cb37828b4d409f91f1c2b62adc8
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54d8c0438e44a5c817661dde8d80235759dd9bf6fd31e60b82390ac67a903164
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9948f38cc1a6abe44b5a4dee4a5cf93797f4b56eee3fb384dcc06fc944da98f9
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39028db0b9207c67d95d8bc7b1634c498f86342adff06c8b1c9d2d4614a45efe
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a35c4f858facf554a582294b1f58c98b5633f4416d25525aad47c8b1d043cf4
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1962bee387c556be33f8635e4ecc47b5f5d760f1525f5cf294369a7e3bac39b
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c270da9fce7276d12ff26b0a4bcaa95748084c5a24bfdf380b2b8c8b5bdff2
3
  size 14439
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56a03ee9c4f7d46c39bb609d5652834cf98daba2d29c096120b392bb7cba62b1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1916ec374b6c0a9df0adb0631f08e8ad04238433d3a103302e9d6c65c1db806
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 3.3637726306915283,
3
- "best_model_checkpoint": "output/50-cent/checkpoint-522",
4
- "epoch": 2.0,
5
- "global_step": 522,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -654,11 +654,313 @@
654
  "eval_samples_per_second": 20.748,
655
  "eval_steps_per_second": 2.609,
656
  "step": 522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  }
658
  ],
659
- "max_steps": 522,
660
- "num_train_epochs": 2,
661
- "total_flos": 544271302656000.0,
662
  "trial_name": null,
663
  "trial_params": null
664
  }
 
1
  {
2
+ "best_metric": 3.1829922199249268,
3
+ "best_model_checkpoint": "output/50-cent/checkpoint-765",
4
+ "epoch": 3.0,
5
+ "global_step": 765,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
654
  "eval_samples_per_second": 20.748,
655
  "eval_steps_per_second": 2.609,
656
  "step": 522
657
+ },
658
+ {
659
+ "epoch": 2.06,
660
+ "learning_rate": 0.00013603195463831566,
661
+ "loss": 3.4382,
662
+ "step": 525
663
+ },
664
+ {
665
+ "epoch": 2.08,
666
+ "learning_rate": 0.00013512806981200165,
667
+ "loss": 3.3096,
668
+ "step": 530
669
+ },
670
+ {
671
+ "epoch": 2.1,
672
+ "learning_rate": 0.00013397182122930294,
673
+ "loss": 3.2608,
674
+ "step": 535
675
+ },
676
+ {
677
+ "epoch": 2.12,
678
+ "learning_rate": 0.00013256759493713883,
679
+ "loss": 3.3047,
680
+ "step": 540
681
+ },
682
+ {
683
+ "epoch": 2.14,
684
+ "learning_rate": 0.00013092071764681933,
685
+ "loss": 3.3901,
686
+ "step": 545
687
+ },
688
+ {
689
+ "epoch": 2.16,
690
+ "learning_rate": 0.00012903743652800486,
691
+ "loss": 3.3488,
692
+ "step": 550
693
+ },
694
+ {
695
+ "epoch": 2.18,
696
+ "learning_rate": 0.00012692489551105156,
697
+ "loss": 3.3604,
698
+ "step": 555
699
+ },
700
+ {
701
+ "epoch": 2.2,
702
+ "learning_rate": 0.0001245911081876368,
703
+ "loss": 3.2288,
704
+ "step": 560
705
+ },
706
+ {
707
+ "epoch": 2.22,
708
+ "learning_rate": 0.00012204492741246097,
709
+ "loss": 3.3826,
710
+ "step": 565
711
+ },
712
+ {
713
+ "epoch": 2.24,
714
+ "learning_rate": 0.0001192960117213372,
715
+ "loss": 3.3225,
716
+ "step": 570
717
+ },
718
+ {
719
+ "epoch": 2.25,
720
+ "learning_rate": 0.0001163547886930568,
721
+ "loss": 3.39,
722
+ "step": 575
723
+ },
724
+ {
725
+ "epoch": 2.27,
726
+ "learning_rate": 0.00011323241539401106,
727
+ "loss": 3.2293,
728
+ "step": 580
729
+ },
730
+ {
731
+ "epoch": 2.29,
732
+ "learning_rate": 0.00010994073605561706,
733
+ "loss": 3.24,
734
+ "step": 585
735
+ },
736
+ {
737
+ "epoch": 2.31,
738
+ "learning_rate": 0.00010649223714509067,
739
+ "loss": 3.3242,
740
+ "step": 590
741
+ },
742
+ {
743
+ "epoch": 2.33,
744
+ "learning_rate": 0.00010289999999999998,
745
+ "loss": 3.4037,
746
+ "step": 595
747
+ },
748
+ {
749
+ "epoch": 2.35,
750
+ "learning_rate": 9.917765120627052e-05,
751
+ "loss": 3.4103,
752
+ "step": 600
753
+ },
754
+ {
755
+ "epoch": 2.37,
756
+ "learning_rate": 9.53393109078778e-05,
757
+ "loss": 3.2591,
758
+ "step": 605
759
+ },
760
+ {
761
+ "epoch": 2.39,
762
+ "learning_rate": 9.139953924430467e-05,
763
+ "loss": 3.2654,
764
+ "step": 610
765
+ },
766
+ {
767
+ "epoch": 2.41,
768
+ "learning_rate": 8.737328111894491e-05,
769
+ "loss": 3.1585,
770
+ "step": 615
771
+ },
772
+ {
773
+ "epoch": 2.43,
774
+ "learning_rate": 8.327580950796576e-05,
775
+ "loss": 3.257,
776
+ "step": 620
777
+ },
778
+ {
779
+ "epoch": 2.45,
780
+ "learning_rate": 7.912266752467782e-05,
781
+ "loss": 3.2541,
782
+ "step": 625
783
+ },
784
+ {
785
+ "epoch": 2.47,
786
+ "learning_rate": 7.492960945918252e-05,
787
+ "loss": 3.2145,
788
+ "step": 630
789
+ },
790
+ {
791
+ "epoch": 2.49,
792
+ "learning_rate": 7.071254101695329e-05,
793
+ "loss": 3.3335,
794
+ "step": 635
795
+ },
796
+ {
797
+ "epoch": 2.51,
798
+ "learning_rate": 6.648745898304675e-05,
799
+ "loss": 3.3844,
800
+ "step": 640
801
+ },
802
+ {
803
+ "epoch": 2.53,
804
+ "learning_rate": 6.227039054081752e-05,
805
+ "loss": 3.3846,
806
+ "step": 645
807
+ },
808
+ {
809
+ "epoch": 2.55,
810
+ "learning_rate": 5.807733247532229e-05,
811
+ "loss": 3.3111,
812
+ "step": 650
813
+ },
814
+ {
815
+ "epoch": 2.57,
816
+ "learning_rate": 5.392419049203428e-05,
817
+ "loss": 3.1908,
818
+ "step": 655
819
+ },
820
+ {
821
+ "epoch": 2.59,
822
+ "learning_rate": 4.9826718881055135e-05,
823
+ "loss": 3.3639,
824
+ "step": 660
825
+ },
826
+ {
827
+ "epoch": 2.61,
828
+ "learning_rate": 4.580046075569537e-05,
829
+ "loss": 3.0676,
830
+ "step": 665
831
+ },
832
+ {
833
+ "epoch": 2.63,
834
+ "learning_rate": 4.1860689092122226e-05,
835
+ "loss": 3.2607,
836
+ "step": 670
837
+ },
838
+ {
839
+ "epoch": 2.65,
840
+ "learning_rate": 3.8022348793729525e-05,
841
+ "loss": 3.249,
842
+ "step": 675
843
+ },
844
+ {
845
+ "epoch": 2.67,
846
+ "learning_rate": 3.4300000000000054e-05,
847
+ "loss": 3.2803,
848
+ "step": 680
849
+ },
850
+ {
851
+ "epoch": 2.69,
852
+ "learning_rate": 3.0707762854909365e-05,
853
+ "loss": 3.292,
854
+ "step": 685
855
+ },
856
+ {
857
+ "epoch": 2.71,
858
+ "learning_rate": 2.7259263944382986e-05,
859
+ "loss": 3.3944,
860
+ "step": 690
861
+ },
862
+ {
863
+ "epoch": 2.73,
864
+ "learning_rate": 2.3967584605988973e-05,
865
+ "loss": 3.3062,
866
+ "step": 695
867
+ },
868
+ {
869
+ "epoch": 2.75,
870
+ "learning_rate": 2.0845211306943224e-05,
871
+ "loss": 3.2958,
872
+ "step": 700
873
+ },
874
+ {
875
+ "epoch": 2.76,
876
+ "learning_rate": 1.7903988278662788e-05,
877
+ "loss": 3.0765,
878
+ "step": 705
879
+ },
880
+ {
881
+ "epoch": 2.78,
882
+ "learning_rate": 1.515507258753902e-05,
883
+ "loss": 3.2299,
884
+ "step": 710
885
+ },
886
+ {
887
+ "epoch": 2.8,
888
+ "learning_rate": 1.2608891812363205e-05,
889
+ "loss": 3.3755,
890
+ "step": 715
891
+ },
892
+ {
893
+ "epoch": 2.82,
894
+ "learning_rate": 1.0275104488948488e-05,
895
+ "loss": 3.1482,
896
+ "step": 720
897
+ },
898
+ {
899
+ "epoch": 2.84,
900
+ "learning_rate": 8.162563471995208e-06,
901
+ "loss": 3.1407,
902
+ "step": 725
903
+ },
904
+ {
905
+ "epoch": 2.86,
906
+ "learning_rate": 6.279282353180702e-06,
907
+ "loss": 3.3813,
908
+ "step": 730
909
+ },
910
+ {
911
+ "epoch": 2.88,
912
+ "learning_rate": 4.6324050628612214e-06,
913
+ "loss": 3.0962,
914
+ "step": 735
915
+ },
916
+ {
917
+ "epoch": 2.9,
918
+ "learning_rate": 3.228178770697088e-06,
919
+ "loss": 3.2479,
920
+ "step": 740
921
+ },
922
+ {
923
+ "epoch": 2.92,
924
+ "learning_rate": 2.0719301879983714e-06,
925
+ "loss": 3.0949,
926
+ "step": 745
927
+ },
928
+ {
929
+ "epoch": 2.94,
930
+ "learning_rate": 1.1680453616843376e-06,
931
+ "loss": 3.2467,
932
+ "step": 750
933
+ },
934
+ {
935
+ "epoch": 2.96,
936
+ "learning_rate": 5.199530365052086e-07,
937
+ "loss": 3.1761,
938
+ "step": 755
939
+ },
940
+ {
941
+ "epoch": 2.98,
942
+ "learning_rate": 1.3011164863877445e-07,
943
+ "loss": 3.1651,
944
+ "step": 760
945
+ },
946
+ {
947
+ "epoch": 3.0,
948
+ "learning_rate": 0.0,
949
+ "loss": 3.2256,
950
+ "step": 765
951
+ },
952
+ {
953
+ "epoch": 3.0,
954
+ "eval_loss": 3.1829922199249268,
955
+ "eval_runtime": 18.683,
956
+ "eval_samples_per_second": 20.875,
957
+ "eval_steps_per_second": 2.623,
958
+ "step": 765
959
  }
960
  ],
961
+ "max_steps": 765,
962
+ "num_train_epochs": 3,
963
+ "total_flos": 797985865728000.0,
964
  "trial_name": null,
965
  "trial_params": null
966
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6f290bf43c68f84d50f113a11b20c55809e90aac8aa7e58b408f5dbe3f578ab
3
- size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0953bfb72e20182c57ba68197da351f78842856132e9f560521cdd9c2df2cb65
3
+ size 2991