AlekseyKorshuk
committed on
Commit
·
b2b0ec7
1
Parent(s):
9205123
huggingartists
Browse files
- README.md +3 -3
- config.json +2 -2
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +143 -7
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/headie-one")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Headie One's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/headie-one")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3fzj7qkl/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Headie One's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1d1n36x9) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1d1n36x9/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
@@ -36,7 +36,7 @@
|
|
36 |
}
|
37 |
},
|
38 |
"torch_dtype": "float32",
|
39 |
-
"transformers_version": "4.20.
|
40 |
"use_cache": true,
|
41 |
"vocab_size": 50257
|
42 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "headie-one",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
|
|
36 |
}
|
37 |
},
|
38 |
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.20.1",
|
40 |
"use_cache": true,
|
41 |
"vocab_size": 50257
|
42 |
}
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss":
|
|
|
1 |
+
{"eval_loss": 0.8823016285896301, "eval_runtime": 2.4753, "eval_samples_per_second": 42.419, "eval_steps_per_second": 5.656, "epoch": 70.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e97c2b39904c2125990372bcd2c0af3ae009d0f37cd2567d4ed60579e6209d63
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f62d82420e69209e064b44c6ac85956e0739f6dc942fcf17f0200b7f3f0b24c
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510396521
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1313c47fb885aa521ff58af331fb5d870ec7e29197ced87841b3265ac00a9ea
|
3 |
size 510396521
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edd9978b73bc8d959cf1091d4e60a8b7eea3426d8eb1a224bb815d849bd77207
|
3 |
size 14503
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b1b7cf6b6c584836674e51b2bffb4f225e602280b8da1987fbd27d41d1f5b41
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric":
|
3 |
-
"best_model_checkpoint": "output/headie-one/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4966,11 +4966,147 @@
|
|
4966 |
"eval_samples_per_second": 42.971,
|
4967 |
"eval_steps_per_second": 5.477,
|
4968 |
"step": 3800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4969 |
}
|
4970 |
],
|
4971 |
-
"max_steps":
|
4972 |
-
"num_train_epochs":
|
4973 |
-
"total_flos":
|
4974 |
"trial_name": null,
|
4975 |
"trial_params": null
|
4976 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8823016285896301,
|
3 |
+
"best_model_checkpoint": "output/headie-one/checkpoint-3900",
|
4 |
+
"epoch": 52.0,
|
5 |
+
"global_step": 3900,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4966 |
"eval_samples_per_second": 42.971,
|
4967 |
"eval_steps_per_second": 5.477,
|
4968 |
"step": 3800
|
4969 |
+
},
|
4970 |
+
{
|
4971 |
+
"epoch": 50.73,
|
4972 |
+
"learning_rate": 2.2697640403783063e-05,
|
4973 |
+
"loss": 0.9066,
|
4974 |
+
"step": 3805
|
4975 |
+
},
|
4976 |
+
{
|
4977 |
+
"epoch": 50.8,
|
4978 |
+
"learning_rate": 1.3101434185879145e-05,
|
4979 |
+
"loss": 0.6969,
|
4980 |
+
"step": 3810
|
4981 |
+
},
|
4982 |
+
{
|
4983 |
+
"epoch": 50.87,
|
4984 |
+
"learning_rate": 5.930781605717916e-06,
|
4985 |
+
"loss": 1.0504,
|
4986 |
+
"step": 3815
|
4987 |
+
},
|
4988 |
+
{
|
4989 |
+
"epoch": 50.93,
|
4990 |
+
"learning_rate": 1.4990745896610897e-06,
|
4991 |
+
"loss": 1.4716,
|
4992 |
+
"step": 3820
|
4993 |
+
},
|
4994 |
+
{
|
4995 |
+
"epoch": 51.0,
|
4996 |
+
"learning_rate": 0.0,
|
4997 |
+
"loss": 1.2765,
|
4998 |
+
"step": 3825
|
4999 |
+
},
|
5000 |
+
{
|
5001 |
+
"epoch": 51.0,
|
5002 |
+
"eval_loss": 1.2292253971099854,
|
5003 |
+
"eval_runtime": 2.6668,
|
5004 |
+
"eval_samples_per_second": 39.373,
|
5005 |
+
"eval_steps_per_second": 5.25,
|
5006 |
+
"step": 3825
|
5007 |
+
},
|
5008 |
+
{
|
5009 |
+
"epoch": 51.07,
|
5010 |
+
"learning_rate": 1.499074589660808e-06,
|
5011 |
+
"loss": 1.2797,
|
5012 |
+
"step": 3830
|
5013 |
+
},
|
5014 |
+
{
|
5015 |
+
"epoch": 51.13,
|
5016 |
+
"learning_rate": 5.9307816057173676e-06,
|
5017 |
+
"loss": 1.9281,
|
5018 |
+
"step": 3835
|
5019 |
+
},
|
5020 |
+
{
|
5021 |
+
"epoch": 51.2,
|
5022 |
+
"learning_rate": 1.3101434185878354e-05,
|
5023 |
+
"loss": 1.4825,
|
5024 |
+
"step": 3840
|
5025 |
+
},
|
5026 |
+
{
|
5027 |
+
"epoch": 51.27,
|
5028 |
+
"learning_rate": 2.2697640403782067e-05,
|
5029 |
+
"loss": 1.4677,
|
5030 |
+
"step": 3845
|
5031 |
+
},
|
5032 |
+
{
|
5033 |
+
"epoch": 51.33,
|
5034 |
+
"learning_rate": 3.429999999999976e-05,
|
5035 |
+
"loss": 1.0241,
|
5036 |
+
"step": 3850
|
5037 |
+
},
|
5038 |
+
{
|
5039 |
+
"epoch": 51.4,
|
5040 |
+
"learning_rate": 4.740143418587843e-05,
|
5041 |
+
"loss": 1.0786,
|
5042 |
+
"step": 3855
|
5043 |
+
},
|
5044 |
+
{
|
5045 |
+
"epoch": 51.47,
|
5046 |
+
"learning_rate": 6.142934741983887e-05,
|
5047 |
+
"loss": 1.4638,
|
5048 |
+
"step": 3860
|
5049 |
+
},
|
5050 |
+
{
|
5051 |
+
"epoch": 51.53,
|
5052 |
+
"learning_rate": 7.5770652580161e-05,
|
5053 |
+
"loss": 1.0569,
|
5054 |
+
"step": 3865
|
5055 |
+
},
|
5056 |
+
{
|
5057 |
+
"epoch": 51.6,
|
5058 |
+
"learning_rate": 8.979856581412144e-05,
|
5059 |
+
"loss": 1.0274,
|
5060 |
+
"step": 3870
|
5061 |
+
},
|
5062 |
+
{
|
5063 |
+
"epoch": 51.67,
|
5064 |
+
"learning_rate": 0.00010289999999999844,
|
5065 |
+
"loss": 0.9631,
|
5066 |
+
"step": 3875
|
5067 |
+
},
|
5068 |
+
{
|
5069 |
+
"epoch": 51.73,
|
5070 |
+
"learning_rate": 0.00011450235959621783,
|
5071 |
+
"loss": 0.7978,
|
5072 |
+
"step": 3880
|
5073 |
+
},
|
5074 |
+
{
|
5075 |
+
"epoch": 51.8,
|
5076 |
+
"learning_rate": 0.00012409856581412041,
|
5077 |
+
"loss": 0.838,
|
5078 |
+
"step": 3885
|
5079 |
+
},
|
5080 |
+
{
|
5081 |
+
"epoch": 51.87,
|
5082 |
+
"learning_rate": 0.00013126921839428258,
|
5083 |
+
"loss": 0.6216,
|
5084 |
+
"step": 3890
|
5085 |
+
},
|
5086 |
+
{
|
5087 |
+
"epoch": 51.93,
|
5088 |
+
"learning_rate": 0.00013570092541033876,
|
5089 |
+
"loss": 1.2543,
|
5090 |
+
"step": 3895
|
5091 |
+
},
|
5092 |
+
{
|
5093 |
+
"epoch": 52.0,
|
5094 |
+
"learning_rate": 0.0001372,
|
5095 |
+
"loss": 1.0127,
|
5096 |
+
"step": 3900
|
5097 |
+
},
|
5098 |
+
{
|
5099 |
+
"epoch": 52.0,
|
5100 |
+
"eval_loss": 0.8823016285896301,
|
5101 |
+
"eval_runtime": 2.4876,
|
5102 |
+
"eval_samples_per_second": 42.209,
|
5103 |
+
"eval_steps_per_second": 5.628,
|
5104 |
+
"step": 3900
|
5105 |
}
|
5106 |
],
|
5107 |
+
"max_steps": 5250,
|
5108 |
+
"num_train_epochs": 70,
|
5109 |
+
"total_flos": 4029907009536000.0,
|
5110 |
"trial_name": null,
|
5111 |
"trial_params": null
|
5112 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3311
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:202761b0e853ed0912633dc56b6e49fb1661b0995bfb2fd6aec7da7426b6f24c
|
3 |
size 3311
|