huggingartists

Browse files

Files changed (10) hide show

README.md +3 -3
config.json +2 -2
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +143 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/headie-one")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/x7sbsok3/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Headie One's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/23dok566) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/23dok566/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/headie-one")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3fzj7qkl/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Headie One's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1d1n36x9) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1d1n36x9/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "huggingartists/headie-one",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -36,7 +36,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.20.0",
   "use_cache": true,
   "vocab_size": 50257
 }

 {
+  "_name_or_path": "headie-one",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.20.1",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.5699154138565063, "eval_runtime": 1.6396, "eval_samples_per_second": 40.864, "eval_steps_per_second": 5.489, "epoch": 19.0}


1	+ {"eval_loss": 0.8823016285896301, "eval_runtime": 2.4753, "eval_samples_per_second": 42.419, "eval_steps_per_second": 5.656, "epoch": 70.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f961cbbd78988f8623923dfc3e5aedc36596ae0f89789aa50a153214ef47c9ba
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:e97c2b39904c2125990372bcd2c0af3ae009d0f37cd2567d4ed60579e6209d63
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7491b6f16bd0a1d8e4cb329eaef6bf7bd58975ad38795c84abed5455f08eca81
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f62d82420e69209e064b44c6ac85956e0739f6dc942fcf17f0200b7f3f0b24c
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97acf1b0eb486b438bdd175b53cfcfd5b7761ae16a12d32f98ac946a816b0cbe
 size 510396521

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1313c47fb885aa521ff58af331fb5d870ec7e29197ced87841b3265ac00a9ea
 size 510396521

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8420f7ec058563a1409e89652133384b907f8881b615732be2215b79b14c7ca6
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:edd9978b73bc8d959cf1091d4e60a8b7eea3426d8eb1a224bb815d849bd77207
 size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4d0b85035a9999c17bb635a766784cff44d93cc2adedcb471b74f0a33c5e80a
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b1b7cf6b6c584836674e51b2bffb4f225e602280b8da1987fbd27d41d1f5b41
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 5.471560001373291,
-  "best_model_checkpoint": "output/headie-one/checkpoint-76",
-  "epoch": 50.0,
-  "global_step": 3800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4966,11 +4966,147 @@
       "eval_samples_per_second": 42.971,
       "eval_steps_per_second": 5.477,
       "step": 3800
     }
   ],
-  "max_steps": 3800,
-  "num_train_epochs": 50,
-  "total_flos": 3925912780800000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 0.8823016285896301,
+  "best_model_checkpoint": "output/headie-one/checkpoint-3900",
+  "epoch": 52.0,
+  "global_step": 3900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 42.971,
       "eval_steps_per_second": 5.477,
       "step": 3800
+    },
+    {
+      "epoch": 50.73,
+      "learning_rate": 2.2697640403783063e-05,
+      "loss": 0.9066,
+      "step": 3805
+    },
+    {
+      "epoch": 50.8,
+      "learning_rate": 1.3101434185879145e-05,
+      "loss": 0.6969,
+      "step": 3810
+    },
+    {
+      "epoch": 50.87,
+      "learning_rate": 5.930781605717916e-06,
+      "loss": 1.0504,
+      "step": 3815
+    },
+    {
+      "epoch": 50.93,
+      "learning_rate": 1.4990745896610897e-06,
+      "loss": 1.4716,
+      "step": 3820
+    },
+    {
+      "epoch": 51.0,
+      "learning_rate": 0.0,
+      "loss": 1.2765,
+      "step": 3825
+    },
+    {
+      "epoch": 51.0,
+      "eval_loss": 1.2292253971099854,
+      "eval_runtime": 2.6668,
+      "eval_samples_per_second": 39.373,
+      "eval_steps_per_second": 5.25,
+      "step": 3825
+    },
+    {
+      "epoch": 51.07,
+      "learning_rate": 1.499074589660808e-06,
+      "loss": 1.2797,
+      "step": 3830
+    },
+    {
+      "epoch": 51.13,
+      "learning_rate": 5.9307816057173676e-06,
+      "loss": 1.9281,
+      "step": 3835
+    },
+    {
+      "epoch": 51.2,
+      "learning_rate": 1.3101434185878354e-05,
+      "loss": 1.4825,
+      "step": 3840
+    },
+    {
+      "epoch": 51.27,
+      "learning_rate": 2.2697640403782067e-05,
+      "loss": 1.4677,
+      "step": 3845
+    },
+    {
+      "epoch": 51.33,
+      "learning_rate": 3.429999999999976e-05,
+      "loss": 1.0241,
+      "step": 3850
+    },
+    {
+      "epoch": 51.4,
+      "learning_rate": 4.740143418587843e-05,
+      "loss": 1.0786,
+      "step": 3855
+    },
+    {
+      "epoch": 51.47,
+      "learning_rate": 6.142934741983887e-05,
+      "loss": 1.4638,
+      "step": 3860
+    },
+    {
+      "epoch": 51.53,
+      "learning_rate": 7.5770652580161e-05,
+      "loss": 1.0569,
+      "step": 3865
+    },
+    {
+      "epoch": 51.6,
+      "learning_rate": 8.979856581412144e-05,
+      "loss": 1.0274,
+      "step": 3870
+    },
+    {
+      "epoch": 51.67,
+      "learning_rate": 0.00010289999999999844,
+      "loss": 0.9631,
+      "step": 3875
+    },
+    {
+      "epoch": 51.73,
+      "learning_rate": 0.00011450235959621783,
+      "loss": 0.7978,
+      "step": 3880
+    },
+    {
+      "epoch": 51.8,
+      "learning_rate": 0.00012409856581412041,
+      "loss": 0.838,
+      "step": 3885
+    },
+    {
+      "epoch": 51.87,
+      "learning_rate": 0.00013126921839428258,
+      "loss": 0.6216,
+      "step": 3890
+    },
+    {
+      "epoch": 51.93,
+      "learning_rate": 0.00013570092541033876,
+      "loss": 1.2543,
+      "step": 3895
+    },
+    {
+      "epoch": 52.0,
+      "learning_rate": 0.0001372,
+      "loss": 1.0127,
+      "step": 3900
+    },
+    {
+      "epoch": 52.0,
+      "eval_loss": 0.8823016285896301,
+      "eval_runtime": 2.4876,
+      "eval_samples_per_second": 42.209,
+      "eval_steps_per_second": 5.628,
+      "step": 3900
     }
   ],
+  "max_steps": 5250,
+  "num_train_epochs": 70,
+  "total_flos": 4029907009536000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ce9d32bd26e298502c691514e6e62cd03430e7295e1fd7ea31138547385731b
 size 3311

 version https://git-lfs.github.com/spec/v1
+oid sha256:202761b0e853ed0912633dc56b6e49fb1661b0995bfb2fd6aec7da7426b6f24c
 size 3311