Model save

Browse files

Files changed (6) hide show

README.md +14 -20
adapter_model.safetensors +1 -1
all_results.json +6 -11
runs/Jun10_18-07-27_user-HP-Z8-Fury-G5-Workstation-Desktop-PC/events.out.tfevents.1718010465.user-HP-Z8-Fury-G5-Workstation-Desktop-PC.8637.0 +2 -2
train_results.json +6 -6
trainer_state.json +0 -0

README.md CHANGED Viewed

@@ -2,13 +2,12 @@
 license: gemma
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
 base_model: google/gemma-2b
 datasets:
-- llama-duo/synth_summarize_dataset_dedup
 model-index:
 - name: gemma2b-summarize-gpt4o-128k
   results: []
@@ -19,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # gemma2b-summarize-gpt4o-128k
-This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the llama-duo/synth_summarize_dataset_dedup dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.5233
 ## Model description
@@ -52,27 +51,22 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 15
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.2085        | 1.0   | 293  | 2.4863          |
-| 1.1135        | 2.0   | 586  | 2.4516          |
-| 1.0715        | 3.0   | 879  | 2.4473          |
-| 1.0471        | 4.0   | 1172 | 2.4524          |
-| 1.0357        | 5.0   | 1465 | 2.4685          |
-| 0.993         | 6.0   | 1758 | 2.4703          |
-| 0.9941        | 7.0   | 2051 | 2.4906          |
-| 0.9844        | 8.0   | 2344 | 2.4896          |
-| 0.9779        | 9.0   | 2637 | 2.5025          |
-| 0.9639        | 10.0  | 2930 | 2.5126          |
-| 0.952         | 11.0  | 3223 | 2.5192          |
-| 0.9505        | 12.0  | 3516 | 2.5205          |
-| 0.9442        | 13.0  | 3809 | 2.5223          |
-| 0.9469        | 14.0  | 4102 | 2.5227          |
-| 0.9444        | 15.0  | 4395 | 2.5233          |
 ### Framework versions

 license: gemma
 library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
 base_model: google/gemma-2b
 datasets:
+- generator
 model-index:
 - name: gemma2b-summarize-gpt4o-128k
   results: []
 # gemma2b-summarize-gpt4o-128k
+This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.7978
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 10
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.1249        | 1.0   | 293  | 2.4641          |
+| 1.0415        | 2.0   | 586  | 2.4514          |
+| 0.9915        | 3.0   | 879  | 2.4750          |
+| 0.9551        | 4.0   | 1172 | 2.5292          |
+| 0.9287        | 5.0   | 1465 | 2.5925          |
+| 0.8733        | 6.0   | 1758 | 2.6555          |
+| 0.8577        | 7.0   | 2051 | 2.7316          |
+| 0.8364        | 8.0   | 2344 | 2.7742          |
+| 0.8311        | 9.0   | 2637 | 2.7971          |
+| 0.8243        | 10.0  | 2930 | 2.7978          |
 ### Framework versions

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac8057f28991d560ba81fa781ad1f23b1cfb0bb8237f05c02f83bf70833ec035
 size 78480320

 version https://git-lfs.github.com/spec/v1
+oid sha256:f15b613b4b9132b5415d28f8f8112434a3f6b55580e613d9c3c48eeb9510e0af
 size 78480320

all_results.json CHANGED Viewed

@@ -1,14 +1,9 @@
 {
-    "epoch": 15.0,
-    "eval_loss": 2.523277521133423,
-    "eval_runtime": 0.5332,
-    "eval_samples": 25,
-    "eval_samples_per_second": 18.753,
-    "eval_steps_per_second": 1.875,
-    "total_flos": 2.581505823377195e+18,
-    "train_loss": 1.0488379673203783,
-    "train_runtime": 23446.7186,
     "train_samples": 129221,
-    "train_samples_per_second": 8.983,
-    "train_steps_per_second": 0.187
 }

 {
+    "epoch": 10.0,
+    "total_flos": 1.7464232891960525e+18,
+    "train_loss": 0.9647074054125633,
+    "train_runtime": 17674.2713,
     "train_samples": 129221,
+    "train_samples_per_second": 7.945,
+    "train_steps_per_second": 0.166
 }

runs/Jun10_18-07-27_user-HP-Z8-Fury-G5-Workstation-Desktop-PC/events.out.tfevents.1718010465.user-HP-Z8-Fury-G5-Workstation-Desktop-PC.8637.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ecaf3e1573e9f0c3a5dafe90e14ed799d74f5c23912fbfea8f6fdd5f914a74b
-size 130527

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5ab0e33b4d4c1f897135ba921c654553191b29c08d4b354ddd6477cc7642357
+size 132418

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 15.0,
-    "total_flos": 2.581505823377195e+18,
-    "train_loss": 1.0488379673203783,
-    "train_runtime": 23446.7186,
     "train_samples": 129221,
-    "train_samples_per_second": 8.983,
-    "train_steps_per_second": 0.187
 }

 {
+    "epoch": 10.0,
+    "total_flos": 1.7464232891960525e+18,
+    "train_loss": 0.9647074054125633,
+    "train_runtime": 17674.2713,
     "train_samples": 129221,
+    "train_samples_per_second": 7.945,
+    "train_steps_per_second": 0.166
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff