Model save

Files changed (8) hide show

README.md CHANGED Viewed

@@ -2,13 +2,11 @@
 license: other
 library_name: peft
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 datasets:
-- ruozhiba
 base_model: 01-ai/Yi-6B
 model-index:
 - name: Yi-6B-ruozhiba
@@ -20,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # Yi-6B-ruozhiba
-This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the ruozhiba dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.1159
 ## Model description

 license: other
 library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
 datasets:
+- generator
 base_model: 01-ai/Yi-6B
 model-index:
 - name: Yi-6B-ruozhiba
 # Yi-6B-ruozhiba
+This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.1245
 ## Model description

adapter_config.json CHANGED Viewed

@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "down_proj",
-    "q_proj",
     "v_proj",
     "gate_proj",
     "up_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
+    "k_proj",
     "gate_proj",
+    "q_proj",
     "up_proj",
+    "o_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a20c018b80060a73f60e5c891260ae9999ecfa3012ab2bd2ca87fba27cf6335b
 size 72673912

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbefd20df8ebf4b40b04ae7381c360f37afe4056467a892e254c4de2d50638b7
 size 72673912

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 4.98,
-    "eval_loss": 2.11590838432312,
-    "eval_runtime": 2.6923,
     "eval_samples": 23,
-    "eval_samples_per_second": 3.343,
-    "eval_steps_per_second": 3.343,
     "train_loss": 0.0,
-    "train_runtime": 9.2483,
     "train_samples": 217,
-    "train_samples_per_second": 43.251,
-    "train_steps_per_second": 43.251
 }

 {
+    "epoch": 0.56,
+    "eval_loss": 2.124497175216675,
+    "eval_runtime": 2.5799,
     "eval_samples": 23,
+    "eval_samples_per_second": 3.488,
+    "eval_steps_per_second": 3.488,
     "train_loss": 0.0,
+    "train_runtime": 9.5028,
     "train_samples": 217,
+    "train_samples_per_second": 42.093,
+    "train_steps_per_second": 42.093
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 4.98,
-    "eval_loss": 2.11590838432312,
-    "eval_runtime": 2.6923,
     "eval_samples": 23,
-    "eval_samples_per_second": 3.343,
-    "eval_steps_per_second": 3.343
 }

 {
+    "epoch": 0.56,
+    "eval_loss": 2.124497175216675,
+    "eval_runtime": 2.5799,
     "eval_samples": 23,
+    "eval_samples_per_second": 3.488,
+    "eval_steps_per_second": 3.488
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 4.98,
     "train_loss": 0.0,
-    "train_runtime": 9.2483,
     "train_samples": 217,
-    "train_samples_per_second": 43.251,
-    "train_steps_per_second": 43.251
 }

 {
+    "epoch": 0.56,
     "train_loss": 0.0,
+    "train_runtime": 9.5028,
     "train_samples": 217,
+    "train_samples_per_second": 42.093,
+    "train_steps_per_second": 42.093
 }

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.975124378109452,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -363,13 +363,49 @@
       "step": 2000
     },
     {
-      "epoch": 4.98,
-      "step": 2000,
-      "total_flos": 1.086187182882816e+16,
       "train_loss": 0.0,
-      "train_runtime": 9.2483,
-      "train_samples_per_second": 43.251,
-      "train_steps_per_second": 43.251
     }
   ],
   "logging_steps": 40,
@@ -377,7 +413,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
-  "total_flos": 1.086187182882816e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5610724925521351,
   "eval_steps": 500,
+  "global_step": 2260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 2000
     },
     {
+      "epoch": 0.51,
+      "learning_rate": 4.999974616484595e-05,
+      "loss": 3.4729,
+      "step": 2040
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 4.99983643554066e-05,
+      "loss": 3.8916,
+      "step": 2080
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.999578104083307e-05,
+      "loss": 3.7239,
+      "step": 2120
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.9991996345288116e-05,
+      "loss": 3.8084,
+      "step": 2160
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.9987010450676885e-05,
+      "loss": 3.2992,
+      "step": 2200
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 4.998082359663817e-05,
+      "loss": 3.809,
+      "step": 2240
+    },
+    {
+      "epoch": 0.56,
+      "step": 2260,
+      "total_flos": 1.095290092486656e+16,
       "train_loss": 0.0,
+      "train_runtime": 9.5028,
+      "train_samples_per_second": 42.093,
+      "train_steps_per_second": 42.093
     }
   ],
   "logging_steps": 40,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
+  "total_flos": 1.095290092486656e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1bab7d1f227e080ac970a4415ade05a0138aa5b1ed1bcafc07328dd2dbfc9f1b
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:c21c41a9f50cd22260029a6006a8054ff466ae95127f33436be8d52a3a74a7dc
 size 4728