Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

README.md +19 -28
all_results.json +11 -11
config.json +1 -1
eval_results.json +6 -6
pytorch_model.bin +1 -1
train_results.json +5 -5
trainer_state.json +58 -16
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,36 +1,27 @@
 ---
-license: other
-datasets:
-- Open-Orca/OpenOrca
-- ehartford/wizard_vicuna_70k_unfiltered
 tags:
-- code
-- prompt
-- reverse prompt
-widget:
-- text: "Photosynthesis is the process by which plants, algae and some bacteria convert carbon dioxide and water into glucose and oxygen, using the energy of sunlight. This process is fundamental to life on Earth, as it provides the basis for almost all food chains and also contributes to the carbon cycle by helping to regulate the concentration of carbon dioxide in the atmosphere.\n[REVERSED-PROMPT] "
-  example_title: "reverse prompt"
 ---
-# core-prompt-reverser-opt-1.3b
-This model is a fine-tuned version of [facebook/opt-1.3b](https://huggingface.co/facebook/opt-1.3b) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.4784
-- Accuracy: 0.6753
 ## Model description
- [INSTRUCTION] {your question}
- [RESPONSE] {model response}
-or
-[RESPONSE] {response}
-[REVERSED-PROMPT] {model prompt reversed}
 ## Intended uses & limitations
@@ -38,9 +29,9 @@ More information needed
 ## Training and evaluation data
-Wizard, openOrca, custom data
 ### Training hyperparameters
@@ -54,7 +45,7 @@ The following hyperparameters were used during training:
 - num_epochs: 1.0
 ### Training results
- This model is still training: it has seen only 5% of the total training data so far, and training is expected to finish on 4 Sep.
 ### Framework versions
@@ -62,4 +53,4 @@ The following hyperparameters were used during training:
 - Transformers 4.33.0.dev0
 - Pytorch 2.1.0.dev20230605+cu121
 - Datasets 2.14.4
-- Tokenizers 0.13.3

 ---
+base_model: ss5
 tags:
+- generated_from_trainer
+metrics:
+- accuracy
+model-index:
+- name: ss6
+  results: []
 ---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# ss6
+This model is a fine-tuned version of [ss5](https://huggingface.co/ss5) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.2950
+- Accuracy: 0.7084
 ## Model description
+More information needed
 ## Intended uses & limitations
 ## Training and evaluation data
+More information needed
+## Training procedure
 ### Training hyperparameters
 - num_epochs: 1.0
 ### Training results
 ### Framework versions
 - Transformers 4.33.0.dev0
 - Pytorch 2.1.0.dev20230605+cu121
 - Datasets 2.14.4
+- Tokenizers 0.13.3

all_results.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
     "epoch": 1.0,
-    "eval_accuracy": 0.6752705025786228,
-    "eval_loss": 1.4784468412399292,
-    "eval_runtime": 55.0697,
     "eval_samples": 232,
-    "eval_samples_per_second": 4.213,
-    "eval_steps_per_second": 0.527,
-    "perplexity": 4.3861280346659415,
-    "train_loss": 1.700456760354238,
-    "train_runtime": 3303.7788,
-    "train_samples": 11678,
-    "train_samples_per_second": 3.535,
-    "train_steps_per_second": 0.442
 }

 {
     "epoch": 1.0,
+    "eval_accuracy": 0.7083881079987865,
+    "eval_loss": 1.294954776763916,
+    "eval_runtime": 53.4048,
     "eval_samples": 232,
+    "eval_samples_per_second": 4.344,
+    "eval_steps_per_second": 0.543,
+    "perplexity": 3.6508308680217363,
+    "train_loss": 1.2382940134009472,
+    "train_runtime": 11022.1698,
+    "train_samples": 38349,
+    "train_samples_per_second": 3.479,
+    "train_steps_per_second": 0.435
 }

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "facebook/opt-1.3b",
   "_remove_final_layer_norm": false,
   "activation_dropout": 0.0,
   "activation_function": "relu",

 {
+  "_name_or_path": "ss5",
   "_remove_final_layer_norm": false,
   "activation_dropout": 0.0,
   "activation_function": "relu",

eval_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
     "epoch": 1.0,
-    "eval_accuracy": 0.6752705025786228,
-    "eval_loss": 1.4784468412399292,
-    "eval_runtime": 55.0697,
     "eval_samples": 232,
-    "eval_samples_per_second": 4.213,
-    "eval_steps_per_second": 0.527,
-    "perplexity": 4.3861280346659415
 }

 {
     "epoch": 1.0,
+    "eval_accuracy": 0.7083881079987865,
+    "eval_loss": 1.294954776763916,
+    "eval_runtime": 53.4048,
     "eval_samples": 232,
+    "eval_samples_per_second": 4.344,
+    "eval_steps_per_second": 0.543,
+    "perplexity": 3.6508308680217363
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c82ac35a6d50d4f6668cf6fc2037aece6e394df4a65ef6ff81ad131822fe6e70
 size 2631648218

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8c1c4b4f568da599148a79e669bbd5590428dfaf7064385627c5948af153ced
 size 2631648218

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 1.700456760354238,
-    "train_runtime": 3303.7788,
-    "train_samples": 11678,
-    "train_samples_per_second": 3.535,
-    "train_steps_per_second": 0.442
 }

 {
     "epoch": 1.0,
+    "train_loss": 1.2382940134009472,
+    "train_runtime": 11022.1698,
+    "train_samples": 38349,
+    "train_samples_per_second": 3.479,
+    "train_steps_per_second": 0.435
 }

trainer_state.json CHANGED Viewed

@@ -3,38 +3,80 @@
   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 1460,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.34,
-      "learning_rate": 3.287671232876712e-05,
-      "loss": 1.776,
       "step": 500
     },
     {
-      "epoch": 0.68,
-      "learning_rate": 1.5753424657534248e-05,
-      "loss": 1.6672,
       "step": 1000
     },
     {
       "epoch": 1.0,
-      "step": 1460,
-      "total_flos": 8.671679350623437e+16,
-      "train_loss": 1.700456760354238,
-      "train_runtime": 3303.7788,
-      "train_samples_per_second": 3.535,
-      "train_steps_per_second": 0.442
     }
   ],
   "logging_steps": 500,
-  "max_steps": 1460,
   "num_train_epochs": 1,
-  "save_steps": -1460,
-  "total_flos": 8.671679350623437e+16,
   "trial_name": null,
   "trial_params": null
 }

   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 4794,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.1,
+      "learning_rate": 4.478514810179391e-05,
+      "loss": 1.3792,
       "step": 500
     },
     {
+      "epoch": 0.21,
+      "learning_rate": 3.9570296203587817e-05,
+      "loss": 1.3043,
       "step": 1000
     },
+    {
+      "epoch": 0.31,
+      "learning_rate": 3.435544430538173e-05,
+      "loss": 1.2641,
+      "step": 1500
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 2.9140592407175638e-05,
+      "loss": 1.2319,
+      "step": 2000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 2.3925740508969545e-05,
+      "loss": 1.2035,
+      "step": 2500
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 1.8710888610763455e-05,
+      "loss": 1.2072,
+      "step": 3000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.3496036712557364e-05,
+      "loss": 1.1955,
+      "step": 3500
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 8.281184814351273e-06,
+      "loss": 1.1949,
+      "step": 4000
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 3.0663329161451816e-06,
+      "loss": 1.194,
+      "step": 4500
+    },
     {
       "epoch": 1.0,
+      "step": 4794,
+      "total_flos": 2.8476642525865574e+17,
+      "train_loss": 1.2382940134009472,
+      "train_runtime": 11022.1698,
+      "train_samples_per_second": 3.479,
+      "train_steps_per_second": 0.435
     }
   ],
   "logging_steps": 500,
+  "max_steps": 4794,
   "num_train_epochs": 1,
+  "save_steps": -4794,
+  "total_flos": 2.8476642525865574e+17,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68e5b42e7290d1422423b958c1cf982b94ae5db9e379c560d977a8e2c38edab8
 size 4472

 version https://git-lfs.github.com/spec/v1
+oid sha256:13435d31ff4c3a7b19d246d3aedb0e34f659b86388d243bd7658841e425d0944
 size 4472