ahmedgongi10
committed on
ahmedgongi10/mistral_version1
Browse files
- README.md +15 -19
- adapter_config.json +6 -6
- adapter_model.safetensors +2 -2
- training_args.bin +1 -1
README.md CHANGED

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss:
+- Loss: 1.0892
 
 ## Model description
 
@@ -36,37 +36,33 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size:
-- eval_batch_size:
+- train_batch_size: 4
+- eval_batch_size: 2
 - seed: 42
-- gradient_accumulation_steps:
-- total_train_batch_size:
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type:
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
 - lr_scheduler_warmup_steps: 2
-- num_epochs:
+- num_epochs: 1
 - mixed_precision_training: Native AMP
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.
-| 1.
-| 1.
-
-
-| 0.4464 | 6.0 | 612 | 1.5966 |
-| 0.3066 | 7.0 | 714 | 1.7195 |
-| 0.2135 | 8.0 | 816 | 1.8925 |
-| 0.1534 | 9.0 | 918 | 2.0300 |
-| 0.1169 | 10.0 | 1020 | 2.1539 |
+| 1.0643 | 0.17 | 100 | 1.1166 |
+| 1.0302 | 0.34 | 200 | 1.1029 |
+| 1.1972 | 0.51 | 300 | 1.0958 |
+| 1.1332 | 0.68 | 400 | 1.0910 |
+| 1.0084 | 0.85 | 500 | 1.0892 |
 
 
 ### Framework versions
 
-- PEFT 0.
-- Transformers 4.
+- PEFT 0.10.0
+- Transformers 4.39.0
 - Pytorch 2.1.2
 - Datasets 2.1.0
 - Tokenizers 0.15.2
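Taken together, the new hyperparameters and the LoRA settings in adapter_config.json below pin the run down fairly well. The following is a minimal sketch of an equivalent setup, assuming the Trainer API of the listed Transformers 4.39.0 and PEFT 0.10.0; the card names no dataset, so the data here is a placeholder, and `output_dir` is hypothetical:

```python
# Sketch of a run matching the card's hyperparameters. Hypothetical:
# the real dataset and output directory are not named in the repo.
from datasets import Dataset
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          Trainer, TrainingArguments)
from peft import LoraConfig, get_peft_model

base = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(base)
model = AutoModelForCausalLM.from_pretrained(base)

# LoRA settings taken from adapter_config.json below: r=8, alpha=16,
# dropout 0.1, applied to the attention projections.
model = get_peft_model(model, LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.1,
    target_modules=["k_proj", "o_proj", "v_proj", "q_proj"],
    task_type="CAUSAL_LM",
))

# Placeholder data; the card says only "an unknown dataset".
def tok(batch):
    enc = tokenizer(batch["text"], truncation=True, max_length=512)
    enc["labels"] = [list(ids) for ids in enc["input_ids"]]
    return enc

train_ds = eval_ds = Dataset.from_dict(
    {"text": ["<s>[INST] example prompt [/INST] example answer</s>"]}
).map(tok, batched=True, remove_columns=["text"])

args = TrainingArguments(
    output_dir="mistral_version1",   # hypothetical
    learning_rate=2e-4,              # default AdamW matches the card's
    per_device_train_batch_size=4,   # betas=(0.9,0.999), eps=1e-08
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,   # effective train batch size 4 * 2 = 8
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,                # the card lists both; when both are
    warmup_steps=2,                  # set, warmup_steps takes precedence
    num_train_epochs=1,
    seed=42,
    fp16=True,                       # "Native AMP" mixed precision
    evaluation_strategy="steps",
    eval_steps=100,                  # matches the 100-step eval cadence above
)

Trainer(model=model, args=args, train_dataset=train_ds,
        eval_dataset=eval_ds, tokenizer=tokenizer).train()
```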
adapter_config.json CHANGED

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path":
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -10,20 +10,20 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha":
+  "lora_alpha": 16,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r":
+  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
     "k_proj",
-    "
-    "
+    "o_proj",
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
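To use the adapter this config describes, it can be attached to the base model with PEFT at inference time. A minimal sketch, assuming the published repo id:

```python
# Minimal inference sketch: attach the LoRA adapter from this repo
# (r=8, alpha=16 on the q/k/v/o projections) to the base model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(base)
model = AutoModelForCausalLM.from_pretrained(
    base, torch_dtype=torch.float16, device_map="auto")
model = PeftModel.from_pretrained(model, "ahmedgongi10/mistral_version1")

prompt = "[INST] What does a LoRA adapter change in a model? [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```

Calling `model.merge_and_unload()` afterwards folds the LoRA deltas into the base weights if an adapter-free checkpoint is preferred.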
adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3fe68d09cd92062bb737ad1922ca97a2b53af099918acd6851929ef93437e8c9
+size 27297032
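This pointer is all Git LFS keeps in the repo itself: the sha256 and byte size identify the real object, which is enough to verify a download end to end. A sketch, using the repo id and file name as published (`hf_hub_download` resolves the pointer to the actual file):

```python
# Verify the downloaded adapter against the LFS pointer's oid and size.
import hashlib
import os
from huggingface_hub import hf_hub_download

path = hf_hub_download("ahmedgongi10/mistral_version1",
                       "adapter_model.safetensors")

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == 27297032  # size from the pointer
assert digest.hexdigest() == "3fe68d09cd92062bb737ad1922ca97a2b53af099918acd6851929ef93437e8c9"
```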
training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7523d83c98241ee4c34cb76e35bf170b32d4a35ae6bc7cabbc063b9764478e8b
 size 4856
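training_args.bin is the pickled transformers TrainingArguments object the Trainer saves alongside a run, so the full configuration can be inspected offline. A small sketch, assuming the file has been downloaded locally:

```python
# Inspect the pickled TrainingArguments saved by the Trainer.
# This is a full pickle, not a tensor file, so on newer PyTorch pass
# weights_only=False; only load it from a source you trust.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size,
      args.lr_scheduler_type, args.num_train_epochs)
```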