End of training

Browse files

Files changed (14) hide show

README.md +23 -25
config.json +1 -1
final_checkpoint/config.json +1 -1
final_checkpoint/generation_config.json +1 -1
final_checkpoint/model-00001-of-00003.safetensors +1 -1
final_checkpoint/model-00002-of-00003.safetensors +1 -1
final_checkpoint/model-00003-of-00003.safetensors +1 -1
generation_config.json +1 -1
model-00001-of-00003.safetensors +1 -1
model-00002-of-00003.safetensors +1 -1
model-00003-of-00003.safetensors +1 -1
tokenizer.json +7 -17
tokenizer_config.json +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4442
 ## Model description
@@ -37,11 +37,9 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-07
-- train_batch_size: 2
 - eval_batch_size: 1
 - seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 4
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
@@ -51,31 +49,31 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.8849        | 0.0447 | 50   | 1.8726          |
-| 1.4871        | 0.0895 | 100  | 1.4453          |
-| 0.8608        | 0.1342 | 150  | 0.7955          |
-| 0.4432        | 0.1790 | 200  | 0.4648          |
-| 0.4269        | 0.2237 | 250  | 0.4556          |
-| 0.424         | 0.2685 | 300  | 0.4519          |
-| 0.4417        | 0.3132 | 350  | 0.4497          |
-| 0.4253        | 0.3579 | 400  | 0.4481          |
-| 0.4247        | 0.4027 | 450  | 0.4470          |
-| 0.4152        | 0.4474 | 500  | 0.4461          |
-| 0.4116        | 0.4922 | 550  | 0.4453          |
-| 0.4174        | 0.5369 | 600  | 0.4448          |
-| 0.4201        | 0.5817 | 650  | 0.4446          |
-| 0.423         | 0.6264 | 700  | 0.4444          |
-| 0.4243        | 0.6711 | 750  | 0.4441          |
-| 0.4325        | 0.7159 | 800  | 0.4442          |
-| 0.4128        | 0.7606 | 850  | 0.4441          |
-| 0.4207        | 0.8054 | 900  | 0.4441          |
-| 0.424         | 0.8501 | 950  | 0.4442          |
-| 0.4219        | 0.8949 | 1000 | 0.4442          |
 ### Framework versions
-- Transformers 4.42.3
 - Pytorch 2.0.0+cu117
 - Datasets 2.20.0
 - Tokenizers 0.19.1

 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0586
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-07
+- train_batch_size: 1
 - eval_batch_size: 1
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 2.0168        | 0.0112 | 50   | 1.9854          |
+| 1.3848        | 0.0224 | 100  | 1.3059          |
+| 0.1374        | 0.0336 | 150  | 0.1142          |
+| 0.0099        | 0.0448 | 200  | 0.0587          |
+| 0.0076        | 0.0559 | 250  | 0.0581          |
+| 0.0073        | 0.0671 | 300  | 0.0580          |
+| 0.0071        | 0.0783 | 350  | 0.0587          |
+| 0.0071        | 0.0895 | 400  | 0.0586          |
+| 0.0069        | 0.1007 | 450  | 0.0589          |
+| 0.0068        | 0.1119 | 500  | 0.0586          |
+| 0.0068        | 0.1231 | 550  | 0.0586          |
+| 0.0067        | 0.1343 | 600  | 0.0588          |
+| 0.0067        | 0.1454 | 650  | 0.0589          |
+| 0.0066        | 0.1566 | 700  | 0.0590          |
+| 0.0066        | 0.1678 | 750  | 0.0587          |
+| 0.0066        | 0.1790 | 800  | 0.0588          |
+| 0.0066        | 0.1902 | 850  | 0.0588          |
+| 0.0066        | 0.2014 | 900  | 0.0586          |
+| 0.0066        | 0.2126 | 950  | 0.0586          |
+| 0.0066        | 0.2238 | 1000 | 0.0586          |
 ### Framework versions
+- Transformers 4.42.4
 - Pytorch 2.0.0+cu117
 - Datasets 2.20.0
 - Tokenizers 0.19.1

config.json CHANGED Viewed

@@ -20,7 +20,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.42.3",
   "use_cache": false,
   "vocab_size": 32000
 }

   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.42.4",
   "use_cache": false,
   "vocab_size": 32000
 }

final_checkpoint/config.json CHANGED Viewed

@@ -20,7 +20,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.42.3",
   "use_cache": false,
   "vocab_size": 32000
 }

   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.42.4",
   "use_cache": false,
   "vocab_size": 32000
 }

final_checkpoint/generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.42.3"
 }

   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
+  "transformers_version": "4.42.4"
 }

final_checkpoint/model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bffa714844631e46a66169aae2df3428ee7e8c704768d49234b90f19d0b1da5
 size 4943162240

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c4ff2d9256673a5f7be81f7babe4ecc2a742e2e9a7a057c017f32b467b44635
 size 4943162240

final_checkpoint/model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8acd8dcfe3eef914d1d34914d548e1870b2a02468daba96fea34fc7c7811a7a
 size 4999819232

 version https://git-lfs.github.com/spec/v1
+oid sha256:37188812dfd2fb736ec74c32b80e5a1f39f6f86553dff4c6326501bfb861f7a7
 size 4999819232

final_checkpoint/model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ef257d88fcb1f000dd053a9f59648c5e08d5cf58f2a643113ad6feab93d5633
 size 4540516256

 version https://git-lfs.github.com/spec/v1
+oid sha256:a137b71b1dc010833e81b3b0584e56c10fe876c73e8601a88b6ebdf3bfd3087b
 size 4540516256

generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.42.3"
 }

   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
+  "transformers_version": "4.42.4"
 }

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bffa714844631e46a66169aae2df3428ee7e8c704768d49234b90f19d0b1da5
 size 4943162240

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c4ff2d9256673a5f7be81f7babe4ecc2a742e2e9a7a057c017f32b467b44635
 size 4943162240

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8acd8dcfe3eef914d1d34914d548e1870b2a02468daba96fea34fc7c7811a7a
 size 4999819232

 version https://git-lfs.github.com/spec/v1
+oid sha256:37188812dfd2fb736ec74c32b80e5a1f39f6f86553dff4c6326501bfb861f7a7
 size 4999819232

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ef257d88fcb1f000dd053a9f59648c5e08d5cf58f2a643113ad6feab93d5633
 size 4540516256

 version https://git-lfs.github.com/spec/v1
+oid sha256:a137b71b1dc010833e81b3b0584e56c10fe876c73e8601a88b6ebdf3bfd3087b
 size 4540516256

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 2048,
     "strategy": "LongestFirst",
     "stride": 0
   },
@@ -36,23 +36,13 @@
       "special": true
     }
   ],
-  "normalizer": {
-    "type": "Sequence",
-    "normalizers": [
-      {
-        "type": "Prepend",
-        "prepend": "▁"
-      },
-      {
-        "type": "Replace",
-        "pattern": {
-          "String": " "
-        },
-        "content": "▁"
-      }
-    ]
   },
-  "pre_tokenizer": null,
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 1024,
     "strategy": "LongestFirst",
     "stride": 0
   },
       "special": true
     }
   ],
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Metaspace",
+    "replacement": "▁",
+    "prepend_scheme": "first",
+    "split": false
   },
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [

tokenizer_config.json CHANGED Viewed

@@ -30,10 +30,10 @@
   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "sp_model_kwargs": {},

   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
+  "chat_template": "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "sp_model_kwargs": {},

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b97858d85089058d23fda7dcd7e9dfe996e7baa38803cfdfa42e0622f5268cdd
 size 4667

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fdddd6c0ec31e1895f8a65b351660a02d706be113d62507b7d83f44900e1071
 size 4667