Commit a08b0e9
powermove72 committed
Parent(s): 22dd144

Upload folder using huggingface_hub
Files changed:
- README.md +1 -1
- config.json +13 -2
- mergekit_moe_config.yml +1 -1
- model-00001-of-00013.safetensors +1 -1
- model-00012-of-00013.safetensors +1 -1
- model-00013-of-00013.safetensors +1 -1
- special_tokens_map.json +0 -5
- tokenizer_config.json +10 -8
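The commit message says the folder was pushed with huggingface_hub. A minimal sketch of that kind of upload, assuming a local folder path and the repo id powermove72/GK-MoE-0.1 (both placeholders, not taken from this commit):

```python
from huggingface_hub import HfApi

# Upload an entire local folder to the Hub as a single commit.
# folder_path and repo_id are illustrative assumptions.
api = HfApi()
api.upload_folder(
    folder_path="./GK-MoE-0.1",
    repo_id="powermove72/GK-MoE-0.1",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```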
README.md
CHANGED
@@ -22,7 +22,7 @@ GK-MoE-0.1 is a Mixture of Experts (MoE) made with the following models using [L
 ## 🧩 Configuration
 
 ```yaml
-base_model:
+base_model: GritLM/GritLM-7B
 experts:
   - source_model: argilla/notus-7b-v1
     positive_prompts:
config.json
CHANGED
@@ -1,15 +1,26 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "GritLM/GritLM-7B",
   "architectures": [
     "MixtralForCausalLM"
   ],
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoModel": "GritLM/GritLM-7B--modeling_gritlm7b.MistralModel",
+    "AutoModelForCausalLM": "GritLM/GritLM-7B--modeling_gritlm7b.MistralForCausalLM",
+    "AutoModelForSequenceClassification": "GritLM/GritLM-7B--modeling_gritlm7b.MistralForSequenceClassification"
+  },
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 4096,
+  "id2label": {
+    "0": "LABEL_0"
+  },
   "initializer_range": 0.02,
   "intermediate_size": 14336,
+  "label2id": {
+    "LABEL_0": 0
+  },
   "max_position_embeddings": 32768,
   "model_type": "mixtral",
   "num_attention_heads": 32,
@@ -26,6 +37,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.41.2",
-  "use_cache":
+  "use_cache": true,
   "vocab_size": 32000
 }
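The added auto_map block routes the Auto classes to GritLM's remote modeling code; it is only consulted when remote code is trusted, otherwise the checkpoint loads as a stock MixtralForCausalLM per its architectures/model_type fields. A minimal loading sketch, assuming the repo id powermove72/GK-MoE-0.1:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "powermove72/GK-MoE-0.1"  # assumed repository id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# trust_remote_code=True lets auto_map resolve to GritLM's custom classes;
# without it, transformers falls back to the native Mixtral implementation.
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,  # matches torch_dtype in config.json
    trust_remote_code=True,
)
```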
mergekit_moe_config.yml
CHANGED
@@ -1,5 +1,5 @@
 
-base_model:
+base_model: GritLM/GritLM-7B
 experts:
   - source_model: argilla/notus-7b-v1
     positive_prompts:
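The only change here is filling in the previously empty base_model field. A quick sanity check, assuming PyYAML is installed, that parses the merge config and prints the base model and expert sources:

```python
import yaml

# Load the mergekit MoE config and list what it merges.
with open("mergekit_moe_config.yml") as f:
    cfg = yaml.safe_load(f)

print("base_model:", cfg["base_model"])
for expert in cfg.get("experts", []):
    print("expert:", expert["source_model"])
```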
model-00001-of-00013.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3747b7732ee3438c6321c10f8f5827bc8e1cb3630c3e9c138ddfe2744d7ca75d
 size 1933849912
model-00012-of-00013.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c7d454c3bbc7ef3ce2dc7b4c3eef97c2d66b9782699bbe89e843f82f3ce093cf
 size 1979981568
model-00013-of-00013.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:221cf32ee89d16ae085752473b77d36b4708ef3ed889b3b186df1d183cc4b5d6
 size 1879588416
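Each .safetensors entry above is a Git LFS pointer: the commit swaps in new sha256 oids while the shard sizes stay the same. A small sketch for verifying a downloaded shard against the recorded oid (file name assumed to match the repository path):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file and return its hex sha256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid from the updated pointer for shard 1 of 13
expected = "3747b7732ee3438c6321c10f8f5827bc8e1cb3630c3e9c138ddfe2744d7ca75d"
actual = sha256_of("model-00001-of-00013.safetensors")
print("match" if actual == expected else "mismatch", actual)
```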
special_tokens_map.json
CHANGED
@@ -1,9 +1,4 @@
 {
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
   "bos_token": {
     "content": "<s>",
     "lstrip": false,
tokenizer_config.json
CHANGED
@@ -27,22 +27,24 @@
       "special": true
     }
   },
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
+  "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content']
+  "chat_template": "{{ bos_token }}{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": true,
+  "max_length": 2048,
   "model_max_length": 1000000000000000019884624838656,
+  "pad_to_multiple_of": null,
   "pad_token": "<s>",
+  "pad_token_type_id": 0,
+  "padding_side": "left",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
+  "stride": 0,
   "tokenizer_class": "LlamaTokenizer",
-  "truncation_side": "
+  "truncation_side": "right",
   "unk_token": "<unk>",
-  "use_default_system_prompt":
+  "use_default_system_prompt": false
 }
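The new chat_template is the Zephyr-style <|user|>/<|assistant|> layout, now prefixed with the BOS token. A minimal usage sketch, assuming the repo id powermove72/GK-MoE-0.1:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("powermove72/GK-MoE-0.1")  # assumed repo id

messages = [
    {"role": "user", "content": "Summarize mixture-of-experts routing in one sentence."},
]

# Renders roughly "<s>\n<|user|>\n...\n<|assistant|>" per the template above.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)
```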