Commit fd9927d (verified) by jtatman · Parent(s): 8471b41

Upload folder using huggingface_hub
README.md CHANGED
@@ -5,18 +5,37 @@ tags:
 - lazymergekit
 - SciPhi/SciPhi-Mistral-7B-32k
 - SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
 base_model:
 - SciPhi/SciPhi-Mistral-7B-32k
 - SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
+- SciPhi/SciPhi-Mistral-7B-32k
 ---
 
 # SciPhi-Mistral-7B-32k-sliced
 
-This is purely an experiment in sliced layer extraction to find active layers.
-
 SciPhi-Mistral-7B-32k-sliced is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
 * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
 * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
+* [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
+* [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
+* [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
+* [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
+* [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
+* [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
+* [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
 
 ## 🧩 Configuration
 
@@ -24,20 +43,34 @@ SciPhi-Mistral-7B-32k-sliced is a merge of the following models using [LazyMerge
 slices:
   - sources:
     - model: SciPhi/SciPhi-Mistral-7B-32k
-      layer_range: [0, 6]
+      layer_range: [3, 3]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [5, 5]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [6, 6]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [10, 10]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [17, 17]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [18, 18]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [19, 19]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [20, 20]
+  - sources:
     - model: SciPhi/SciPhi-Mistral-7B-32k
-      layer_range: [26, 32]
+      layer_range: [23, 23]
 
-merge_method: slerp
-base_model: SciPhi/SciPhi-Mistral-7B-32k
 
-parameters:
-  t:
-    - filter: self_attn
-      value: [0, 0.5, 0.3, 0.7, 1]
-    - filter: mlp
-      value: [1, 0.5, 0.7, 0.3, 0]
-    - value: 0.5 # fallback for rest of tensors
+merge_method: passthrough
 tokenizer_source: union
 
 dtype: float16
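
Editor's note: as a quick sanity check of the README changes above, here is a minimal loading sketch with transformers. It is not part of the repo; the repo id `jtatman/SciPhi-Mistral-7B-32k-sliced` is assumed from the model name and committer, and since this commit reduces `num_hidden_layers` to 0 (see config.json below), the generation is only a smoke test, not a usable completion.

```python
# Hedged sketch, not from the repo: load the sliced checkpoint and run a dummy generation.
# Assumption: the repo id is "jtatman/SciPhi-Mistral-7B-32k-sliced" (inferred, not stated in this diff).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "jtatman/SciPhi-Mistral-7B-32k-sliced"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float16)

print(model.config.num_hidden_layers)  # 0 after this commit

# With zero decoder layers the forward pass is embed -> final norm -> lm_head,
# so this only verifies that the checkpoint loads and runs.
inputs = tokenizer("The theory of relativity states that", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=16, do_sample=False)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```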
config.json CHANGED
@@ -3,6 +3,7 @@
   "architectures": [
     "MistralForCausalLM"
   ],
+  "attention_dropout": 0.0,
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
@@ -12,14 +13,14 @@
   "max_position_embeddings": 32768,
   "model_type": "mistral",
   "num_attention_heads": 32,
-  "num_hidden_layers": 6,
+  "num_hidden_layers": 0,
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-05,
   "rope_theta": 10000.0,
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.35.2",
+  "transformers_version": "4.37.2",
   "use_cache": false,
   "vocab_size": 32000
 }
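
Editor's note: the substantive config.json changes are `num_hidden_layers` dropping from 6 to 0, plus the `attention_dropout` field and `transformers_version` bump from the newer exporter. A small, hedged check (same assumed repo id as above) that reads those fields back:

```python
# Hedged sketch: read back the fields touched by this commit.
# AutoConfig only fetches config.json, so this is a cheap check.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("jtatman/SciPhi-Mistral-7B-32k-sliced")  # assumed repo id
print(cfg.num_hidden_layers)  # 0 (was 6 before this commit)
print(cfg.attention_dropout)  # 0.0, field added by the newer transformers version
print(cfg.sliding_window)     # 4096, unchanged
```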
mergekit_config.yml CHANGED
@@ -2,20 +2,34 @@
 slices:
   - sources:
     - model: SciPhi/SciPhi-Mistral-7B-32k
-      layer_range: [0, 6]
+      layer_range: [3, 3]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [5, 5]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [6, 6]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [10, 10]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [17, 17]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [18, 18]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [19, 19]
+  - sources:
+    - model: SciPhi/SciPhi-Mistral-7B-32k
+      layer_range: [20, 20]
+  - sources:
     - model: SciPhi/SciPhi-Mistral-7B-32k
-      layer_range: [26, 32]
+      layer_range: [23, 23]
 
-merge_method: slerp
-base_model: SciPhi/SciPhi-Mistral-7B-32k
 
-parameters:
-  t:
-    - filter: self_attn
-      value: [0, 0.5, 0.3, 0.7, 1]
-    - filter: mlp
-      value: [1, 0.5, 0.7, 0.3, 0]
-    - value: 0.5 # fallback for rest of tensors
+merge_method: passthrough
 tokenizer_source: union
 
 dtype: float16
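
Editor's note on the new slice definitions above: mergekit's `layer_range` appears to be end-exclusive, consistent with the old config where `[0, 6]` produced the previous `num_hidden_layers: 6`. Under that reading, every new range of the form `[n, n]` is empty, so the passthrough merge keeps zero decoder layers, matching `num_hidden_layers: 0` in config.json. A short sketch of that arithmetic, with a hypothetical single-layer variant for comparison:

```python
# Sketch: why the sliced model ends up with zero transformer layers.
# Assumption: layer_range is end-exclusive, [start, end), consistent with the old
# config where [0, 6] yielded "num_hidden_layers": 6.
new_ranges = [(3, 3), (5, 5), (6, 6), (10, 10), (17, 17),
              (18, 18), (19, 19), (20, 20), (23, 23)]

print(sum(end - start for start, end in new_ranges))  # 0 layers kept

# Hypothetical variant: keeping single layers 3, 5, 6, ..., 23 would need an
# exclusive end one past each layer index, e.g. [3, 4], [5, 6], [6, 7], ...
single_layer_ranges = [(n, n + 1) for n, _ in new_ranges]
print(sum(end - start for start, end in single_layer_ranges))  # 9 layers
```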
model-00001-of-00001.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37eb374525dcbbd712ce3958074be99256e72e846ecfc9b6d7b90d4e2af919bd
+size 524296512
model.safetensors.index.json CHANGED
@@ -1 +1 @@
-{"metadata": {"mergekit_version": "0.0.4.1"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00002.safetensors", "lm_head.weight": "model-00001-of-00002.safetensors", "model.norm.weight": "model-00001-of-00002.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.2.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.1.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.0.input_layernorm.weight": "model-00002-of-00002.safetensors"}}
+{"metadata": {"mergekit_version": "0.0.4.1"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00001.safetensors", "lm_head.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors"}}
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
 {
+  "add_bos_token": true,
+  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",