mkurman commited on
Commit
b85c09e
·
verified ·
1 Parent(s): 075b644
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  llama-3.2-MedIT-3B-Instruct-GGUF filter=lfs diff=lfs merge=lfs -text
38
  llama-3.2-MedIT-3B-Instruct-GGUF.gguf filter=lfs diff=lfs merge=lfs -text
 
 
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  llama-3.2-MedIT-3B-Instruct-GGUF filter=lfs diff=lfs merge=lfs -text
38
  llama-3.2-MedIT-3B-Instruct-GGUF.gguf filter=lfs diff=lfs merge=lfs -text
39
+ llama-3.2-MedIT-3B-o1-GGUF.gguf filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/home/mkurman/gitlab/ai/model-trainer/results/llama-o4/2025-01-03 11_25_00/final",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -9,10 +9,8 @@
9
  "eos_token_id": [
10
  128001,
11
  128008,
12
- 128009,
13
- 128011
14
  ],
15
- "hdic_topk": 8,
16
  "head_dim": 128,
17
  "hidden_act": "silu",
18
  "hidden_size": 3072,
 
1
  {
2
+ "_name_or_path": "/home/mkurman/gitlab/ai/model-trainer/results/llama-o4/2024-12-30 21_11_27/checkpoint-2200",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
9
  "eos_token_id": [
10
  128001,
11
  128008,
12
+ 128009
 
13
  ],
 
14
  "head_dim": 128,
15
  "hidden_act": "silu",
16
  "hidden_size": 3072,
llama-3.2-MedIT-3B-o1-GGUF.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa64302ab206d608175b590645c3f609b3dd624a18c1dfdada316279d0b87ee
3
+ size 3840526720
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f1a9e2d2ec8402fb74884aef6429216563a358c023cb463bb0301de3da98c72
3
- size 4965799096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d453f7fe71126f4a91a83976378a4bbf7ff2a739d42258e2ab8f03e61e794d1b
3
+ size 4998794968
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89a405811b33d17d72ac8adbed723ecebd80857a4271724ab30875e42d59d933
3
- size 1459729952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4435071cb8d7c88f008a5bc02e9eba5c1a832e6094f3ef4f77d54e9f73f9102f
3
+ size 2214739072
model.safetensors.index.json CHANGED
@@ -1,261 +1 @@
1
- {
2
- "metadata": {
3
- "total_size": 6425499648
4
- },
5
- "weight_map": {
6
- "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
7
- "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
8
- "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
9
- "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
10
- "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
11
- "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
12
- "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
13
- "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
14
- "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
15
- "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
16
- "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
17
- "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
18
- "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
19
- "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
20
- "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
21
- "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
22
- "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
23
- "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
24
- "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
25
- "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
26
- "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
27
- "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
28
- "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
29
- "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
30
- "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
31
- "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
32
- "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
33
- "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
34
- "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
35
- "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
36
- "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
37
- "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
38
- "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
39
- "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
40
- "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
41
- "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
42
- "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
43
- "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
44
- "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
45
- "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
46
- "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
47
- "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
48
- "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
49
- "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
50
- "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
- "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
52
- "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
53
- "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
54
- "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
55
- "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
56
- "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
57
- "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
58
- "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
59
- "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
60
- "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
61
- "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
62
- "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
63
- "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
64
- "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
65
- "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
66
- "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
67
- "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
68
- "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
69
- "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
70
- "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
71
- "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
72
- "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
73
- "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
74
- "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
75
- "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
76
- "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
77
- "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
78
- "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
79
- "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
80
- "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
81
- "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
82
- "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
83
- "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
84
- "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
85
- "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
86
- "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
87
- "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
88
- "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
89
- "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
90
- "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
91
- "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
92
- "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
93
- "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
94
- "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
95
- "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
96
- "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
97
- "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
98
- "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
99
- "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
100
- "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
101
- "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
102
- "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
103
- "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
104
- "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
105
- "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
106
- "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
107
- "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
108
- "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
109
- "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
110
- "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
111
- "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
112
- "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
113
- "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
114
- "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
115
- "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
116
- "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
117
- "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
118
- "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
119
- "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
120
- "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
121
- "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
122
- "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
123
- "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
124
- "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
125
- "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
126
- "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
127
- "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
128
- "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
129
- "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
130
- "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
131
- "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
132
- "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
133
- "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
134
- "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
135
- "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
136
- "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
137
- "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
138
- "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
139
- "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
140
- "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
141
- "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
142
- "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
143
- "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
144
- "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
145
- "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
146
- "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
147
- "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
148
- "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
149
- "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
150
- "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
151
- "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
152
- "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
153
- "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
154
- "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
155
- "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
156
- "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
157
- "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
158
- "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
159
- "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
160
- "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
161
- "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
162
- "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
163
- "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
164
- "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
165
- "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
166
- "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
167
- "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
168
- "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
169
- "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
170
- "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
171
- "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
172
- "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
173
- "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
174
- "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
175
- "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
176
- "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
177
- "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
178
- "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
179
- "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
180
- "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
181
- "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
182
- "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
183
- "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
184
- "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
185
- "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
186
- "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
187
- "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
188
- "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
189
- "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
190
- "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
191
- "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
192
- "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
193
- "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
194
- "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
195
- "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
196
- "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
197
- "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
198
- "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
199
- "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
200
- "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
201
- "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
202
- "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
203
- "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
204
- "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
205
- "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
206
- "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
207
- "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
208
- "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
209
- "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
210
- "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
211
- "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
212
- "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
213
- "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
214
- "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
215
- "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
216
- "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
217
- "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
218
- "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
219
- "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
220
- "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
221
- "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
222
- "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
223
- "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
224
- "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
225
- "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
226
- "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
227
- "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
228
- "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
229
- "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
230
- "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
231
- "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
232
- "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
233
- "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
234
- "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
235
- "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
236
- "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
237
- "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
238
- "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
239
- "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
240
- "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
241
- "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
242
- "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
243
- "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
244
- "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
245
- "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
246
- "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
247
- "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
248
- "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
249
- "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
250
- "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
251
- "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
252
- "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
253
- "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
254
- "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
255
- "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
256
- "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
257
- "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
258
- "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
259
- "model.norm.weight": "model-00002-of-00002.safetensors"
260
- }
261
- }
 
1
+ {"metadata": {"mergekit_version": "0.0.5.2", "total_size": 7213504512}, "weight_map": {"lm_head.weight": "model-00001-of-00002.safetensors", "model.embed_tokens.weight": "model-00001-of-00002.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.input_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "model.norm.weight": "model-00002-of-00002.safetensors"}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
special_tokens_map.json CHANGED
@@ -7,14 +7,7 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|eot_id|>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "<|eot_id|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|end_of_text|>",
 
 
 
 
 
 
 
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d34fc616434d7e9cb4e84b22856d8a8140055716e58f70d8ea42d18259842d29
3
- size 17209844
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
tokenizer_config.json CHANGED
@@ -17,7 +17,7 @@
17
  "special": true
18
  },
19
  "128002": {
20
- "content": "<Thought>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
@@ -25,7 +25,7 @@
25
  "special": true
26
  },
27
  "128003": {
28
- "content": "</Thought>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
@@ -41,7 +41,7 @@
41
  "special": true
42
  },
43
  "128005": {
44
- "content": "<Output>",
45
  "lstrip": false,
46
  "normalized": false,
47
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": true
90
  },
91
  "128011": {
92
- "content": "</Output>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
@@ -2060,4 +2060,4 @@
2060
  "model_max_length": 131072,
2061
  "pad_token": "<|eot_id|>",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
- }
 
17
  "special": true
18
  },
19
  "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
25
  "special": true
26
  },
27
  "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
 
41
  "special": true
42
  },
43
  "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
  "lstrip": false,
46
  "normalized": false,
47
  "rstrip": false,
 
89
  "special": true
90
  },
91
  "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
 
2060
  "model_max_length": 131072,
2061
  "pad_token": "<|eot_id|>",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
+ }