win10 committed
Commit: a866406
1 Parent(s): dddc312

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
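The added attribute line routes `tokenizer.json` through Git LFS, so the repository stores a small pointer instead of the large file. As a minimal sketch (assuming `git` and `git-lfs` are installed and the working directory is the repo root), the same line is what `git lfs track` appends:

```python
# Hypothetical helper: produce the .gitattributes line above via git-lfs.
import subprocess

subprocess.run(["git", "lfs", "track", "tokenizer.json"], check=True)
# git-lfs appends: tokenizer.json filter=lfs diff=lfs merge=lfs -text
```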
README.md ADDED
@@ -0,0 +1,423 @@
+ ---
+ base_model:
+ - unsloth/Mistral-Nemo-Base-2407
+ library_name: transformers
+ tags:
+ - mergekit
+ - merge
+
+ ---
+ # merge
+
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
+
+ ## Merge Details
+ ### Merge Method
+
+ This model was merged using the passthrough merge method.
+
+ ### Models Merged
+
+ The following models were included in the merge:
+ * [unsloth/Mistral-Nemo-Base-2407](https://huggingface.co/unsloth/Mistral-Nemo-Base-2407)
+
+ ### Configuration
+
+ The following YAML configuration was used to produce this model:
+
+ ```yaml
+ dtype: bfloat16
+ merge_method: passthrough
+ slices:
+ - sources:
+   - layer_range: [0, 2]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [1, 3]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [2, 4]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [3, 5]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ # The following layers are newly added
+ - sources:
+   - layer_range: [4, 6]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [5, 7]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [6, 8]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [7, 9]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [8, 10]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [9, 11]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [10, 12]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [11, 13]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [12, 14]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [13, 15]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [14, 16]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [15, 17]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [16, 18]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [17, 19]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [18, 20]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [19, 21]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [20, 22]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [21, 23]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [22, 24]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [23, 25]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [24, 26]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [25, 27]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [26, 28]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [27, 29]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [28, 30]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [29, 31]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [30, 32]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [31, 33]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [32, 34]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [33, 35]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [34, 36]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [35, 37]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [36, 38]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [37, 39]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [38, 40]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ ```
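For anyone loading the result, here is a minimal sketch with `transformers`; the repository id below is a hypothetical placeholder, and `torch_dtype` mirrors the `dtype: bfloat16` in the configuration above.

```python
# Minimal loading sketch (the repo id is a placeholder, not this
# repository's actual id; substitute your local path or HF repo).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "win10/merged-model"  # hypothetical placeholder

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # matches the merge's dtype: bfloat16
    device_map="auto",
)

inputs = tokenizer("Hello from a 78-layer frankenmerge:", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```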
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "unsloth/Mistral-Nemo-Base-2407",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 1024000,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 78,
+   "num_key_value_heads": 8,
+   "pad_token_id": 10,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.45.1",
+   "unsloth_version": "2024.9",
+   "use_cache": true,
+   "vocab_size": 131072
+ }
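The `num_hidden_layers: 78` value follows directly from the slice list above: 39 overlapping two-layer windows over the 40-layer base model stack to 78 decoder layers. A quick check of that arithmetic:

```python
# Each "sources" entry copies the half-open window [i, i+2) from the
# 40-layer base model; the config lists 39 such windows, [0,2]..[38,40].
slice_ranges = [(i, i + 2) for i in range(39)]
total_layers = sum(end - start for start, end in slice_ranges)
assert total_layers == 78  # matches "num_hidden_layers": 78
print(f"{len(slice_ranges)} slices -> {total_layers} layers")
```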
mergekit_config.yml ADDED
@@ -0,0 +1,394 @@
+ dtype: bfloat16
+ merge_method: passthrough
+ slices:
+ - sources:
+   - layer_range: [0, 2]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [1, 3]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [2, 4]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [3, 5]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ # The following layers are newly added
+ - sources:
+   - layer_range: [4, 6]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [5, 7]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [6, 8]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [7, 9]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [8, 10]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [9, 11]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [10, 12]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [11, 13]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [12, 14]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [13, 15]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [14, 16]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [15, 17]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [16, 18]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [17, 19]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [18, 20]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [19, 21]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [20, 22]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [21, 23]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [22, 24]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [23, 25]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [24, 26]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [25, 27]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [26, 28]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [27, 29]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [28, 30]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [29, 31]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [30, 32]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [31, 33]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [32, 34]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [33, 35]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [34, 36]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [35, 37]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [36, 38]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [37, 39]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
+ - sources:
+   - layer_range: [38, 40]
+     model: unsloth/Mistral-Nemo-Base-2407
+     parameters:
+       scale:
+       - filter: o_proj
+         value: 0.0
+       - filter: down_proj
+         value: 0.0
+       - value: 1.0
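To reproduce the merge from this file, a minimal sketch using mergekit's `mergekit-yaml` entry point (assuming `pip install mergekit`; exact flags can vary between versions):

```python
# Invoke the mergekit CLI on the config above; --cuda is optional and
# only useful on a GPU machine. Output lands in ./merged-model.
import subprocess

subprocess.run(
    ["mergekit-yaml", "mergekit_config.yml", "./merged-model", "--cuda"],
    check=True,  # raise if the merge step fails
)
```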
model-00001-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b848e9c197e4f1ecb4d46f86505432e6657a6000c49bfea0a457218aff074e91
+ size 4907421648
model-00002-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:72340cc1562b95489b5c883f154b089e796739be494969bbf21b32601ee9b3e1
+ size 4865607480
model-00003-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b9207bb434efd418b29e9f6f351b21246a6142643ff472d295cad4921baec52
+ size 4949451440
model-00004-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1cc3aa9f9ac02ee99a076c900968356c7a6a17e620a4f6fe821d48c46443b564
+ size 4865607456
model-00005-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23ad1908b369df31a5853a6ac260784ab1b509df32eac3b363213f41da95a580
+ size 4949451440
model-00006-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c18ecb76ca5015f39ba66e463937ede712ca7946d3a8935649475a091abacef4
+ size 4865607456
model-00007-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f2370407681acc61d20b8d5ce8765a43b5d1e63fffda2d92335a4ed0f2671b5f
+ size 4949451440
model-00008-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cff3aa06bfebed88ee590297b992dbd0fc8fd932191c108a042441c35442e4e2
+ size 4907529456
model-00009-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:40e612a57ee78022f5736c7d98864b44dcf5b3e4a07db1e1ad3e4dbfc1fba868
+ size 4865607464
model-00010-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:310d195dac20e5fdba4e1bbcfbfedfcced5b1d772293e9e48d65827ef606b09d
+ size 1090551736
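Each shard entry above is a Git LFS pointer file (spec version, SHA-256 oid, byte size) rather than the weights themselves; together the ten shards come to roughly 45.2 GB. A small parsing sketch, assuming the pointer is checked out as plain text:

```python
# Parse a 3-line Git LFS pointer ("version", "oid", "size") into a dict.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer("model-00010-of-00010.safetensors")
print(ptr["oid"], int(ptr["size"]))  # sha256:310d195d... 1090551736
```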
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
+ {"metadata": {"mergekit_version": "0.0.4.4", "total_size": 45216204800}, "weight_map": {"lm_head.weight": "model-00001-of-00010.safetensors", "model.embed_tokens.weight": "model-00001-of-00010.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00001-of-00010.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00010.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00001-of-00010.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00010.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors", "model.layers.20.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.19.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00001-of-00010.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00001-of-00010.safetensors", "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00010.safetensors", "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00010.safetensors", "model.layers.20.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.19.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.19.self_attn.q_proj.weight": 
"model-00002-of-00010.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.22.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.21.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00002-of-00010.safetensors", "model.layers.21.mlp.down_proj.weight": "model-00002-of-00010.safetensors", "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00010.safetensors", "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00010.safetensors", "model.layers.22.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.21.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.24.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.23.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00002-of-00010.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00002-of-00010.safetensors", "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00010.safetensors", "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00010.safetensors", "model.layers.24.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.23.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.26.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.25.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00002-of-00010.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00002-of-00010.safetensors", "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00010.safetensors", "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00010.safetensors", "model.layers.26.mlp.up_proj.weight": 
"model-00002-of-00010.safetensors", "model.layers.25.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.28.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.27.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.28.mlp.down_proj.weight": "model-00002-of-00010.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00002-of-00010.safetensors", "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00010.safetensors", "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00010.safetensors", "model.layers.28.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.27.mlp.up_proj.weight": "model-00002-of-00010.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00010.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00010.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00010.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00010.safetensors", "model.layers.30.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.29.input_layernorm.weight": "model-00002-of-00010.safetensors", "model.layers.30.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.29.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.30.mlp.up_proj.weight": "model-00003-of-00010.safetensors", "model.layers.29.mlp.up_proj.weight": "model-00003-of-00010.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00010.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00010.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00010.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00010.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00010.safetensors", "model.layers.29.self_attn.q_proj.weight": 
"model-00003-of-00010.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00010.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00010.safetensors", "model.layers.32.input_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.31.input_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.32.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.31.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.32.mlp.up_proj.weight": "model-00003-of-00010.safetensors", "model.layers.31.mlp.up_proj.weight": "model-00003-of-00010.safetensors", "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00010.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00010.safetensors", "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00010.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00010.safetensors", "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00010.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00010.safetensors", "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00010.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00010.safetensors", "model.layers.34.input_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.33.input_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.34.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.33.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.34.mlp.up_proj.weight": "model-00003-of-00010.safetensors", "model.layers.33.mlp.up_proj.weight": "model-00003-of-00010.safetensors", "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00010.safetensors", "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00010.safetensors", "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00010.safetensors", "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00010.safetensors", "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00010.safetensors", "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00010.safetensors", "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00010.safetensors", "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00010.safetensors", "model.layers.36.input_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.35.input_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.36.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.35.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.36.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.35.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.36.mlp.up_proj.weight": 
"model-00003-of-00010.safetensors", "model.layers.35.mlp.up_proj.weight": "model-00003-of-00010.safetensors", "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.36.self_attn.k_proj.weight": "model-00003-of-00010.safetensors", "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00010.safetensors", "model.layers.36.self_attn.o_proj.weight": "model-00003-of-00010.safetensors", "model.layers.35.self_attn.o_proj.weight": "model-00003-of-00010.safetensors", "model.layers.36.self_attn.q_proj.weight": "model-00003-of-00010.safetensors", "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00010.safetensors", "model.layers.36.self_attn.v_proj.weight": "model-00003-of-00010.safetensors", "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00010.safetensors", "model.layers.38.input_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.37.input_layernorm.weight": "model-00003-of-00010.safetensors", "model.layers.38.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.37.mlp.down_proj.weight": "model-00003-of-00010.safetensors", "model.layers.38.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.37.mlp.gate_proj.weight": "model-00003-of-00010.safetensors", "model.layers.38.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.37.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.38.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.37.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.38.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.37.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.38.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.37.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.38.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.37.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.4.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.3.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00004-of-00010.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00004-of-00010.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00004-of-00010.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00010.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.3.self_attn.q_proj.weight": 
"model-00004-of-00010.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.40.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.39.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.40.mlp.down_proj.weight": "model-00004-of-00010.safetensors", "model.layers.39.mlp.down_proj.weight": "model-00004-of-00010.safetensors", "model.layers.40.mlp.gate_proj.weight": "model-00004-of-00010.safetensors", "model.layers.39.mlp.gate_proj.weight": "model-00004-of-00010.safetensors", "model.layers.40.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.39.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.40.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.39.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.40.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.39.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.40.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.39.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.40.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.39.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.40.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.39.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.42.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.41.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.42.mlp.down_proj.weight": "model-00004-of-00010.safetensors", "model.layers.41.mlp.down_proj.weight": "model-00004-of-00010.safetensors", "model.layers.42.mlp.gate_proj.weight": "model-00004-of-00010.safetensors", "model.layers.41.mlp.gate_proj.weight": "model-00004-of-00010.safetensors", "model.layers.42.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.41.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.42.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.42.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.41.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.42.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.41.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.42.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.41.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.42.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.41.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.44.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.43.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.44.mlp.down_proj.weight": "model-00004-of-00010.safetensors", "model.layers.43.mlp.down_proj.weight": "model-00004-of-00010.safetensors", "model.layers.44.mlp.gate_proj.weight": "model-00004-of-00010.safetensors", "model.layers.43.mlp.gate_proj.weight": "model-00004-of-00010.safetensors", "model.layers.44.mlp.up_proj.weight": "model-00004-of-00010.safetensors", 
"model.layers.43.mlp.up_proj.weight": "model-00004-of-00010.safetensors", "model.layers.44.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.43.post_attention_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.44.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.43.self_attn.k_proj.weight": "model-00004-of-00010.safetensors", "model.layers.44.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.43.self_attn.o_proj.weight": "model-00004-of-00010.safetensors", "model.layers.44.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.43.self_attn.q_proj.weight": "model-00004-of-00010.safetensors", "model.layers.44.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.43.self_attn.v_proj.weight": "model-00004-of-00010.safetensors", "model.layers.46.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.45.input_layernorm.weight": "model-00004-of-00010.safetensors", "model.layers.46.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.45.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.46.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.45.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.46.mlp.up_proj.weight": "model-00005-of-00010.safetensors", "model.layers.45.mlp.up_proj.weight": "model-00005-of-00010.safetensors", "model.layers.46.post_attention_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.45.post_attention_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.46.self_attn.k_proj.weight": "model-00005-of-00010.safetensors", "model.layers.45.self_attn.k_proj.weight": "model-00005-of-00010.safetensors", "model.layers.46.self_attn.o_proj.weight": "model-00005-of-00010.safetensors", "model.layers.45.self_attn.o_proj.weight": "model-00005-of-00010.safetensors", "model.layers.46.self_attn.q_proj.weight": "model-00005-of-00010.safetensors", "model.layers.45.self_attn.q_proj.weight": "model-00005-of-00010.safetensors", "model.layers.46.self_attn.v_proj.weight": "model-00005-of-00010.safetensors", "model.layers.45.self_attn.v_proj.weight": "model-00005-of-00010.safetensors", "model.layers.48.input_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.47.input_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.48.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.47.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.48.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.47.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.48.mlp.up_proj.weight": "model-00005-of-00010.safetensors", "model.layers.47.mlp.up_proj.weight": "model-00005-of-00010.safetensors", "model.layers.48.post_attention_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.47.post_attention_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.48.self_attn.k_proj.weight": "model-00005-of-00010.safetensors", "model.layers.47.self_attn.k_proj.weight": "model-00005-of-00010.safetensors", "model.layers.48.self_attn.o_proj.weight": "model-00005-of-00010.safetensors", "model.layers.47.self_attn.o_proj.weight": "model-00005-of-00010.safetensors", "model.layers.48.self_attn.q_proj.weight": "model-00005-of-00010.safetensors", "model.layers.47.self_attn.q_proj.weight": "model-00005-of-00010.safetensors", 
"model.layers.48.self_attn.v_proj.weight": "model-00005-of-00010.safetensors", "model.layers.47.self_attn.v_proj.weight": "model-00005-of-00010.safetensors", "model.layers.50.input_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.49.input_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.50.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.49.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.50.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.49.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.50.mlp.up_proj.weight": "model-00005-of-00010.safetensors", "model.layers.49.mlp.up_proj.weight": "model-00005-of-00010.safetensors", "model.layers.50.post_attention_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.49.post_attention_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.50.self_attn.k_proj.weight": "model-00005-of-00010.safetensors", "model.layers.49.self_attn.k_proj.weight": "model-00005-of-00010.safetensors", "model.layers.50.self_attn.o_proj.weight": "model-00005-of-00010.safetensors", "model.layers.49.self_attn.o_proj.weight": "model-00005-of-00010.safetensors", "model.layers.50.self_attn.q_proj.weight": "model-00005-of-00010.safetensors", "model.layers.49.self_attn.q_proj.weight": "model-00005-of-00010.safetensors", "model.layers.50.self_attn.v_proj.weight": "model-00005-of-00010.safetensors", "model.layers.49.self_attn.v_proj.weight": "model-00005-of-00010.safetensors", "model.layers.52.input_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.51.input_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.52.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.51.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.52.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.51.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.52.mlp.up_proj.weight": "model-00005-of-00010.safetensors", "model.layers.51.mlp.up_proj.weight": "model-00005-of-00010.safetensors", "model.layers.52.post_attention_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.51.post_attention_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.52.self_attn.k_proj.weight": "model-00005-of-00010.safetensors", "model.layers.51.self_attn.k_proj.weight": "model-00005-of-00010.safetensors", "model.layers.52.self_attn.o_proj.weight": "model-00005-of-00010.safetensors", "model.layers.51.self_attn.o_proj.weight": "model-00005-of-00010.safetensors", "model.layers.52.self_attn.q_proj.weight": "model-00005-of-00010.safetensors", "model.layers.51.self_attn.q_proj.weight": "model-00005-of-00010.safetensors", "model.layers.52.self_attn.v_proj.weight": "model-00005-of-00010.safetensors", "model.layers.51.self_attn.v_proj.weight": "model-00005-of-00010.safetensors", "model.layers.54.input_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.53.input_layernorm.weight": "model-00005-of-00010.safetensors", "model.layers.54.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.53.mlp.down_proj.weight": "model-00005-of-00010.safetensors", "model.layers.54.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.53.mlp.gate_proj.weight": "model-00005-of-00010.safetensors", "model.layers.54.mlp.up_proj.weight": "model-00006-of-00010.safetensors", 
"model.layers.53.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.54.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.53.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.54.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.53.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.54.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.53.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.54.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.53.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.54.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.53.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.56.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.55.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.56.mlp.down_proj.weight": "model-00006-of-00010.safetensors", "model.layers.55.mlp.down_proj.weight": "model-00006-of-00010.safetensors", "model.layers.56.mlp.gate_proj.weight": "model-00006-of-00010.safetensors", "model.layers.55.mlp.gate_proj.weight": "model-00006-of-00010.safetensors", "model.layers.56.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.55.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.56.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.55.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.56.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.55.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.56.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.55.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.56.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.55.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.56.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.55.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.58.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.57.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.58.mlp.down_proj.weight": "model-00006-of-00010.safetensors", "model.layers.57.mlp.down_proj.weight": "model-00006-of-00010.safetensors", "model.layers.58.mlp.gate_proj.weight": "model-00006-of-00010.safetensors", "model.layers.57.mlp.gate_proj.weight": "model-00006-of-00010.safetensors", "model.layers.58.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.57.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.58.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.57.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.58.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.57.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.58.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.57.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.58.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.57.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", 
"model.layers.58.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.57.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.6.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.5.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00006-of-00010.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00006-of-00010.safetensors", "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00010.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00006-of-00010.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.60.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.59.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.60.mlp.down_proj.weight": "model-00006-of-00010.safetensors", "model.layers.59.mlp.down_proj.weight": "model-00006-of-00010.safetensors", "model.layers.60.mlp.gate_proj.weight": "model-00006-of-00010.safetensors", "model.layers.59.mlp.gate_proj.weight": "model-00006-of-00010.safetensors", "model.layers.60.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.59.mlp.up_proj.weight": "model-00006-of-00010.safetensors", "model.layers.60.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.59.post_attention_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.60.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.59.self_attn.k_proj.weight": "model-00006-of-00010.safetensors", "model.layers.60.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.59.self_attn.o_proj.weight": "model-00006-of-00010.safetensors", "model.layers.60.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.59.self_attn.q_proj.weight": "model-00006-of-00010.safetensors", "model.layers.60.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.59.self_attn.v_proj.weight": "model-00006-of-00010.safetensors", "model.layers.62.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.61.input_layernorm.weight": "model-00006-of-00010.safetensors", "model.layers.62.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.61.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.62.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.61.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.62.mlp.up_proj.weight": "model-00007-of-00010.safetensors", "model.layers.61.mlp.up_proj.weight": 
"model-00007-of-00010.safetensors", "model.layers.62.post_attention_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.61.post_attention_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.62.self_attn.k_proj.weight": "model-00007-of-00010.safetensors", "model.layers.61.self_attn.k_proj.weight": "model-00007-of-00010.safetensors", "model.layers.62.self_attn.o_proj.weight": "model-00007-of-00010.safetensors", "model.layers.61.self_attn.o_proj.weight": "model-00007-of-00010.safetensors", "model.layers.62.self_attn.q_proj.weight": "model-00007-of-00010.safetensors", "model.layers.61.self_attn.q_proj.weight": "model-00007-of-00010.safetensors", "model.layers.62.self_attn.v_proj.weight": "model-00007-of-00010.safetensors", "model.layers.61.self_attn.v_proj.weight": "model-00007-of-00010.safetensors", "model.layers.64.input_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.63.input_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.64.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.63.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.64.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.63.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.64.mlp.up_proj.weight": "model-00007-of-00010.safetensors", "model.layers.63.mlp.up_proj.weight": "model-00007-of-00010.safetensors", "model.layers.64.post_attention_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.63.post_attention_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.64.self_attn.k_proj.weight": "model-00007-of-00010.safetensors", "model.layers.63.self_attn.k_proj.weight": "model-00007-of-00010.safetensors", "model.layers.64.self_attn.o_proj.weight": "model-00007-of-00010.safetensors", "model.layers.63.self_attn.o_proj.weight": "model-00007-of-00010.safetensors", "model.layers.64.self_attn.q_proj.weight": "model-00007-of-00010.safetensors", "model.layers.63.self_attn.q_proj.weight": "model-00007-of-00010.safetensors", "model.layers.64.self_attn.v_proj.weight": "model-00007-of-00010.safetensors", "model.layers.63.self_attn.v_proj.weight": "model-00007-of-00010.safetensors", "model.layers.66.input_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.65.input_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.66.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.65.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.66.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.65.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.66.mlp.up_proj.weight": "model-00007-of-00010.safetensors", "model.layers.65.mlp.up_proj.weight": "model-00007-of-00010.safetensors", "model.layers.66.post_attention_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.65.post_attention_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.66.self_attn.k_proj.weight": "model-00007-of-00010.safetensors", "model.layers.65.self_attn.k_proj.weight": "model-00007-of-00010.safetensors", "model.layers.66.self_attn.o_proj.weight": "model-00007-of-00010.safetensors", "model.layers.65.self_attn.o_proj.weight": "model-00007-of-00010.safetensors", "model.layers.66.self_attn.q_proj.weight": "model-00007-of-00010.safetensors", "model.layers.65.self_attn.q_proj.weight": "model-00007-of-00010.safetensors", "model.layers.66.self_attn.v_proj.weight": 
"model-00007-of-00010.safetensors", "model.layers.65.self_attn.v_proj.weight": "model-00007-of-00010.safetensors", "model.layers.68.input_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.67.input_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.68.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.67.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.68.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.67.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.68.mlp.up_proj.weight": "model-00007-of-00010.safetensors", "model.layers.67.mlp.up_proj.weight": "model-00007-of-00010.safetensors", "model.layers.68.post_attention_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.67.post_attention_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.68.self_attn.k_proj.weight": "model-00007-of-00010.safetensors", "model.layers.67.self_attn.k_proj.weight": "model-00007-of-00010.safetensors", "model.layers.68.self_attn.o_proj.weight": "model-00007-of-00010.safetensors", "model.layers.67.self_attn.o_proj.weight": "model-00007-of-00010.safetensors", "model.layers.68.self_attn.q_proj.weight": "model-00007-of-00010.safetensors", "model.layers.67.self_attn.q_proj.weight": "model-00007-of-00010.safetensors", "model.layers.68.self_attn.v_proj.weight": "model-00007-of-00010.safetensors", "model.layers.67.self_attn.v_proj.weight": "model-00007-of-00010.safetensors", "model.layers.70.input_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.69.input_layernorm.weight": "model-00007-of-00010.safetensors", "model.layers.70.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.69.mlp.down_proj.weight": "model-00007-of-00010.safetensors", "model.layers.70.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.69.mlp.gate_proj.weight": "model-00007-of-00010.safetensors", "model.layers.70.mlp.up_proj.weight": "model-00008-of-00010.safetensors", "model.layers.69.mlp.up_proj.weight": "model-00008-of-00010.safetensors", "model.layers.70.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.69.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.70.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.69.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.70.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.69.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.70.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.69.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.70.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", "model.layers.69.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", "model.layers.72.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.71.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.72.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.71.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.72.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.71.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.72.mlp.up_proj.weight": "model-00008-of-00010.safetensors", "model.layers.71.mlp.up_proj.weight": "model-00008-of-00010.safetensors", 
"model.layers.72.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.71.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.72.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.71.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.72.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.71.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.72.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.71.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.72.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", "model.layers.71.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", "model.layers.74.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.73.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.74.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.73.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.74.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.73.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.74.mlp.up_proj.weight": "model-00008-of-00010.safetensors", "model.layers.73.mlp.up_proj.weight": "model-00008-of-00010.safetensors", "model.layers.74.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.73.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.74.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.73.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.74.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.73.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.74.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.73.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.74.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", "model.layers.73.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", "model.layers.76.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.75.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.76.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.75.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.76.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.75.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.76.mlp.up_proj.weight": "model-00008-of-00010.safetensors", "model.layers.75.mlp.up_proj.weight": "model-00008-of-00010.safetensors", "model.layers.76.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.75.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.76.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.75.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.76.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.75.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.76.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.75.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.76.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", 
"model.layers.75.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", "model.layers.77.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.77.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.77.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.77.mlp.up_proj.weight": "model-00008-of-00010.safetensors", "model.layers.77.post_attention_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.77.self_attn.k_proj.weight": "model-00008-of-00010.safetensors", "model.layers.77.self_attn.o_proj.weight": "model-00008-of-00010.safetensors", "model.layers.77.self_attn.q_proj.weight": "model-00008-of-00010.safetensors", "model.layers.77.self_attn.v_proj.weight": "model-00008-of-00010.safetensors", "model.layers.8.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.7.input_layernorm.weight": "model-00008-of-00010.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00008-of-00010.safetensors", "model.layers.8.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.7.mlp.gate_proj.weight": "model-00008-of-00010.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.10.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.9.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00009-of-00010.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00009-of-00010.safetensors", "model.layers.10.mlp.gate_proj.weight": "model-00009-of-00010.safetensors", "model.layers.9.mlp.gate_proj.weight": "model-00009-of-00010.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.9.self_attn.v_proj.weight": 
"model-00009-of-00010.safetensors", "model.layers.12.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.11.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00009-of-00010.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00009-of-00010.safetensors", "model.layers.12.mlp.gate_proj.weight": "model-00009-of-00010.safetensors", "model.layers.11.mlp.gate_proj.weight": "model-00009-of-00010.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.14.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.13.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00009-of-00010.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00009-of-00010.safetensors", "model.layers.14.mlp.gate_proj.weight": "model-00009-of-00010.safetensors", "model.layers.13.mlp.gate_proj.weight": "model-00009-of-00010.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.13.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.16.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.15.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.16.mlp.down_proj.weight": "model-00009-of-00010.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00009-of-00010.safetensors", "model.layers.16.mlp.gate_proj.weight": "model-00009-of-00010.safetensors", "model.layers.15.mlp.gate_proj.weight": "model-00009-of-00010.safetensors", "model.layers.16.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00009-of-00010.safetensors", "model.layers.16.post_attention_layernorm.weight": 
"model-00009-of-00010.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00009-of-00010.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00009-of-00010.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00009-of-00010.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00009-of-00010.safetensors", "model.layers.18.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.17.input_layernorm.weight": "model-00009-of-00010.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.mlp.gate_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.mlp.gate_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.mlp.up_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.mlp.up_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.norm.weight": "model-00010-of-00010.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
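These four entries are the special tokens that `AutoTokenizer` picks up when loading the repo. A minimal sketch for verifying them (the repo id below is a placeholder, not this repository's actual name):

```python
from transformers import AutoTokenizer

# Placeholder repo id; substitute the real repository name.
tok = AutoTokenizer.from_pretrained("win10/your-merged-repo")

# The four entries from special_tokens_map.json surface as attributes.
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)
# Expected, per the file above: <s> </s> <pad> <unk>
```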
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
+ size 17078292
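Because `tokenizer.json` is tracked by Git LFS (per the `.gitattributes` change in this commit), the repo stores only this pointer; the actual ~17 MB file lives in LFS storage. `huggingface_hub` resolves the pointer transparently. A minimal sketch (again with a placeholder repo id):

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id; substitute the real repository name.
path = hf_hub_download(repo_id="win10/your-merged-repo",
                       filename="tokenizer.json")

# The returned path points at the full downloaded tokenizer.json,
# not the three-line LFS pointer shown above.
print(path)
```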
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff