RonanMcGovern committed on
Commit
d3c2efd
1 Parent(s): 7de10d4

Upload LlamaForCausalLM

Browse files
config.json CHANGED
@@ -14,12 +14,13 @@
14
  "num_attention_heads": 32,
15
  "num_hidden_layers": 32,
16
  "num_key_value_heads": 32,
 
17
  "pretraining_tp": 1,
18
  "rms_norm_eps": 1e-06,
19
  "rope_scaling": null,
20
  "tie_word_embeddings": false,
21
  "torch_dtype": "float16",
22
- "transformers_version": "4.32.0.dev0",
23
  "use_cache": true,
24
  "vocab_size": 32000
25
  }
 
14
  "num_attention_heads": 32,
15
  "num_hidden_layers": 32,
16
  "num_key_value_heads": 32,
17
+ "pad_token_id": 0,
18
  "pretraining_tp": 1,
19
  "rms_norm_eps": 1e-06,
20
  "rope_scaling": null,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "float16",
23
+ "transformers_version": "4.31.0",
24
  "use_cache": true,
25
  "vocab_size": 32000
26
  }
generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  "pad_token_id": 0,
6
  "temperature": 0.9,
7
  "top_p": 0.6,
8
- "transformers_version": "4.32.0.dev0"
9
  }
 
5
  "pad_token_id": 0,
6
  "temperature": 0.9,
7
  "top_p": 0.6,
8
+ "transformers_version": "4.31.0"
9
  }
pytorch_model-00001-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22c3786e6ed3d264da7fc577f9cc822d2553d0fb8e470032d61631e51239a1e3
3
- size 4939008626
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30f70db5da26524a404a814fec791f642fe34616b4d0471a09574e25472f5680
3
+ size 4939015766
pytorch_model-00002-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:319a35e81db61c256c219c679425e3d8e517da5284d2734d85b8b541fa775186
3
- size 4947415002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0e8166e13ae3992f2353a2d5df042c773712c140c19794c5daca9f65d58775
3
+ size 4947422190
pytorch_model-00003-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6b46500610f707c890aaba5a3f22f0ae2cbd570a43c5d362ae3f064b9ee8106
3
- size 3590505657
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2b4cb4e54238d6eb9e9d56bc550077e1ec7d51f3d0596a1ae9dc29013129964
3
+ size 3590510473
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 13476831232
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "pytorch_model-00003-of-00003.bin",
@@ -13,6 +13,7 @@
13
  "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
14
  "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
15
  "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
16
  "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
17
  "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
18
  "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -22,6 +23,7 @@
22
  "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
23
  "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
24
  "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
25
  "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
26
  "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
27
  "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -31,6 +33,7 @@
31
  "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
32
  "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
33
  "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
34
  "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
35
  "model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
36
  "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -40,6 +43,7 @@
40
  "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
41
  "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
42
  "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
43
  "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
44
  "model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
45
  "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -49,6 +53,7 @@
49
  "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
50
  "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
51
  "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
52
  "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
53
  "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
54
  "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -58,6 +63,7 @@
58
  "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
59
  "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
60
  "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
61
  "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
62
  "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
63
  "model.layers.14.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -67,6 +73,7 @@
67
  "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
68
  "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
69
  "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
70
  "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
71
  "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
72
  "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -76,6 +83,7 @@
76
  "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
77
  "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
78
  "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
79
  "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
80
  "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
81
  "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -85,6 +93,7 @@
85
  "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
86
  "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
87
  "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
88
  "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
89
  "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
90
  "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -94,6 +103,7 @@
94
  "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
95
  "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
96
  "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
97
  "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
98
  "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
99
  "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -103,6 +113,7 @@
103
  "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
104
  "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
105
  "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
106
  "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
107
  "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
108
  "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -112,6 +123,7 @@
112
  "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
113
  "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
114
  "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
115
  "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
116
  "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
117
  "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -121,6 +133,7 @@
121
  "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
122
  "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
123
  "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
124
  "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
125
  "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
126
  "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -130,6 +143,7 @@
130
  "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
131
  "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
132
  "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
133
  "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
134
  "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
135
  "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -139,6 +153,7 @@
139
  "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
140
  "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
141
  "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
142
  "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
143
  "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
144
  "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
@@ -148,6 +163,7 @@
148
  "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
149
  "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
150
  "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
151
  "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
152
  "model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
153
  "model.layers.23.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -157,6 +173,7 @@
157
  "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
158
  "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
159
  "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
 
160
  "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
161
  "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
162
  "model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -166,6 +183,7 @@
166
  "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
167
  "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
168
  "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
 
169
  "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
170
  "model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
171
  "model.layers.25.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -175,6 +193,7 @@
175
  "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
176
  "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
177
  "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
 
178
  "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
179
  "model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
180
  "model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -184,6 +203,7 @@
184
  "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
185
  "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
186
  "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
 
187
  "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
188
  "model.layers.27.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
189
  "model.layers.27.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -193,6 +213,7 @@
193
  "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
194
  "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
195
  "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
 
196
  "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
197
  "model.layers.28.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
198
  "model.layers.28.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -202,6 +223,7 @@
202
  "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
203
  "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
204
  "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
 
205
  "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
206
  "model.layers.29.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
207
  "model.layers.29.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -211,6 +233,7 @@
211
  "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
212
  "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
213
  "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
 
214
  "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
215
  "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
216
  "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -220,6 +243,7 @@
220
  "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
221
  "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
222
  "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
223
  "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
224
  "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
225
  "model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -229,6 +253,7 @@
229
  "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
230
  "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
231
  "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
 
232
  "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
233
  "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
234
  "model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
@@ -238,6 +263,7 @@
238
  "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
239
  "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
240
  "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
 
241
  "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
242
  "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
243
  "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -247,6 +273,7 @@
247
  "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
248
  "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
249
  "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
250
  "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
251
  "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
252
  "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -256,6 +283,7 @@
256
  "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
257
  "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
258
  "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
259
  "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
260
  "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
261
  "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -265,6 +293,7 @@
265
  "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
266
  "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
267
  "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
268
  "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
269
  "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
270
  "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -274,6 +303,7 @@
274
  "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
275
  "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
276
  "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
277
  "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
278
  "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
279
  "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -283,6 +313,7 @@
283
  "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
284
  "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
285
  "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
286
  "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
287
  "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
288
  "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
@@ -292,6 +323,7 @@
292
  "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
293
  "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
294
  "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
 
295
  "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
296
  "model.norm.weight": "pytorch_model-00003-of-00003.bin"
297
  }
 
1
  {
2
  "metadata": {
3
+ "total_size": 13476839424
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "pytorch_model-00003-of-00003.bin",
 
13
  "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
14
  "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
15
  "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
16
+ "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
17
  "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
18
  "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
19
  "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
23
  "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
24
  "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
25
  "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
26
+ "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
27
  "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
28
  "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
29
  "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
33
  "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
34
  "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
35
  "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
36
+ "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
37
  "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
38
  "model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
39
  "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
43
  "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
44
  "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
45
  "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
46
+ "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
47
  "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
48
  "model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
49
  "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
53
  "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
54
  "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
55
  "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
56
+ "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
57
  "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
58
  "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
59
  "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
63
  "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
64
  "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
65
  "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
66
+ "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
67
  "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
68
  "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
69
  "model.layers.14.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
73
  "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
74
  "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
75
  "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
76
+ "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
77
  "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
78
  "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
79
  "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
83
  "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
84
  "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
85
  "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
86
+ "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
87
  "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
88
  "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
89
  "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
93
  "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
94
  "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
95
  "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
96
+ "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
97
  "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
98
  "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
99
  "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
103
  "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
104
  "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
105
  "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
106
+ "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
107
  "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
108
  "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
109
  "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
113
  "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
114
  "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
115
  "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
116
+ "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
117
  "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
118
  "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
119
  "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
123
  "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
124
  "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
125
  "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
126
+ "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
127
  "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
128
  "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
129
  "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
133
  "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
134
  "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
135
  "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
136
+ "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
137
  "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
138
  "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
139
  "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
143
  "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
144
  "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
145
  "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
146
+ "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
147
  "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
148
  "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
149
  "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
153
  "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
154
  "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
155
  "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
156
+ "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
157
  "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
158
  "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
159
  "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
 
163
  "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
164
  "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
165
  "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
166
+ "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
167
  "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
168
  "model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
169
  "model.layers.23.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
173
  "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
174
  "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
175
  "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
176
+ "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
177
  "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
178
  "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
179
  "model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
183
  "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
184
  "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
185
  "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
186
+ "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
187
  "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
188
  "model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
189
  "model.layers.25.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
193
  "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
194
  "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
195
  "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
196
+ "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
197
  "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
198
  "model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
199
  "model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
203
  "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
204
  "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
205
  "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
206
+ "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
207
  "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
208
  "model.layers.27.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
209
  "model.layers.27.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
213
  "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
214
  "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
215
  "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
216
+ "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
217
  "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
218
  "model.layers.28.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
219
  "model.layers.28.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
223
  "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
224
  "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
225
  "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
226
+ "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
227
  "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
228
  "model.layers.29.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
229
  "model.layers.29.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
233
  "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
234
  "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
235
  "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
236
+ "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
237
  "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
238
  "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
239
  "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
243
  "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
244
  "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
245
  "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
246
+ "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
247
  "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
248
  "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
249
  "model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
253
  "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
254
  "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
255
  "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
256
+ "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
257
  "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
258
  "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
259
  "model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
 
263
  "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
264
  "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
265
  "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
266
+ "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
267
  "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
268
  "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
269
  "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
273
  "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
274
  "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
275
  "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
276
+ "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
277
  "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
278
  "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
279
  "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
283
  "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
284
  "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
285
  "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
286
+ "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
287
  "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
288
  "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
289
  "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
293
  "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
294
  "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
295
  "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
296
+ "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
297
  "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
298
  "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
299
  "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
303
  "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
304
  "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
305
  "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
306
+ "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
307
  "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
308
  "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
309
  "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
313
  "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
314
  "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
315
  "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
316
+ "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
317
  "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
318
  "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
319
  "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
 
323
  "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
324
  "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
325
  "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
326
+ "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
327
  "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
328
  "model.norm.weight": "pytorch_model-00003-of-00003.bin"
329
  }