shrenikb committed
Commit dbd433e · 1 Parent(s): 500f36e

Upload LlamaForCausalLM

config.json CHANGED
@@ -14,7 +14,7 @@
   "max_sequence_length": 2048,
   "model_type": "llama",
   "num_attention_heads": 32,
-  "num_hidden_layers": 32,
+  "num_hidden_layers": 24,
   "num_key_value_heads": 32,
   "pad_token_id": 0,
   "pretraining_tp": 1,
@@ -24,6 +24,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
   "transformers_version": "4.35.2",
-  "use_cache": false,
+  "use_cache": true,
   "vocab_size": 32000
 }
 
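The two functional edits in config.json are num_hidden_layers dropping from 32 to 24 (the weight-map diff below shows the checkpoint keeps only decoder layers 0-23) and use_cache flipping back to true, which re-enables the KV cache during generation. A minimal sketch for confirming a downloaded copy picked up both changes — the repo id is a hypothetical placeholder, not the actual repository name:

    from transformers import AutoConfig

    # "shrenikb/<this-repo>" is a placeholder -- substitute the real repo id.
    config = AutoConfig.from_pretrained("shrenikb/<this-repo>")

    assert config.num_hidden_layers == 24   # pruned from 32
    assert config.use_cache is True         # KV cache re-enabled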
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db24301ee7d36660b7788461db1dca0395fe9c5acc6e582ef2c75e6f40571fe4
+oid sha256:eebc4276f927037db1652c4b4067ff5749b56fec089e0cb09de6b7ab69f2979e
 size 4938985248
 
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f31c03c6b3fbca42c22db0855fe40a86210bb88cc6a5cacd458353e1dc88b8a
+oid sha256:643a732453037a98006c7414491c6e305e86d8fe62fe74ba51b7dda6c1273564
 size 4947390768
 
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1da8afa84d40e98529d473998824435b54edfa994d8bcef617699de87aaed468
-size 3590488736
+oid sha256:82ff11b7da9148bf631e3c0c2dd4c77c3722714ec7f3b77bddc15d725ea6a129
+size 352346664
 
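These three entries are Git LFS pointer files, not the weights themselves: each records the spec version, the SHA-256 of the real payload (oid), and its byte size. Only the third shard shrinks (3,590,488,736 → 352,346,664 bytes), consistent with the pruned layers having lived there; shards 1 and 2 keep their sizes but get new hashes, presumably because the remaining tensors were re-serialized. A sketch for checking a downloaded shard against its pointer:

    import hashlib

    def lfs_oid(path: str, chunk_mb: int = 8) -> str:
        """SHA-256 of a file's contents -- the 'oid' in a Git LFS pointer."""
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_mb << 20), b""):
                h.update(chunk)
        return h.hexdigest()

    # Should match the '+' oid line above for the re-uploaded shard:
    # lfs_oid("model-00003-of-00003.safetensors")
    # -> "82ff11b7da9148bf631e3c0c2dd4c77c3722714ec7f3b77bddc15d725ea6a129"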
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 13476831232
+    "total_size": 10238697472
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -158,60 +158,6 @@
     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
-    "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
@@ -221,24 +167,6 @@
     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
-    "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
 