f4b397c1183e04242c7dfa64fddb9809c58c7b0de9f4962f48aec1812d509eaa
Browse files- checkpoint/config.json +26 -0
- checkpoint/generation_config.json +6 -0
- checkpoint/pytorch_model.bin/key_to_filename.json +3 -0
- checkpoint/pytorch_model.bin/p0.model.embed_tokens.weight +3 -0
- checkpoint/pytorch_model.bin/p1.model.layers.0.self_attn.q_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p10.model.layers.1.self_attn.q_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p100.model.layers.11.self_attn.q_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p101.model.layers.11.self_attn.k_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p102.model.layers.11.self_attn.v_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p103.model.layers.11.self_attn.o_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p104.model.layers.11.mlp.gate_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p105.model.layers.11.mlp.up_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p106.model.layers.11.mlp.down_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p107.model.layers.11.input_layernorm.weight +3 -0
- checkpoint/pytorch_model.bin/p108.model.layers.11.post_attention_layernorm.weight +3 -0
- checkpoint/pytorch_model.bin/p109.model.layers.12.self_attn.q_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p11.model.layers.1.self_attn.k_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p110.model.layers.12.self_attn.k_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p111.model.layers.12.self_attn.v_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p112.model.layers.12.self_attn.o_proj.weight +3 -0
- checkpoint/pytorch_model.bin/p113.model.layers.12.mlp.gate_proj.weight +3 -0
checkpoint/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Mistral-7B-Instruct-v0.2",
|
3 |
+
"architectures": [
|
4 |
+
"MistralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 14336,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"model_type": "mistral",
|
15 |
+
"num_attention_heads": 32,
|
16 |
+
"num_hidden_layers": 32,
|
17 |
+
"num_key_value_heads": 8,
|
18 |
+
"rms_norm_eps": 1e-05,
|
19 |
+
"rope_theta": 1000000.0,
|
20 |
+
"sliding_window": 4096,
|
21 |
+
"tie_word_embeddings": false,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.36.2",
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 32000
|
26 |
+
}
|
checkpoint/generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.36.2"
|
6 |
+
}
|
checkpoint/pytorch_model.bin/key_to_filename.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:825d20f4a18183eff3963e805edd13ef7eb35b0aff7a850e8153ca1eeeb37970
|
3 |
+
size 26397
|
checkpoint/pytorch_model.bin/p0.model.embed_tokens.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f84f240f56b9b91a4eb546cf2763b9669ab2322e7a4d11ec408029f1e0ccf3f
|
3 |
+
size 524288789
|
checkpoint/pytorch_model.bin/p1.model.layers.0.self_attn.q_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b9f59084e4ae45d0b66bb22229aae343297240dae5f6e4cbc43c6359e6bb6b1
|
3 |
+
size 67109756
|
checkpoint/pytorch_model.bin/p10.model.layers.1.self_attn.q_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62eead0f0fb665ee7d32dba7ab975aa0b363384d1a282d9f5979d9b3f4f83a79
|
3 |
+
size 67109759
|
checkpoint/pytorch_model.bin/p100.model.layers.11.self_attn.q_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a590c98c41d2bfc4031baba4a682ea0dec309a11332a64af3a61b95046c5941a
|
3 |
+
size 67109765
|
checkpoint/pytorch_model.bin/p101.model.layers.11.self_attn.k_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8e53f21af0b3434d12d6265df2d228b179fdddbda2a125d549c669d54847631
|
3 |
+
size 16778117
|
checkpoint/pytorch_model.bin/p102.model.layers.11.self_attn.v_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9316efe8dc37ab8c49c95904f0a9897a4589457f7a4432cb642c02cc84d8823
|
3 |
+
size 16778117
|
checkpoint/pytorch_model.bin/p103.model.layers.11.self_attn.o_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2d08d5319c0e0bf37103ece966f1bc576090c12366fe1f4631f2e1ba1678038
|
3 |
+
size 67109765
|
checkpoint/pytorch_model.bin/p104.model.layers.11.mlp.gate_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ab7fa48bd4b58319e976722893fb464ad0cd6b2b51afad3217604885c5b6419
|
3 |
+
size 234881916
|
checkpoint/pytorch_model.bin/p105.model.layers.11.mlp.up_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:704c2e3eeecb7412a1716b1923910c38d06fde684cdd9a85b75305ca6b131b1a
|
3 |
+
size 234881910
|
checkpoint/pytorch_model.bin/p106.model.layers.11.mlp.down_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c25fed67c919b8cf27459ac82f1b08ac24ef52d495c224d44ce062c68547b9e1
|
3 |
+
size 234881916
|
checkpoint/pytorch_model.bin/p107.model.layers.11.input_layernorm.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8872587aa5a7c6b09a03ad8978ebc59736a87a6409a7252e26e838a91988bb4d
|
3 |
+
size 17282
|
checkpoint/pytorch_model.bin/p108.model.layers.11.post_attention_layernorm.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9e6e5654ffc98758d06bbc75a504312e5b2d8c9f6b5d044d95a14b012c677a3
|
3 |
+
size 17309
|
checkpoint/pytorch_model.bin/p109.model.layers.12.self_attn.q_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6041795aa0bfb94f5456728f83172735b06189fac06f328f3f78261f319eff3
|
3 |
+
size 67109765
|
checkpoint/pytorch_model.bin/p11.model.layers.1.self_attn.k_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5d238915253ae0c29b2d3ece0956c782bf7b1b52030ce39b4460a61e8f23d1b
|
3 |
+
size 16778111
|
checkpoint/pytorch_model.bin/p110.model.layers.12.self_attn.k_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60d228cf0d1c86750b7a01b5d8373de88283b8ae118f29a7b2a1fe8693beeb48
|
3 |
+
size 16778117
|
checkpoint/pytorch_model.bin/p111.model.layers.12.self_attn.v_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a04709843b44812e3076d64e28b01daad070e4443e8cd4d0d53ecbfec9ce4af8
|
3 |
+
size 16778117
|
checkpoint/pytorch_model.bin/p112.model.layers.12.self_attn.o_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17aea5825bdea74baaba107e5e2c3d71716a4946dab45ed6484dcae2dfbed235
|
3 |
+
size 67109765
|
checkpoint/pytorch_model.bin/p113.model.layers.12.mlp.gate_proj.weight
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b251efe132fa6b87abb0e992a46dba3f8e1187b23f9317eaa36f360c64b78d58
|
3 |
+
size 234881916
|