Upload MixtralForCausalLM
Browse files- config.json +36 -0
- generation_config.json +6 -0
- model-00001-of-00035.safetensors +3 -0
- model-00002-of-00035.safetensors +3 -0
- model-00003-of-00035.safetensors +3 -0
- model-00004-of-00035.safetensors +3 -0
- model-00005-of-00035.safetensors +3 -0
- model-00006-of-00035.safetensors +3 -0
- model-00007-of-00035.safetensors +3 -0
- model-00008-of-00035.safetensors +3 -0
- model-00009-of-00035.safetensors +3 -0
- model-00010-of-00035.safetensors +3 -0
- model-00011-of-00035.safetensors +3 -0
- model-00012-of-00035.safetensors +3 -0
- model-00013-of-00035.safetensors +3 -0
- model-00014-of-00035.safetensors +3 -0
- model-00015-of-00035.safetensors +3 -0
- model-00016-of-00035.safetensors +3 -0
- model-00017-of-00035.safetensors +3 -0
- model-00018-of-00035.safetensors +3 -0
- model-00019-of-00035.safetensors +3 -0
- model-00020-of-00035.safetensors +3 -0
- model-00021-of-00035.safetensors +3 -0
- model-00022-of-00035.safetensors +3 -0
- model-00023-of-00035.safetensors +3 -0
- model-00024-of-00035.safetensors +3 -0
- model-00025-of-00035.safetensors +3 -0
- model-00026-of-00035.safetensors +3 -0
- model-00027-of-00035.safetensors +3 -0
- model-00028-of-00035.safetensors +3 -0
- model-00029-of-00035.safetensors +3 -0
- model-00030-of-00035.safetensors +3 -0
- model-00031-of-00035.safetensors +3 -0
- model-00032-of-00035.safetensors +3 -0
- model-00033-of-00035.safetensors +3 -0
- model-00034-of-00035.safetensors +3 -0
- model-00035-of-00035.safetensors +3 -0
- model.safetensors.index.json +0 -0
config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"MixtralForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_bias": true,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"input_jitter_noise": 0.01,
|
13 |
+
"intermediate_size": 6400,
|
14 |
+
"lm_head_bias": true,
|
15 |
+
"lora_rank": 32,
|
16 |
+
"max_position_embeddings": 4096,
|
17 |
+
"model_type": "mixtral",
|
18 |
+
"num_attention_heads": 32,
|
19 |
+
"num_experts_per_tok": 2,
|
20 |
+
"num_hidden_layers": 32,
|
21 |
+
"num_key_value_heads": 8,
|
22 |
+
"num_local_experts": 16,
|
23 |
+
"output_router_logits": false,
|
24 |
+
"rms_norm_eps": 1e-05,
|
25 |
+
"rope_theta": 10000.0,
|
26 |
+
"router_aux_loss_coef": 0.0,
|
27 |
+
"router_jitter_noise": 0.01,
|
28 |
+
"separate_lora": false,
|
29 |
+
"sliding_window": 2047,
|
30 |
+
"tie_word_embeddings": false,
|
31 |
+
"torch_dtype": "float32",
|
32 |
+
"transformers_version": "4.46.0",
|
33 |
+
"use_cache": true,
|
34 |
+
"use_lora": true,
|
35 |
+
"vocab_size": 32064
|
36 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.46.0"
|
6 |
+
}
|
model-00001-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8c38efbd4e8fe19fd6d21f2262e37c593ca512225b37962b509e47b157d087f
|
3 |
+
size 4902664728
|
model-00002-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:720d3ae8a232bd1393c237b77d9f0799f6ebbddb1f79f648fa068d7497ca8ec4
|
3 |
+
size 4901682672
|
model-00003-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b135a750fb39ded7344bb44995904d9a21d3aea34bef929a94f160a6f1729160
|
3 |
+
size 4901682680
|
model-00004-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a417cf0a4ac8d333441c0230f76df4bb9921c8516fd0695028e3f7e810ec6900
|
3 |
+
size 4901682680
|
model-00005-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b234e1a1130a0cc8e64757df8244eca14f786d25892379f6f78eed3157e886e4
|
3 |
+
size 4901682688
|
model-00006-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e455a6f3a8ddcca6b323469aa9e03d01addc496169bd5a2a62b943258c1c1636
|
3 |
+
size 4901682688
|
model-00007-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f8e469ff506eedbbf172a152fa1dd3f54273a3a2b221241ae3d1ac2169628b4
|
3 |
+
size 4901682688
|
model-00008-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92a3f36392c7e3db19d9d3ce5bddcc6bffc802620ee2e6d516ab9ff9052c1cf1
|
3 |
+
size 4901682688
|
model-00009-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9505a31f43fa5bbe6cd9047c99d54ebc10ea55e5bedd9ab6f1b1685820412952
|
3 |
+
size 4901682688
|
model-00010-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9437f75955f111fe94c56c007e0d388895ca049e427ad53b7f53a9604648be71
|
3 |
+
size 4901682688
|
model-00011-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70b5e2e95124adb3c0ee0807b5cd3a727f6a0616b010c578e234b16baf7adc20
|
3 |
+
size 4901682696
|
model-00012-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6722d2483b73bd39abdf4cc98ba20e2672a30f1cd546cf1497756daa6330e501
|
3 |
+
size 4901682744
|
model-00013-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b49a174a21b39a193b8cd23c4bf9d5e6ded5dc495cdf99df08d843e156f5a5eb
|
3 |
+
size 4901682744
|
model-00014-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3eae7b544d6b9c1dca7e31a465f94dc7aa0ff1ca1f67523e133e91118800ac4
|
3 |
+
size 4901682744
|
model-00015-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eec6019a1b0edb7c8c0cbfaa4033de31b3d17a5aa36406b191e7f9d15c280aea
|
3 |
+
size 4995504816
|
model-00016-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da0e51169f13e3bb2720d63cacd75084aff46646a4c003bf6423b9f6bea2a532
|
3 |
+
size 4939348992
|
model-00017-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8e1353d382c22367b7cd85e1bccbdb47fc05d572421760ae5886b5b0a692963
|
3 |
+
size 4901682728
|
model-00018-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b43c5f60b8df0352aa12ce0c90a2e7f8c6f15a560cf386b684488776905849ba
|
3 |
+
size 4901682728
|
model-00019-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21a130b99937407153ae62fa6fb7368563a8cf55693e43189c8e389738d597b7
|
3 |
+
size 4901682728
|
model-00020-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ad0e73c24200b8bd7d8d1f1ba6a99e3fc056399216d7ba258f83a20ee5388ad
|
3 |
+
size 4901682744
|
model-00021-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4426043916e8958ae2b404d85d83960e7d90c6f7eee649821bb04bb66dda67f4
|
3 |
+
size 4901682744
|
model-00022-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ddb548705f3419e13f059aad4c997d866e6d03f0cee79c709df5374a775576d
|
3 |
+
size 4901682744
|
model-00023-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cc25e1d83cd5e6b6212770e15d15bb024fc552b03d35b1a0aafa49d639e9fb9
|
3 |
+
size 4901682744
|
model-00024-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f961d034686f7a7e17a69e39a42ce5b1937b0be6309d1d73a4b2605f07ab4fd4
|
3 |
+
size 4901682744
|
model-00025-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a74f6d81282dda57ddb948dabdd60784780f22ebd582894f57b652645a87434
|
3 |
+
size 4901682744
|
model-00026-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5cd425ab91959af8a4c82cfea593c4581bee3c17df00c324bebd21ce22d31f31
|
3 |
+
size 4901682744
|
model-00027-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f23e199520b8281e82ade269ce110c1f204bf5e8cd4d8f79bd47ae2159e6675
|
3 |
+
size 4901682744
|
model-00028-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c79f0489b1508de5f95a123d0c6abdc7413fc63a54820619cff301c69910cdb7
|
3 |
+
size 4901682744
|
model-00029-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76888037721b4a9b2f9d2d3e5a3aef157c336c07ebc86616696571aba2788860
|
3 |
+
size 4901682744
|
model-00030-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4e5b59b5cb8a10f68e9bb4945b349fdd2bc5e5e3cb567c97ca26dd575a1e53a
|
3 |
+
size 4901682744
|
model-00031-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ccee848a42fb139c40c4544b325a188ac18d460ce05c6fd693cc5476a128719
|
3 |
+
size 4901682744
|
model-00032-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c18308432d9533d617d2cb7712f0a2b4cefb69b9dcc75150f7c9917dd4252e9d
|
3 |
+
size 4995504816
|
model-00033-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d7efebe7c0a899740c57046e00217b55167d25cc2bc25f49c1deb2f343cf42e
|
3 |
+
size 4939348992
|
model-00034-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afcc508990e30d62f7edb3efb09af2f7c604e7b5d6245c4ee4ad746ce31a6602
|
3 |
+
size 4901682728
|
model-00035-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6d855724eec7d81ca551299868a104c4dddcb301b91eaae8c91e585d77a53bc
|
3 |
+
size 1049852624
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|