Upload MixtralForCausalLM
Browse files- config.json +36 -0
- generation_config.json +6 -0
- model-00001-of-00035.safetensors +3 -0
- model-00002-of-00035.safetensors +3 -0
- model-00003-of-00035.safetensors +3 -0
- model-00004-of-00035.safetensors +3 -0
- model-00005-of-00035.safetensors +3 -0
- model-00006-of-00035.safetensors +3 -0
- model-00007-of-00035.safetensors +3 -0
- model-00008-of-00035.safetensors +3 -0
- model-00009-of-00035.safetensors +3 -0
- model-00010-of-00035.safetensors +3 -0
- model-00011-of-00035.safetensors +3 -0
- model-00012-of-00035.safetensors +3 -0
- model-00013-of-00035.safetensors +3 -0
- model-00014-of-00035.safetensors +3 -0
- model-00015-of-00035.safetensors +3 -0
- model-00016-of-00035.safetensors +3 -0
- model-00017-of-00035.safetensors +3 -0
- model-00018-of-00035.safetensors +3 -0
- model-00019-of-00035.safetensors +3 -0
- model-00020-of-00035.safetensors +3 -0
- model-00021-of-00035.safetensors +3 -0
- model-00022-of-00035.safetensors +3 -0
- model-00023-of-00035.safetensors +3 -0
- model-00024-of-00035.safetensors +3 -0
- model-00025-of-00035.safetensors +3 -0
- model-00026-of-00035.safetensors +3 -0
- model-00027-of-00035.safetensors +3 -0
- model-00028-of-00035.safetensors +3 -0
- model-00029-of-00035.safetensors +3 -0
- model-00030-of-00035.safetensors +3 -0
- model-00031-of-00035.safetensors +3 -0
- model-00032-of-00035.safetensors +3 -0
- model-00033-of-00035.safetensors +3 -0
- model-00034-of-00035.safetensors +3 -0
- model-00035-of-00035.safetensors +3 -0
- model.safetensors.index.json +0 -0
config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"MixtralForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_bias": true,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"input_jitter_noise": 0.01,
|
13 |
+
"intermediate_size": 6400,
|
14 |
+
"lm_head_bias": true,
|
15 |
+
"lora_rank": 16,
|
16 |
+
"max_position_embeddings": 4096,
|
17 |
+
"model_type": "mixtral",
|
18 |
+
"num_attention_heads": 32,
|
19 |
+
"num_experts_per_tok": 2,
|
20 |
+
"num_hidden_layers": 32,
|
21 |
+
"num_key_value_heads": 8,
|
22 |
+
"num_local_experts": 16,
|
23 |
+
"output_router_logits": false,
|
24 |
+
"rms_norm_eps": 1e-05,
|
25 |
+
"rope_theta": 10000.0,
|
26 |
+
"router_aux_loss_coef": 0.0,
|
27 |
+
"router_jitter_noise": 0.01,
|
28 |
+
"separate_lora": true,
|
29 |
+
"sliding_window": 2047,
|
30 |
+
"tie_word_embeddings": false,
|
31 |
+
"torch_dtype": "float32",
|
32 |
+
"transformers_version": "4.45.2",
|
33 |
+
"use_cache": true,
|
34 |
+
"use_lora": true,
|
35 |
+
"vocab_size": 32064
|
36 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.45.2"
|
6 |
+
}
|
model-00001-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfb9ceeed692d85c00071db4b456df8fc45fb6a88a3c722985e5ba78156ec2a7
|
3 |
+
size 4992579888
|
model-00002-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cf4d0452ce616a391ccce6b7ccb8cb629f2a1ade95befc62a4a19701b130857
|
3 |
+
size 4918996520
|
model-00003-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:319e8adbc674f8e6412de03f0104240cb1c2558666705d131b13cf728cde0865
|
3 |
+
size 4918996720
|
model-00004-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68ea293b0c44865ee8ca3f24467df9d7e28d4e7f5db4f3f6c49e16b1ef166f0c
|
3 |
+
size 4918996720
|
model-00005-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f58906a680b4f7208a5d6d2c6308f4359b875395556586c9e46bf35541652df
|
3 |
+
size 4918996720
|
model-00006-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40d5eaf2779461637b45f2cc8339321113e3be2aaca8b9c4701c64a387300046
|
3 |
+
size 4918996728
|
model-00007-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33abd1581236c8592f0451793caa18c96ff4372b1b91c0b24366bf55a3f42ca4
|
3 |
+
size 4918996728
|
model-00008-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:877cd94986fe886bb2f6dd88a7cf5321bbb3970b8df3a206c401838fb19168d2
|
3 |
+
size 4918996728
|
model-00009-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ee0ba66f2f8b4bdd6afbfd7fc491af1e7208d58629b6b2ec6da5b6081d80efd
|
3 |
+
size 4918996728
|
model-00010-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6c947e8f0e99c71c8237964f8e074d949dbd677f9e57313ead07e2475a5065a
|
3 |
+
size 4918996728
|
model-00011-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:486c636a0391e0c4ee7c250e90024c427b21c38322a24b861c00ffd492b5cb01
|
3 |
+
size 4918996744
|
model-00012-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:967a3ac6a68602080def174360742257c20c6163ec64cf81f2817fa0926c63d8
|
3 |
+
size 4918996880
|
model-00013-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cbe7123562084a4e5dfef5a9a2dd33792d0138249a533f0f888d546d06f968f
|
3 |
+
size 4918996880
|
model-00014-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:826b9ed170d646d01848620caee32e3aba1cd6cbc8d388412d44fcb65d931685
|
3 |
+
size 4918996880
|
model-00015-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7d4192f2a2db5eec0fcaf2906e7e69675ba10a13677623ad98415f84a452001
|
3 |
+
size 4956466872
|
model-00016-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02c9830bed02c7688ba6f0ed5afd9672c81a46537ffbccab2a81ecd966fdf3fa
|
3 |
+
size 4995701080
|
model-00017-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6049be07813b76c6accfe3c66106ab85c8257f5a459b4ea6fa9e7d077f285343
|
3 |
+
size 4918996672
|
model-00018-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc0ecf66bb9b4e69dc04d421b2761091c6e846a26bbed1b602a63ceb653ef7ea
|
3 |
+
size 4918996672
|
model-00019-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94d4b12db9664f2532566fbcd80af189c45dcdf7501f0fad2e4c02f20988a6c5
|
3 |
+
size 4918996672
|
model-00020-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42a4749131fef0d43107fa35509e7ef2a311cba7a89bced3e0caa07c0e28a175
|
3 |
+
size 4918996872
|
model-00021-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02d339ea267b4f9b0af22f9468821f1857114426c03df328caa10e33093c0252
|
3 |
+
size 4918996872
|
model-00022-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:900b564ce097f15e3bf137b86db5891370e162ec1b2abf6c8193f3b73fdc993f
|
3 |
+
size 4918996880
|
model-00023-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bdeac8f1c13cafba90ef9d64d4418efdd3d7088bb69bbedc9407be508273d07
|
3 |
+
size 4918996880
|
model-00024-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c05d3eb66d7e5fe73fc84b3ae220bf4721f5dca67a570d6dd9404f812adfb651
|
3 |
+
size 4918996880
|
model-00025-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64dfb3407e217b57becb4aa184a4b6ac68d2505c380ec5fe5b88ad9a6ff01e3c
|
3 |
+
size 4918996880
|
model-00026-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f941e2cca5690a54c11b73437d324f922aae566ab6e18b13ae838a1070947f75
|
3 |
+
size 4918996880
|
model-00027-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc969f6b76b43acb12751dd12883424f0a65d3cbaf9eb1598d543e9f7bd54cdf
|
3 |
+
size 4918996880
|
model-00028-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df59b0c6c26a58bf9fcaf72d6feac2f82cdc9d003f6656ed6748619ca065d897
|
3 |
+
size 4918996880
|
model-00029-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8e3c1759d043e21c353ffd3b3025b779e9ab2d297b8b6737b0326528c91ab7e
|
3 |
+
size 4918996880
|
model-00030-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a27d4e8419e57bbd7c0494790022dcfde7caac190fd8a6255e2cf51fbc5352d
|
3 |
+
size 4918996880
|
model-00031-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36263f7bdb6fd3008b74edaa82cc811088679ce7a196022f03ba1a657e153f98
|
3 |
+
size 4918996880
|
model-00032-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6411686ac9351c28292b9fa4e183a5d75adeae945a58ec16fcb4c70eef8dc4ba
|
3 |
+
size 4956466872
|
model-00033-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b7f06759149400ed49c6e927adaf8aed8d03f5b2dae4ba13b872fb3865bde5f
|
3 |
+
size 4995701080
|
model-00034-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0bb46b0d018c019b3011d562a97e1cb139663fb7b72d0c0b0e8894319c367d2
|
3 |
+
size 4918996672
|
model-00035-of-00035.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b32e03ca1beb74b8f99186d5b223a85ddec27d5edd18028853e1a6e46db0f26
|
3 |
+
size 977251400
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|