zhaode committed on
Commit
005325c
·
verified ·
1 Parent(s): 3524c3e

Upload folder using huggingface_hub

Browse files
Files changed (9) hide show
  1. .gitattributes +13 -11
  2. README.md +50 -0
  3. config.json +8 -0
  4. embeddings_bf16.bin +3 -0
  5. llm.mnn +3 -0
  6. llm.mnn.json +3 -0
  7. llm.mnn.weight +3 -0
  8. llm_config.json +14 -0
  9. tokenizer.txt +0 -0
.gitattributes CHANGED
@@ -1,35 +1,37 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
 
20
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *.tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.db* filter=lfs diff=lfs merge=lfs -text
29
+ *.ark* filter=lfs diff=lfs merge=lfs -text
30
+ **/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
31
+ **/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
32
+ **/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
33
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
34
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
35
+ *.mnn filter=lfs diff=lfs merge=lfs -text
36
+ *.mnn.* filter=lfs diff=lfs merge=lfs -text
37
+ *.weight filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ pipeline_tag: text-generation
6
+ tags:
7
+ - chat
8
+ ---
9
+ # Meta-Llama-3.1-8B-Instruct-MNN
10
+
11
+ ## Introduction
12
+ This model is a 4-bit quantized version of the MNN model exported from [Meta-Llama-3.1-8B-Instruct](https://www.modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct/summary) using [llmexport](https://github.com/alibaba/MNN/tree/master/transformers/llm/export).
13
+
14
+ ## Download
15
+ ```bash
16
+ # install huggingface_hub
17
+ pip install huggingface_hub
18
+ ```
19
+ ```bash
20
+ # shell download
21
+ huggingface-cli download taobao-mnn/Meta-Llama-3.1-8B-Instruct-MNN --local-dir 'path/to/dir'
22
+ ```
23
+ ```python
24
+ # SDK download
25
+ from huggingface_hub import snapshot_download
26
+ model_dir = snapshot_download('taobao-mnn/Meta-Llama-3.1-8B-Instruct-MNN')
27
+ ```
28
+
29
+ ```bash
30
+ # git clone
31
+ git clone https://huggingface.co/taobao-mnn/Meta-Llama-3.1-8B-Instruct-MNN
32
+ ```
33
+
34
+ ## Usage
35
+ ```bash
36
+ # clone MNN source
37
+ git clone https://github.com/alibaba/MNN.git
38
+
39
+ # compile
40
+ cd MNN
41
+ mkdir build && cd build
42
+ cmake .. -DMNN_LOW_MEMORY=true -DMNN_CPU_WEIGHT_DEQUANT_GEMM=true -DMNN_BUILD_LLM=true -DMNN_SUPPORT_TRANSFORMER_FUSE=true
43
+ make -j
44
+
45
+ # run
46
+ ./llm_demo /path/to/Meta-Llama-3.1-8B-Instruct-MNN/config.json prompt.txt
47
+ ```
48
+
49
+ ## Documentation
50
+ [MNN-LLM](https://mnn-docs.readthedocs.io/en/latest/transformers/llm.html#)
config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llm_model": "llm.mnn",
3
+ "llm_weight": "llm.mnn.weight",
4
+ "backend_type": "cpu",
5
+ "thread_num": 4,
6
+ "precision": "low",
7
+ "memory": "low"
8
+ }
embeddings_bf16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1598c85d28a153936980ff64cb0a678f1b8778d6e936bdc87796008d389a718
3
+ size 1050673152
llm.mnn ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da7bff480a2c64b5b6e05f6b5a1e2ad90497c06619556c03460d31a752cdcd91
3
+ size 2637408
llm.mnn.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff6d022c7fa33394cfe8ec10d7f721689393db447876b07e5d931846c25da8bc
3
+ size 16729259
llm.mnn.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb0fc190cff25ed05ad9b82dc0f0416601ff3c3ad00d90b302503ce124838d43
3
+ size 4221375322
llm_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hidden_size": 4096,
3
+ "layer_nums": 32,
4
+ "attention_mask": "float",
5
+ "key_value_shape": [
6
+ 2,
7
+ 1,
8
+ 0,
9
+ 8,
10
+ 128
11
+ ],
12
+ "prompt_template": "<|start_header_id|>user<|end_header_id|>\n\n%s<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
13
+ "is_visual": false
14
+ }
tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff