File size: 1,859 Bytes
71ec309
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e8052d
71ec309
 
 
 
 
 
 
 
9e8052d
71ec309
 
 
 
 
 
 
9e8052d
71ec309
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/bash
#
# Convert Tinytron model checkpoints to MLC format and generate their
# chat configs with mlc_llm. Outputs go under ${MODEL_OUTPUT_PATH}/<name>-MLC/.
#
# Requires: mlc_llm on PATH; model directories in the current working dir.

set -euo pipefail   # stop on the first failed conversion instead of continuing

readonly MODEL_OUTPUT_PATH="./bundle"
readonly QUANTIZATION="q0f16"

#######################################
# Convert one model's weights and generate its MLC config.
# Arguments:
#   $1 - model directory (also used to derive the "<dir>-MLC" output name)
#   $2 - conversation template passed to gen_config
#   $@ - any extra flags forwarded to both mlc_llm subcommands
#        (e.g. --model-type cauchy)
#######################################
convert_model() {
  local model_path=$1
  local conv_template=$2
  shift 2
  local model_name="${model_path}-MLC"
  local out_dir="${MODEL_OUTPUT_PATH}/${model_name}"

  # Convert weights
  mlc_llm convert_weight "$@" --device cpu "${model_path}" \
    --quantization "${QUANTIZATION}" -o "${out_dir}/"

  # Generate config
  mlc_llm gen_config "${model_path}" "$@" --conv-template "${conv_template}" \
    --quantization "${QUANTIZATION}" --context-window-size 512 \
    --prefill-chunk-size=16 --max-batch-size=1 -o "${out_dir}/"
}

convert_model "Llama3.1-8B-Instruct-Tinytron" "llama-3_1"
convert_model "Qwen2-7B-Instruct-Tinytron" "qwen2"
convert_model "Phi-2-Tinytron-preview" "qwen2"
# Cauchy needs an explicit model type for both subcommands.
convert_model "Cauchy-3B-preview" "qwen2" --model-type cauchy