from pathlib import Path

import streamlit as st
from lmdeploy import TurbomindEngineConfig, pipeline
from modelscope import snapshot_download

from utils.web_configs import WEB_CONFIGS


@st.cache_resource
def load_turbomind_model(model_dir):
    """Download the model from ModelScope and build an lmdeploy TurboMind pipeline.

    Decorated with st.cache_resource so the model is loaded only once per Streamlit process.
    """
    print("load model begin.")

    # Directories whose name ends with "-4bit" hold AWQ-quantized weights;
    # everything else is treated as plain HuggingFace-format weights.
    model_format = "awq" if Path(model_dir).stem.endswith("-4bit") else "hf"

    # Resolve the repo id to a local path, downloading into the configured cache directory.
    model_dir = snapshot_download(model_dir, revision="master", cache_dir=WEB_CONFIGS.LLM_MODEL_DIR)

    backend_config = TurbomindEngineConfig(
        model_format=model_format,
        session_len=32768,
        cache_max_entry_count=WEB_CONFIGS.CACHE_MAX_ENTRY_COUNT,
    )
    pipe = pipeline(model_dir, backend_config=backend_config, log_level="INFO", model_name="internlm2")

    print("load model end.")
    return pipe
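

# Usage sketch (not part of the original file): how this helper might be called.
# The ModelScope repo id below is a placeholder assumption; any repo id or local
# path compatible with TurboMind works. An lmdeploy pipeline is callable on a list
# of prompts and returns one Response object per prompt.
if __name__ == "__main__":
    demo_pipe = load_turbomind_model("internlm/internlm2-chat-7b")  # placeholder model id
    responses = demo_pipe(["Introduce yourself in one sentence."])
    print(responses[0].text)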