Spaces:
Running
Running
Upload 2 files
Browse files
- app.py +13 -8
- requirements.txt +1 -2
app.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import
|
3 |
import torch
|
4 |
|
5 |
-
#
|
6 |
-
|
7 |
-
|
8 |
model = AutoModelForCausalLM.from_pretrained(
|
9 |
model_name,
|
10 |
torch_dtype=torch.float16, # 使用 FP16 减少内存
|
11 |
-
device_map="cpu", #
|
12 |
trust_remote_code=True,
|
13 |
low_cpu_mem_usage=True
|
14 |
)
|
@@ -31,8 +31,13 @@ def respond(
|
|
31 |
|
32 |
messages.append({"role": "user", "content": message})
|
33 |
|
34 |
-
#
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
36 |
inputs = tokenizer(input_text, return_tensors="pt").to("cpu")
|
37 |
|
38 |
# 生成响应
|
@@ -52,7 +57,7 @@ demo = gr.ChatInterface(
|
|
52 |
respond,
|
53 |
additional_inputs=[
|
54 |
gr.Textbox(value="You are a friendly coding assistant.", label="System message"),
|
55 |
-
gr.Slider(minimum=1, maximum=2048, value=
|
56 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
57 |
gr.Slider(
|
58 |
minimum=0.1,
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
import torch
|
4 |
|
5 |
+
# 加载模型
|
6 |
+
model_name = "deepseek-ai/deepseek-coder-1.3b-base" # 可替换为 "deepseek-ai/deepseek-coder-1.3b-instruct"
|
7 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
8 |
model = AutoModelForCausalLM.from_pretrained(
|
9 |
model_name,
|
10 |
torch_dtype=torch.float16, # 使用 FP16 减少内存
|
11 |
+
device_map="cpu", # 强制 CPU
|
12 |
trust_remote_code=True,
|
13 |
low_cpu_mem_usage=True
|
14 |
)
|
|
|
31 |
|
32 |
messages.append({"role": "user", "content": message})
|
33 |
|
34 |
+
# 使用聊天模板格式化输入(base 模型可能无模板,需调整)
|
35 |
+
try:
|
36 |
+
input_text = tokenizer.apply_chat_template(messages, tokenize=False)
|
37 |
+
except:
|
38 |
+
# 如果 base 模型无聊天模板,直接拼接
|
39 |
+
input_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
|
40 |
+
|
41 |
inputs = tokenizer(input_text, return_tensors="pt").to("cpu")
|
42 |
|
43 |
# 生成响应
|
|
|
57 |
respond,
|
58 |
additional_inputs=[
|
59 |
gr.Textbox(value="You are a friendly coding assistant.", label="System message"),
|
60 |
+
gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens"), # 降低以加快 CPU 推理
|
61 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
62 |
gr.Slider(
|
63 |
minimum=0.1,
|
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
torch>=2.0.0
|
2 |
transformers>=4.38.0
|
3 |
accelerate>=0.26.0
|
4 |
-
gradio>=4.0.0
|
5 |
-
bitsandbytes>=0.43.3
|
|
|
1 |
torch>=2.0.0
|
2 |
transformers>=4.38.0
|
3 |
accelerate>=0.26.0
|
4 |
+
gradio>=4.0.0
|
|