Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -23,7 +23,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
23 |
processor = AutoProcessor.from_pretrained(MODEL_DIR)
|
24 |
|
25 |
# --------- Chat Inference Function ---------
|
26 |
-
def chat_qwen_vl(
|
27 |
# —— 原有多模态输入构造 —— #
|
28 |
text = processor.apply_chat_template(
|
29 |
messages, tokenize=False, add_generation_prompt=True
|
@@ -51,8 +51,8 @@ def chat_qwen_vl(messages):
|
|
51 |
**inputs, # 包含 input_ids, pixel_values, attention_mask 等
|
52 |
streamer=streamer, # 关键:挂载 streamer
|
53 |
top_k=1024,
|
54 |
-
max_new_tokens=
|
55 |
-
temperature=
|
56 |
top_p=0.1,
|
57 |
eos_token_id=terminators, # 你的结束符 ID 列表
|
58 |
)
|
@@ -70,57 +70,6 @@ def chat_qwen_vl(messages):
|
|
70 |
# 每次拿到新片段就拼接并输出
|
71 |
yield "".join(buffer)
|
72 |
|
73 |
-
def chat_qwen_vl_(message: str, history: list, temperature: float = 0.7, max_new_tokens: int = 1024):
|
74 |
-
"""
|
75 |
-
Stream chat response from local Qwen2.5-VL model.
|
76 |
-
"""
|
77 |
-
# Build conversation prompt
|
78 |
-
conv = []
|
79 |
-
for u, a in history:
|
80 |
-
conv.append(f"<user> {u}")
|
81 |
-
conv.append(f"<assistant> {a}")
|
82 |
-
conv.append(f"<user> {message}")
|
83 |
-
conv.append("<assistant>")
|
84 |
-
|
85 |
-
# Tokenize
|
86 |
-
inputs = tokenizer(
|
87 |
-
"\n".join(conv),
|
88 |
-
return_tensors="pt",
|
89 |
-
truncation=True,
|
90 |
-
max_length=4096
|
91 |
-
).to(model.device)
|
92 |
-
|
93 |
-
# Create streamer
|
94 |
-
streamer = TextIteratorStreamer(
|
95 |
-
tokenizer,
|
96 |
-
timeout=10.0,
|
97 |
-
skip_prompt=True,
|
98 |
-
skip_special_tokens=True
|
99 |
-
)
|
100 |
-
|
101 |
-
# Generation kwargs
|
102 |
-
gen_kwargs = dict(
|
103 |
-
input_ids=inputs.input_ids,
|
104 |
-
attention_mask=inputs.attention_mask,
|
105 |
-
streamer=streamer,
|
106 |
-
do_sample=(temperature > 0),
|
107 |
-
temperature=temperature,
|
108 |
-
max_new_tokens=max_new_tokens,
|
109 |
-
eos_token_id=terminators,
|
110 |
-
)
|
111 |
-
if temperature == 0:
|
112 |
-
gen_kwargs["do_sample"] = False
|
113 |
-
|
114 |
-
# Launch generation in thread
|
115 |
-
thread = Thread(target=model.generate, kwargs=gen_kwargs)
|
116 |
-
thread.start()
|
117 |
-
|
118 |
-
# Stream outputs
|
119 |
-
output_chunks = []
|
120 |
-
for chunk in streamer:
|
121 |
-
output_chunks.append(chunk)
|
122 |
-
yield "".join(output_chunks)
|
123 |
-
|
124 |
# --------- 3D Mesh Coloring Function ---------
|
125 |
def apply_gradient_color(mesh_text: str) -> str:
|
126 |
"""
|
|
|
23 |
processor = AutoProcessor.from_pretrained(MODEL_DIR)
|
24 |
|
25 |
# --------- Chat Inference Function ---------
|
26 |
+
def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
|
27 |
# —— 原有多模态输入构造 —— #
|
28 |
text = processor.apply_chat_template(
|
29 |
messages, tokenize=False, add_generation_prompt=True
|
|
|
51 |
**inputs, # 包含 input_ids, pixel_values, attention_mask 等
|
52 |
streamer=streamer, # 关键:挂载 streamer
|
53 |
top_k=1024,
|
54 |
+
max_new_tokens=max_new_tokens,
|
55 |
+
temperature=temperature,
|
56 |
top_p=0.1,
|
57 |
eos_token_id=terminators, # 你的结束符 ID 列表
|
58 |
)
|
|
|
70 |
# 每次拿到新片段就拼接并输出
|
71 |
yield "".join(buffer)
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
# --------- 3D Mesh Coloring Function ---------
|
74 |
def apply_gradient_color(mesh_text: str) -> str:
|
75 |
"""
|