Spaces:

yejunliang23
/

ShapLLM-Omni

Running on Zero

App Files Community

yejunliang23 committed on May 26

Commit

5994dcd

unverified ·

1 Parent(s): ca61faf

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -54

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 processor = AutoProcessor.from_pretrained(MODEL_DIR)
 # --------- Chat Inference Function ---------
-def chat_qwen_vl(messages):
     # —— 原有多模态输入构造 —— #
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
@@ -51,8 +51,8 @@ def chat_qwen_vl(messages):
         **inputs,           # 包含 input_ids, pixel_values, attention_mask 等
         streamer=streamer,  # 关键：挂载 streamer
         top_k=1024,
-        max_new_tokens=1280,
-        temperature=0.1,
         top_p=0.1,
         eos_token_id=terminators,  # 你的结束符 ID 列表
     )
@@ -70,57 +70,6 @@ def chat_qwen_vl(messages):
         # 每次拿到新片段就拼接并输出
         yield "".join(buffer)
-def chat_qwen_vl_(message: str, history: list, temperature: float = 0.7, max_new_tokens: int = 1024):
-    """
-    Stream chat response from local Qwen2.5-VL model.
-    """
-    # Build conversation prompt
-    conv = []
-    for u, a in history:
-        conv.append(f"<user> {u}")
-        conv.append(f"<assistant> {a}")
-    conv.append(f"<user> {message}")
-    conv.append("<assistant>")
-    # Tokenize
-    inputs = tokenizer(
-        "\n".join(conv),
-        return_tensors="pt",
-        truncation=True,
-        max_length=4096
-    ).to(model.device)
-    # Create streamer
-    streamer = TextIteratorStreamer(
-        tokenizer,
-        timeout=10.0,
-        skip_prompt=True,
-        skip_special_tokens=True
-    )
-    # Generation kwargs
-    gen_kwargs = dict(
-        input_ids=inputs.input_ids,
-        attention_mask=inputs.attention_mask,
-        streamer=streamer,
-        do_sample=(temperature > 0),
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        eos_token_id=terminators,
-    )
-    if temperature == 0:
-        gen_kwargs["do_sample"] = False
-    # Launch generation in thread
-    thread = Thread(target=model.generate, kwargs=gen_kwargs)
-    thread.start()
-    # Stream outputs
-    output_chunks = []
-    for chunk in streamer:
-        output_chunks.append(chunk)
-        yield "".join(output_chunks)
 # --------- 3D Mesh Coloring Function ---------
 def apply_gradient_color(mesh_text: str) -> str:
     """

 processor = AutoProcessor.from_pretrained(MODEL_DIR)
 # --------- Chat Inference Function ---------
+def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
     # —— 原有多模态输入构造 —— #
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
         **inputs,           # 包含 input_ids, pixel_values, attention_mask 等
         streamer=streamer,  # 关键：挂载 streamer
         top_k=1024,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
         top_p=0.1,
         eos_token_id=terminators,  # 你的结束符 ID 列表
     )
         # 每次拿到新片段就拼接并输出
         yield "".join(buffer)
 # --------- 3D Mesh Coloring Function ---------
 def apply_gradient_color(mesh_text: str) -> str:
     """