yejunliang23 committed on
Commit
5994dcd
·
unverified ·
1 Parent(s): ca61faf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -54
app.py CHANGED
@@ -23,7 +23,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
23
  processor = AutoProcessor.from_pretrained(MODEL_DIR)
24
 
25
  # --------- Chat Inference Function ---------
26
- def chat_qwen_vl(messages):
27
  # —— 原有多模态输入构造 —— #
28
  text = processor.apply_chat_template(
29
  messages, tokenize=False, add_generation_prompt=True
@@ -51,8 +51,8 @@ def chat_qwen_vl(messages):
51
  **inputs, # 包含 input_ids, pixel_values, attention_mask 等
52
  streamer=streamer, # 关键:挂载 streamer
53
  top_k=1024,
54
- max_new_tokens=1280,
55
- temperature=0.1,
56
  top_p=0.1,
57
  eos_token_id=terminators, # 你的结束符 ID 列表
58
  )
@@ -70,57 +70,6 @@ def chat_qwen_vl(messages):
70
  # 每次拿到新片段就拼接并输出
71
  yield "".join(buffer)
72
 
73
- def chat_qwen_vl_(message: str, history: list, temperature: float = 0.7, max_new_tokens: int = 1024):
74
- """
75
- Stream chat response from local Qwen2.5-VL model.
76
- """
77
- # Build conversation prompt
78
- conv = []
79
- for u, a in history:
80
- conv.append(f"<user> {u}")
81
- conv.append(f"<assistant> {a}")
82
- conv.append(f"<user> {message}")
83
- conv.append("<assistant>")
84
-
85
- # Tokenize
86
- inputs = tokenizer(
87
- "\n".join(conv),
88
- return_tensors="pt",
89
- truncation=True,
90
- max_length=4096
91
- ).to(model.device)
92
-
93
- # Create streamer
94
- streamer = TextIteratorStreamer(
95
- tokenizer,
96
- timeout=10.0,
97
- skip_prompt=True,
98
- skip_special_tokens=True
99
- )
100
-
101
- # Generation kwargs
102
- gen_kwargs = dict(
103
- input_ids=inputs.input_ids,
104
- attention_mask=inputs.attention_mask,
105
- streamer=streamer,
106
- do_sample=(temperature > 0),
107
- temperature=temperature,
108
- max_new_tokens=max_new_tokens,
109
- eos_token_id=terminators,
110
- )
111
- if temperature == 0:
112
- gen_kwargs["do_sample"] = False
113
-
114
- # Launch generation in thread
115
- thread = Thread(target=model.generate, kwargs=gen_kwargs)
116
- thread.start()
117
-
118
- # Stream outputs
119
- output_chunks = []
120
- for chunk in streamer:
121
- output_chunks.append(chunk)
122
- yield "".join(output_chunks)
123
-
124
  # --------- 3D Mesh Coloring Function ---------
125
  def apply_gradient_color(mesh_text: str) -> str:
126
  """
 
23
  processor = AutoProcessor.from_pretrained(MODEL_DIR)
24
 
25
  # --------- Chat Inference Function ---------
26
+ def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
27
  # —— 原有多模态输入构造 —— #
28
  text = processor.apply_chat_template(
29
  messages, tokenize=False, add_generation_prompt=True
 
51
  **inputs, # 包含 input_ids, pixel_values, attention_mask 等
52
  streamer=streamer, # 关键:挂载 streamer
53
  top_k=1024,
54
+ max_new_tokens=max_new_tokens,
55
+ temperature=temperature,
56
  top_p=0.1,
57
  eos_token_id=terminators, # 你的结束符 ID 列表
58
  )
 
70
  # 每次拿到新片段就拼接并输出
71
  yield "".join(buffer)
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  # --------- 3D Mesh Coloring Function ---------
74
  def apply_gradient_color(mesh_text: str) -> str:
75
  """