yejunliang23 committed
Commit d5b7fec · unverified · 1 Parent(s): 52f32fc

Update app.py

Files changed (1)
  1. app.py +81 -4
app.py CHANGED
@@ -9,6 +9,8 @@ from trimesh.exchange.gltf import export_glb
 import numpy as np
 import tempfile
 import copy
+from dashscope import MultiModalConversation
+import dashscope
 
 def _remove_image_special(text):
     text = text.replace('<ref>', '').replace('</ref>', '')
@@ -19,7 +21,7 @@ def is_video_file(filename):
     video_extensions = ['.mp4', '.avi', '.mkv', '.mov', '.wmv', '.flv', '.webm', '.mpeg']
     return any(filename.lower().endswith(ext) for ext in video_extensions)
 
-def predict(_chatbot, task_history):
+def predict_(_chatbot, task_history):
     chat_query = _chatbot[-1][0]
     query = task_history[-1][0]
     if len(chat_query) == 0:
@@ -43,24 +45,98 @@ def predict(_chatbot, task_history):
         messages.append({'role': 'assistant', 'content': [{'text': a}]})
         content = []
     messages.pop()
+    responses = MultiModalConversation.call(
+        model="Qwen/Qwen2.5-VL-3B-Instruct", messages=messages, stream=True,
+    )
+    for response in responses:
+        if not response.status_code == HTTPStatus.OK:
+            raise HTTPError(f'response.code: {response.code}\nresponse.message: {response.message}')
+        response = response.output.choices[0].message.content
+        response_text = []
+        for ele in response:
+            if 'text' in ele:
+                response_text.append(ele['text'])
+            elif 'box' in ele:
+                response_text.append(ele['box'])
+        response_text = ''.join(response_text)
+        _chatbot[-1] = (_parse_text(chat_query), _remove_image_special(response_text))
+        yield _chatbot
+
+    if len(response) > 1:
+        result_image = response[-1]['result_image']
+        resp = requests.get(result_image)
+        os.makedirs(uploaded_file_dir, exist_ok=True)
+        name = f"tmp{secrets.token_hex(20)}.jpg"
+        filename = os.path.join(uploaded_file_dir, name)
+        with open(filename, 'wb') as f:
+            f.write(resp.content)
+        response = ''.join(r['box'] if 'box' in r else r['text'] for r in response[:-1])
+        _chatbot.append((None, (filename,)))
+    else:
+        response = response[0]['text']
+        _chatbot[-1] = (_parse_text(chat_query), response)
+    full_response = _parse_text(response)
 
+    task_history[-1] = (query, full_response)
+    print("Qwen2.5-VL-Chat: " + _parse_text(full_response))
+    yield _chatbot
+
+def predict(_chatbot, task_history):
+    chat_query = _chatbot[-1][0]
+    query = task_history[-1][0]
+    if len(chat_query) == 0:
+        _chatbot.pop()
+        task_history.pop()
+        return _chatbot
+    print("User: " + _parse_text(query))
+    history_cp = copy.deepcopy(task_history)
+    full_response = ""
+    messages = []
+    content = []
+    for q, a in history_cp:
+        if isinstance(q, (tuple, list)):
+            if is_video_file(q[0]):
+                content.append({'video': f'file://{q[0]}'})
+            else:
+                content.append({'image': f'file://{q[0]}'})
+        else:
+            content.append({'text': q})
+        messages.append({'role': 'user', 'content': content})
+        messages.append({'role': 'assistant', 'content': [{'text': a}]})
+        content = []
+    messages.pop()
     messages = _transform_messages(messages)
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True)
-    print(text)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(text=[text], images=image_inputs,
                        videos=video_inputs, padding=True, return_tensors='pt')
     inputs = inputs.to(model.device)
 
     streamer = TextIteratorStreamer(
-        tokenizer, timeout=2000.0, skip_prompt=True, skip_special_tokens=True)
+        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
 
     gen_kwargs = {'max_new_tokens': 512, 'streamer': streamer, **inputs}
 
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
 
+    # Initialize the response text
+    full_response = ""
+    _chatbot[-1] = (_parse_text(chat_query), "")  # start with an empty response
+
+    # Process the streaming output
+    for new_text in streamer:
+        full_response += new_text
+        # Update the response part of the last conversation turn
+        _chatbot[-1] = (_parse_text(chat_query), _parse_text(full_response))
+        yield _chatbot
+
+    # Final bookkeeping (if the complete response needs to be saved)
+    task_history[-1] = (chat_query, full_response)
+    print("Model Output: " + _parse_text(full_response))
+    yield _chatbot
+    """
     #for new_text in streamer:
     # yield new_text
 
@@ -68,6 +144,7 @@ def predict(_chatbot, task_history):
     for chunk in streamer:
         buffer.append(chunk)
         yield "".join(buffer)
+    """
 
 
 def regenerate(_chatbot, task_history):
@@ -192,7 +269,7 @@ def chat_qwen_vl(messages: str, history: list, temperature: float = 0.1, max_new
     streamer = TextIteratorStreamer(
         tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
 
-    gen_kwargs = {'max_new_tokens': 1024, 'streamer': streamer, **inputs}
+    gen_kwargs = {'max_new_tokens': 512, 'streamer': streamer, **inputs}
 
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
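Net effect of the commit: a dashscope-backed predict_ variant is added alongside the local predict, predict now streams token-by-token into the Gradio chatbot (the old raw-chunk loop is retired into a string literal), the streamer timeout drops from 2000.0 s to 20.0 s, and chat_qwen_vl is capped at 512 new tokens instead of 1024. The thread-plus-TextIteratorStreamer pattern at the heart of the new predict can be sketched in isolation as follows. This is a minimal sketch assuming a standard Hugging Face causal LM; the model id and the stream_reply helper are illustrative, not names from this Space:

# Minimal sketch of the thread + TextIteratorStreamer pattern used in predict().
# The model id below is illustrative, not the checkpoint this Space loads.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

def stream_reply(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # skip_prompt drops the echoed input tokens; a finite timeout (the value
    # this commit lowers from 2000 s to 20 s) bounds how long the consumer
    # waits on a stalled generate() before queue.Empty is raised.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = {"max_new_tokens": 512, "streamer": streamer, **inputs}
    # generate() blocks until done, so it runs on a worker thread while this
    # generator drains the streamer and yields the accumulated text.
    Thread(target=model.generate, kwargs=gen_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # each yield lets Gradio re-render the chat turn

Yielding the accumulated string on every chunk, rather than the bare fragment as the now-stringified loop did, is what makes the chatbot message grow in place instead of being replaced by each new piece.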