khang119966 committed
Commit 6492ca5 · verified · 1 Parent(s): b92250d

Update app.py

Files changed (1)
  1. app.py +100 -30
app.py CHANGED
@@ -126,6 +126,15 @@ def load_image(image_file, input_size=448, max_num=12):
     pixel_values = torch.stack(pixel_values)
     return pixel_values
 
+def extract_conclusion(text):
+    match = re.search(r"<CONCLUSION>(.*?)</CONCLUSION>", text, re.DOTALL)
+    return match.group(1).strip() if match else ""
+
+def extract_think(text):
+    text = re.sub(r"<.*?>", "", text.split("<CONCLUSION>")[0])  # remove all <...> tags
+    conclusion_part = extract_conclusion(text)
+    return text.replace(conclusion_part, "").strip()
+
 model = AutoModel.from_pretrained(
     "5CD-AI/Vintern-3B-R-beta",
     torch_dtype=torch.bfloat16,
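For reference, a minimal, self-contained sketch (not part of the commit) of how the two helpers added above split a tagged model response into its reasoning and conclusion parts; the sample response string is invented for illustration:

```python
import re

def extract_conclusion(text):
    # Grab whatever sits between <CONCLUSION> and </CONCLUSION>, or "" if the tags are missing.
    match = re.search(r"<CONCLUSION>(.*?)</CONCLUSION>", text, re.DOTALL)
    return match.group(1).strip() if match else ""

def extract_think(text):
    # Keep only the text before <CONCLUSION> and strip every remaining <...> tag.
    # The extract_conclusion call below is effectively a no-op here, because the
    # conclusion was already cut off by the split; it is kept to mirror the app code.
    text = re.sub(r"<.*?>", "", text.split("<CONCLUSION>")[0])
    conclusion_part = extract_conclusion(text)
    return text.replace(conclusion_part, "").strip()

sample = (
    "<SUMMARY>Compare the two candidates.</SUMMARY>\n"
    "<REASONING>Candidate A matches the caption better.</REASONING>\n"
    "<CONCLUSION>Candidate A</CONCLUSION>"
)
print(extract_think(sample))       # Compare the two candidates.\nCandidate A matches the caption better.
print(extract_conclusion(sample))  # Candidate A
```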
@@ -136,35 +145,60 @@ model = AutoModel.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-3B-R-beta", trust_remote_code=True, use_fast=False)
 
 global_think_mode =False
+think_prompt = """Bạn là người rất cẩn thận và đa nghi, vui lòng trả lời câu hỏi dưới đây bằng tiếng Việt. Khi suy luận bạn thường liệt kê ra các bằng chứng để chỉ ra các đáp án khả thi, suy luận và giải thích tại sao lại lựa chọn và loại bỏ trước khi đưa ra câu trả lời cuối cùng.
+
+Câu hỏi:
+{question_input}
+
+Hãy trả lời rất dài theo định dạng sau:
+<SUMMARY>...</SUMMARY>
+
+<CAPTION>...</CAPTION>
+
+<FIND_CANDIDATES_REASONING>...</FIND_CANDIDATES_REASONING>
+
+<TOP3_CANDIDATES>...</TOP3_CANDIDATES>
+
+<REASONING_PLAN>...</REASONING_PLAN>
+
+<REASONING>...</REASONING>
+
+<COUNTER_ARGUMENTS>...</COUNTER_ARGUMENTS>
+
+<VALIDATION_REASONING>...</VALIDATION_REASONING>
+
+<CONCLUSION>...</CONCLUSION>
+"""
+
 
 @spaces.GPU
 def chat(message, history):
     global global_think_mode
-    if not global_think_mode:
-        print("history",history)
-        print("message",message)
+
+    print("history",history)
+    print("message",message)
+
+    if len(history) != 0 and len(message["files"]) != 0:
+        return """Chúng tôi hiện chỉ hổ trợ 1 ảnh ở đầu ngữ cảnh! Vui lòng tạo mới cuộc trò chuyện.
+We currently only support one image at the start of the context! Please start a new conversation."""
 
-        if len(history) != 0 and len(message["files"]) != 0:
-            return """Chúng tôi hiện chỉ hổ trợ 1 ảnh ở đầu ngữ cảnh! Vui lòng tạo mới cuộc trò chuyện.
-We currently only support one image at the start of the context! Please start a new conversation."""
-
-        if len(history) == 0 and len(message["files"]) != 0:
-            if "path" in message["files"][0]:
-                test_image = message["files"][0]["path"]
-            else:
-                test_image = message["files"][0]
-            pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
-        elif len(history) == 0 and len(message["files"]) == 0:
-            pixel_values = None
-        elif history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
-            test_image = history[0][0][0]
-            pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
+    if len(history) == 0 and len(message["files"]) != 0:
+        if "path" in message["files"][0]:
+            test_image = message["files"][0]["path"]
         else:
-            pixel_values = None
+            test_image = message["files"][0]
+        pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
+    elif len(history) == 0 and len(message["files"]) == 0:
+        pixel_values = None
+    elif history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
+        test_image = history[0][0][0]
+        pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
+    else:
+        pixel_values = None
 
-
-        generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.5)
-
+    if not global_think_mode:
+        generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.0)
+
         if len(history) == 0:
             if pixel_values is not None:
                 question = '<image>\n'+message["text"]
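To make the think-mode path concrete, here is a small sketch (not from the repo) of how the prompt is assembled from a user message; the template below is a shortened stand-in for the full Vietnamese think_prompt added in this commit, and the user question is hypothetical:

```python
# Shortened stand-in for the real think_prompt (the actual template lists every
# <SUMMARY> ... <CONCLUSION> section and much longer Vietnamese instructions).
think_prompt = """Câu hỏi:
{question_input}

Hãy trả lời theo định dạng sau:
<SUMMARY>...</SUMMARY>

<CONCLUSION>...</CONCLUSION>
"""

message = {"text": "Where was this photo taken?", "files": []}  # hypothetical Gradio message
has_image = False  # in the app, pixel_values is non-None when an image was uploaded

question = think_prompt.format(question_input=message["text"])
if has_image:
    # The <image> placeholder is only prepended when pixel_values is set.
    question = '<image>\n' + question
print(question)
```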
@@ -199,15 +233,40 @@ def chat(message, history):
             time.sleep(0.02)
             yield generated_text_without_prompt
     else:
+        generation_config = dict(max_new_tokens= 2000, do_sample=False, num_beams = 3, repetition_penalty=2.0)
+
+        if len(history) == 0:
+            if pixel_values is not None:
+                question = '<image>\n'+ think_prompt.format(question_input=message["text"])
+            else:
+                question = think_prompt.format(question_input=message["text"])
+            response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
+        else:
+            conv_history = []
+            if history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
+                start_index = 1
+            else:
+                start_index = 0
+
+            for i, chat_pair in enumerate(history[start_index:]):
+                if i == 0 and start_index == 1:
+                    conv_history.append(tuple(['<image>\n'+chat_pair[0],chat_pair[1]]))
+                else:
+                    conv_history.append(tuple(chat_pair))
+
+
+            print("conv_history",conv_history)
+            question = message["text"]
+            response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=conv_history, return_history=True)
+
+        print(f'User: {question}\nAssistant: {response}')
+        think_part = extract_think(response)
+        conclusion_part = extract_conclusion(response)
+
         buffer = ""
-        thinking = """🔄 Model is analyzing the data...\n
-I am processing your request carefully. First, I need to understand the question clearly.
-Then, I retrieve relevant information and analyze different possibilities.
-Finally, I generate a structured response that best fits your input.
-\nThis process ensures that I provide the most accurate and meaningful answer possible.
-"""
+        thinking = think_part
 
-        accumulated_text = "💡 **Thinking process:** *(Click to expand)*\n\n"
+        accumulated_text = "💡 **Thinking process:**\n\n"
         accumulated_text += "<pre><code>\n"
 
         temp_text = ""
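A small sketch (toy history values, not from the repo) of how the loop above rebuilds the model-side conversation from Gradio's history pairs, re-attaching the '<image>' placeholder to the first text turn when the chat started with an uploaded image:

```python
import os, tempfile

# Toy stand-in for Gradio's history: the first entry holds an uploaded image path,
# the following entries are (user, assistant) text pairs.
image_path = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False).name
history = [
    [[image_path], None],
    ["What is in this image?", "A sleeping cat."],
]

conv_history = []
# The first entry holds a real file path only when the conversation started with an image.
start_index = 1 if history[0][0][0] is not None and os.path.isfile(history[0][0][0]) else 0

for i, chat_pair in enumerate(history[start_index:]):
    if i == 0 and start_index == 1:
        # Re-attach the <image> placeholder to the first text turn.
        conv_history.append(tuple(['<image>\n' + chat_pair[0], chat_pair[1]]))
    else:
        conv_history.append(tuple(chat_pair))

print(conv_history)  # [('<image>\nWhat is in this image?', 'A sleeping cat.')]
```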
@@ -218,6 +277,17 @@ Finally, I generate a structured response that best fits your input.
 
         accumulated_text += temp_text + "\n</code></pre>\n\n---\n"
 
+        # Yield the conclusion part
+        accumulated_text += "🎯 **Conclusion:**\n\n"
+
+        temp_text = ""
+        for char in conclusion_part:
+            temp_text += char
+            yield accumulated_text + temp_text + "\n\n---\n"
+            time.sleep(0.02)
+
+        accumulated_text += temp_text + "\n\n---\n"
+
 
 CSS ="""
 #component-10 {
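For reference, a minimal sketch (not from the repo) of the character-by-character streaming pattern used above for both the thinking and the conclusion sections, and of the rough shape of the final message; the think/conclusion strings are toy values:

```python
import time

think_part = "Evidence: the sign is in Vietnamese, so candidate 1 fits best."  # toy value
conclusion_part = "Candidate 1."                                               # toy value

def render(think_part, conclusion_part, delay=0.0):
    # Stream the thinking section inside <pre><code>, then the conclusion,
    # yielding the full accumulated text after every character (as the app does).
    accumulated = "💡 **Thinking process:**\n\n<pre><code>\n"
    shown = ""
    for char in think_part:
        shown += char
        yield accumulated + shown
        time.sleep(delay)
    accumulated += shown + "\n</code></pre>\n\n---\n🎯 **Conclusion:**\n\n"
    shown = ""
    for char in conclusion_part:
        shown += char
        yield accumulated + shown + "\n\n---\n"
        time.sleep(delay)

final_message = None
for final_message in render(think_part, conclusion_part):
    pass  # in the Space, Gradio displays each yielded string as the current reply
print(final_message)
```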
@@ -318,7 +388,7 @@ def toggle_think_mode(current_state):
     global global_think_mode
     new_state = not current_state
     global_think_mode = not global_think_mode
-    button_label = "💡Think🧠" if new_state else "🧠Think"
+    button_label = "🧠Think💡" if new_state else "🧠Think"
     return new_state, button_label
 
 demo = gr.Blocks(css=CSS,js=js, theme='NoCrypt/miku')
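The relabeled button above is driven by toggle_think_mode. A hedged sketch of how such a button could be wired up in Gradio; the actual wiring lives outside this hunk, so the component names and the click binding below are assumptions, not the app's code:

```python
import gradio as gr

global_think_mode = False

def toggle_think_mode(current_state):
    # Flip both the per-session state and the module-level flag read by chat(),
    # and return the new button label.
    global global_think_mode
    new_state = not current_state
    global_think_mode = not global_think_mode
    button_label = "🧠Think💡" if new_state else "🧠Think"
    return new_state, button_label

with gr.Blocks() as demo:
    think_state = gr.State(False)          # hypothetical state component
    think_button = gr.Button("🧠Think")    # hypothetical button component
    # Clicking the button updates the stored state and the button's own label.
    think_button.click(toggle_think_mode, inputs=think_state, outputs=[think_state, think_button])

if __name__ == "__main__":
    demo.launch()
```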
 