Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -126,6 +126,15 @@ def load_image(image_file, input_size=448, max_num=12):
     pixel_values = torch.stack(pixel_values)
     return pixel_values
 
+def extract_conclusion(text):
+    match = re.search(r"<CONCLUSION>(.*?)</CONCLUSION>", text, re.DOTALL)
+    return match.group(1).strip() if match else ""
+
+def extract_think(text):
+    text = re.sub(r"<.*?>", "", text.split("<CONCLUSION>")[0])  # remove all <...> tags
+    conclusion_part = extract_conclusion(text)
+    return text.replace(conclusion_part, "").strip()
+
 model = AutoModel.from_pretrained(
     "5CD-AI/Vintern-3B-R-beta",
     torch_dtype=torch.bfloat16,
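For reference, a minimal standalone sketch of how the two helpers added above split a tagged response; the sample string and the expected output in the comments are made up for illustration and are not taken from the app:

import re

def extract_conclusion(text):
    match = re.search(r"<CONCLUSION>(.*?)</CONCLUSION>", text, re.DOTALL)
    return match.group(1).strip() if match else ""

def extract_think(text):
    # keep only the part before <CONCLUSION>, then strip the remaining <...> tags
    text = re.sub(r"<.*?>", "", text.split("<CONCLUSION>")[0])
    conclusion_part = extract_conclusion(text)
    return text.replace(conclusion_part, "").strip()

sample = "<SUMMARY>Read the bill total.</SUMMARY>\n<REASONING>The total line shows 120000 VND.</REASONING>\n<CONCLUSION>120000 VND</CONCLUSION>"
print(extract_think(sample))        # -> "Read the bill total.\nThe total line shows 120000 VND."
print(extract_conclusion(sample))   # -> "120000 VND"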
@@ -136,35 +145,60 @@ model = AutoModel.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-3B-R-beta", trust_remote_code=True, use_fast=False)
 
 global_think_mode =False
+think_prompt = """Bạn là người rất cẩn thận và đa nghi, vui lòng trả lời câu hỏi dưới đây bằng tiếng Việt. Khi suy luận bạn thường liệt kê ra các bằng chứng để chỉ ra các đáp án khả thi, suy luận và giải thích tại sao lại lựa chọn và loại bỏ trước khi đưa ra câu trả lời cuối cùng.
+
+Câu hỏi:
+{question_input}
+
+Hãy trả lời rất dài theo định dạng sau:
+<SUMMARY>...</SUMMARY>
+
+<CAPTION>...</CAPTION>
+
+<FIND_CANDIDATES_REASONING>...</FIND_CANDIDATES_REASONING>
+
+<TOP3_CANDIDATES>...</TOP3_CANDIDATES>
+
+<REASONING_PLAN>...</REASONING_PLAN>
+
+<REASONING>...</REASONING>
+
+<COUNTER_ARGUMENTS>...</COUNTER_ARGUMENTS>
+
+<VALIDATION_REASONING>...</VALIDATION_REASONING>
+
+<CONCLUSION>...</CONCLUSION>
+"""
+
 
 @spaces.GPU
 def chat(message, history):
     global global_think_mode
-
-
-
+
+    print("history",history)
+    print("message",message)
+
+    if len(history) != 0 and len(message["files"]) != 0:
+        return """Chúng tôi hiện chỉ hổ trợ 1 ảnh ở đầu ngữ cảnh! Vui lòng tạo mới cuộc trò chuyện.
+We currently only support one image at the start of the context! Please start a new conversation."""
 
-
-
-
-
-    if len(history) == 0 and len(message["files"]) != 0:
-        if "path" in message["files"][0]:
-            test_image = message["files"][0]["path"]
-        else:
-            test_image = message["files"][0]
-        pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
-    elif len(history) == 0 and len(message["files"]) == 0:
-        pixel_values = None
-    elif history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
-        test_image = history[0][0][0]
-        pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
-    else:
-
+    if len(history) == 0 and len(message["files"]) != 0:
+        if "path" in message["files"][0]:
+            test_image = message["files"][0]["path"]
+        else:
+            test_image = message["files"][0]
+        pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
+    elif len(history) == 0 and len(message["files"]) == 0:
+        pixel_values = None
+    elif history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
+        test_image = history[0][0][0]
+        pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
+    else:
+        pixel_values = None
 
-
-    generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.
-
+    if not global_think_mode:
+        generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.0)
+
         if len(history) == 0:
             if pixel_values is not None:
                 question = '<image>\n'+message["text"]
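Roughly translated, the Vietnamese think_prompt tells the model to answer carefully and skeptically in Vietnamese, listing evidence and candidate answers and explaining why each is kept or rejected before giving the final answer in the tagged sections above. The template is filled with str.format and, when an image is in context, prefixed with the <image> placeholder; below is a self-contained sketch of that assembly with a shortened template and a made-up message dict, not the Space's exact code:

# shortened stand-in for the full Vietnamese template added in the diff
think_prompt = """Bạn là người rất cẩn thận và đa nghi, vui lòng trả lời câu hỏi dưới đây bằng tiếng Việt.

Câu hỏi:
{question_input}

Hãy trả lời rất dài theo định dạng sau:
<SUMMARY>...</SUMMARY>
<CONCLUSION>...</CONCLUSION>
"""

message = {"text": "What kind of invoice is shown in the image?", "files": ["example.png"]}  # made-up Gradio-style message

question = think_prompt.format(question_input=message["text"])
if len(message["files"]) != 0:
    question = '<image>\n' + question   # the image placeholder goes in front of the prompt
print(question)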
@@ -199,15 +233,40 @@ def chat(message, history):
             time.sleep(0.02)
             yield generated_text_without_prompt
     else:
+        generation_config = dict(max_new_tokens= 2000, do_sample=False, num_beams = 3, repetition_penalty=2.0)
+
+        if len(history) == 0:
+            if pixel_values is not None:
+                question = '<image>\n'+ think_prompt.format(question_input=message["text"])
+            else:
+                question = think_prompt.format(question_input=message["text"])
+            response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
+        else:
+            conv_history = []
+            if history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
+                start_index = 1
+            else:
+                start_index = 0
+
+            for i, chat_pair in enumerate(history[start_index:]):
+                if i == 0 and start_index == 1:
+                    conv_history.append(tuple(['<image>\n'+chat_pair[0],chat_pair[1]]))
+                else:
+                    conv_history.append(tuple(chat_pair))
+
+
+            print("conv_history",conv_history)
+            question = message["text"]
+            response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=conv_history, return_history=True)
+
+        print(f'User: {question}\nAssistant: {response}')
+        think_part = extract_think(response)
+        conclusion_part = extract_conclusion(response)
+
         buffer = ""
-        thinking = """
-I am processing your request carefully. First, I need to understand the question clearly.
-Then, I retrieve relevant information and analyze different possibilities.
-Finally, I generate a structured response that best fits your input.
-\nThis process ensures that I provide the most accurate and meaningful answer possible.
-"""
+        thinking = think_part
 
-        accumulated_text = "💡 **Thinking process
+        accumulated_text = "💡 **Thinking process:**\n\n"
         accumulated_text += "<pre><code>\n"
 
         temp_text = ""
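In the multi-turn think-mode branch above, the Gradio history is converted into the (user, assistant) tuples passed to model.chat, with the <image> placeholder re-attached to the first text turn when the conversation started with an upload. A standalone sketch of that loop with a made-up history, where start_index is hard-coded instead of using the os.path.isfile check:

# made-up Gradio-style history: the first entry holds the uploaded image,
# the later entries are [user, assistant] text pairs
history = [
    [("example.png",), None],
    ["What kind of invoice is this?", "It is an electricity bill."],
    ["What is the total amount?", "About 1,200,000 VND."],
]

start_index = 1   # in the app: 1 when history[0][0][0] is an existing image file, else 0

conv_history = []
for i, chat_pair in enumerate(history[start_index:]):
    if i == 0 and start_index == 1:
        # the first text turn carries the <image> placeholder so the model keeps seeing the image
        conv_history.append(tuple(['<image>\n' + chat_pair[0], chat_pair[1]]))
    else:
        conv_history.append(tuple(chat_pair))

print(conv_history)
# [('<image>\nWhat kind of invoice is this?', 'It is an electricity bill.'),
#  ('What is the total amount?', 'About 1,200,000 VND.')]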
@@ -218,6 +277,17 @@ Finally, I generate a structured response that best fits your input.
 
         accumulated_text += temp_text + "\n</code></pre>\n\n---\n"
 
+        # Yield the conclusion part
+        accumulated_text += "🎯 **Conclusion:**\n\n"
+
+        temp_text = ""
+        for char in conclusion_part:
+            temp_text += char
+            yield accumulated_text + temp_text + "\n\n---\n"
+            time.sleep(0.02)
+
+        accumulated_text += temp_text + "\n\n---\n"
+
 
 CSS ="""
 #component-10 {
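The handler streams both parts character by character, re-yielding the whole accumulated markdown string each time so the chat bubble keeps re-rendering. A minimal sketch of that yield pattern with made-up text and no model or UI; the loop over the thinking text is assumed to mirror the conclusion loop shown in the diff:

import time

def stream_reply(think_part, conclusion_part):
    accumulated_text = "💡 **Thinking process:**\n\n<pre><code>\n"

    temp_text = ""
    for char in think_part:                     # assumed to match the elided thinking loop
        temp_text += char
        yield accumulated_text + temp_text + "\n</code></pre>\n\n---\n"
        time.sleep(0.02)
    accumulated_text += temp_text + "\n</code></pre>\n\n---\n"

    accumulated_text += "🎯 **Conclusion:**\n\n"
    temp_text = ""
    for char in conclusion_part:
        temp_text += char
        yield accumulated_text + temp_text + "\n\n---\n"
        time.sleep(0.02)

last = ""
for chunk in stream_reply("Candidates: 3, 4. Pick 4.", "The answer is 4."):
    last = chunk            # each chunk is the full message rendered so far
print(last)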
@@ -318,7 +388,7 @@ def toggle_think_mode(current_state):
     global global_think_mode
     new_state = not current_state
     global_think_mode = not global_think_mode
-    button_label = "💡
+    button_label = "🧠Think💡" if new_state else "🧠Think"
     return new_state, button_label
 
 demo = gr.Blocks(css=CSS,js=js, theme='NoCrypt/miku')
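The diff only changes the label string returned by toggle_think_mode; the Blocks wiring itself is outside this hunk, so the snippet below is only a hedged sketch of how such a toggle is commonly wired in Gradio. The component names, initial label, and layout are assumptions, not the Space's actual code:

import gradio as gr

global_think_mode = False

def toggle_think_mode(current_state):
    global global_think_mode
    new_state = not current_state
    global_think_mode = not global_think_mode
    button_label = "🧠Think💡" if new_state else "🧠Think"
    return new_state, button_label

with gr.Blocks() as demo:
    think_state = gr.State(False)        # assumed: per-session mirror of the global flag
    think_button = gr.Button("🧠Think")  # assumed initial label
    # returning a string to a Button output updates its label
    think_button.click(toggle_think_mode, inputs=think_state, outputs=[think_state, think_button])

demo.launch()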