khang119966 committed (verified)
Commit 6a42240 · 1 Parent(s): 90eac3c

Update app.py

Files changed (1): app.py (+20 -4)
app.py CHANGED

@@ -134,6 +134,19 @@ def extract_think(text):
     text = re.sub(r"<.*?>", "", text.split("<CONCLUSION>")[0]) # Remove all <...> tags
     conclusion_part = extract_conclusion(text)
     return text.replace(conclusion_part, "").strip()
+
+def wrap_text(text, max_words=20):
+    lines = text.split('\n') # Split into the existing lines first
+    wrapped_lines = []
+
+    for line in lines:
+        words = line.split()
+        if len(words) > max_words:
+            wrapped_lines.extend([' '.join(words[i:i+max_words]) for i in range(0, len(words), max_words)])
+        else:
+            wrapped_lines.append(line)
+
+    return '\n'.join(wrapped_lines)
 
 model = AutoModel.from_pretrained(
     "5CD-AI/Vintern-3B-R-beta",
@@ -142,6 +155,7 @@ model = AutoModel.from_pretrained(
     trust_remote_code=True,
     use_flash_attn=True,
 ).eval().cuda()
+
 tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-3B-R-beta", trust_remote_code=True, use_fast=False)
 
 global_think_mode =False
@@ -236,7 +250,7 @@ We currently only support one image at the start of the context! Please start a
             yield generated_text_without_prompt
     else:
         ####################################################### thinking #######################################################
-        generation_config = dict(max_new_tokens= 2000, do_sample=False, num_beams = 3, repetition_penalty=2.0)
+        generation_config = dict(max_new_tokens= 2000, do_sample=False, num_beams = 2, repetition_penalty=2.0)
 
         if len(history) == 0:
             if pixel_values is not None:
@@ -263,9 +277,11 @@ We currently only support one image at the start of the context! Please start a
         response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=conv_history, return_history=True)
 
         print(f'User: {question}\nAssistant: {response}')
-        think_part = extract_think(response)
+        think_part = wrap_text(extract_think(response))
         conclusion_part = extract_conclusion(response)
-
+        if conclusion_part == "":
+            conclusion_part = think_part
+
         buffer = ""
         thinking = think_part
 
@@ -276,7 +292,7 @@ We currently only support one image at the start of the context! Please start a
         for char in thinking:
             temp_text += char
             yield accumulated_text + temp_text + "\n</code></pre>\n"
-            time.sleep(0.0002)
+            time.sleep(0.002)
 
         accumulated_text += temp_text + "\n</code></pre>\n"
 
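In summary, this commit adds a wrap_text helper that re-wraps the extracted think text to at most 20 words per line before it is streamed, falls back to the think text when no <CONCLUSION> section is found, lowers num_beams from 3 to 2 in the thinking-mode generation_config, and slows the per-character streaming delay from 0.0002 s to 0.002 s.

For quick reference, below is a minimal, self-contained sketch of the new helper. The function body is taken from the diff above; the sample input and the __main__ driver are illustrative additions and are not part of app.py.

def wrap_text(text, max_words=20):
    lines = text.split('\n')  # split into the existing lines first
    wrapped_lines = []

    for line in lines:
        words = line.split()
        if len(words) > max_words:
            # chunk long lines into groups of at most max_words words
            wrapped_lines.extend([' '.join(words[i:i + max_words]) for i in range(0, len(words), max_words)])
        else:
            wrapped_lines.append(line)

    return '\n'.join(wrapped_lines)


if __name__ == "__main__":
    sample = "word " * 45  # a single 45-word line (illustrative input)
    print(wrap_text(sample, max_words=20))
    # prints three lines of 20, 20, and 5 words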