Vintern-3B-R-Demo

Running on Zero

App Files Files Community

khang119966 committed on Mar 20

Commit

94865fa

verified ·

1 Parent(s): 27bc946

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -4

app.py CHANGED Viewed

@@ -117,6 +117,8 @@ def correct_image_orientation(image_path):
 def load_image(image_file, input_size=448, max_num=12):
     image = correct_image_orientation(image_file).convert('RGB')
     print("Image size: ", image.size)
     transform = build_transform(input_size=input_size)
     images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
@@ -125,12 +127,13 @@ def load_image(image_file, input_size=448, max_num=12):
     return pixel_values
 model = AutoModel.from_pretrained(
-    "5CD-AI/Vintern-1B-v3_5",
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True,
 ).eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-1B-v3_5", trust_remote_code=True, use_fast=False)
 @spaces.GPU
 def chat(message, history):
@@ -285,15 +288,16 @@ demo = gr.Blocks(css=CSS,js=js, theme='NoCrypt/miku')
 with demo:
     chat_demo_interface = gr.ChatInterface(
         fn=chat,
-        description="""**Vintern-1B-v3.5** is the latest in the Vintern series, bringing major improvements over v2 across all benchmarks. This **continuous fine-tuning Version** enhances Vietnamese capabilities while retaining strong English performance. It excels in OCR, text recognition, and Vietnam-specific document understanding.""",
         examples=[{"text": "Hãy viết một email giới thiệu sản phẩm trong ảnh.", "files":["./demo_3.jpg"]},
                   {"text": "Trích xuất các thông tin từ ảnh trả về markdown.", "files":["./demo_1.jpg"]},
                   {"text": "Bạn là nhân viên marketing chuyên nghiệp. Hãy viết một bài quảng cáo dài trên mạng xã hội giới thiệu về cửa hàng.", "files":["./demo_2.jpg"]},
                   {"text": "Trích xuất thông tin kiện hàng trong ảnh và trả về dạng JSON.", "files":["./demo_4.jpg"]}],
-        title="❄️ Vintern-1B-v3.5 Demo ❄️",
         multimodal=True,
         css=CSS,
         js=js,
         theme='NoCrypt/miku'
     )

 def load_image(image_file, input_size=448, max_num=12):
     image = correct_image_orientation(image_file).convert('RGB')
+    width, height = image.size
+    image = image.resize((width * 2, height * 2), Image.LANCZOS)
     print("Image size: ", image.size)
     transform = build_transform(input_size=input_size)
     images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
     return pixel_values
 model = AutoModel.from_pretrained(
+    "5CD-AI/Vintern-3B-R-beta",
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True,
+    use_flash_attn=True,
 ).eval().cuda()
+tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-3B-R-beta", trust_remote_code=True, use_fast=False)
 @spaces.GPU
 def chat(message, history):
 with demo:
     chat_demo_interface = gr.ChatInterface(
         fn=chat,
+        description="""**Vintern-3B-R-beta** is the latest in the Vintern series.""",
         examples=[{"text": "Hãy viết một email giới thiệu sản phẩm trong ảnh.", "files":["./demo_3.jpg"]},
                   {"text": "Trích xuất các thông tin từ ảnh trả về markdown.", "files":["./demo_1.jpg"]},
                   {"text": "Bạn là nhân viên marketing chuyên nghiệp. Hãy viết một bài quảng cáo dài trên mạng xã hội giới thiệu về cửa hàng.", "files":["./demo_2.jpg"]},
                   {"text": "Trích xuất thông tin kiện hàng trong ảnh và trả về dạng JSON.", "files":["./demo_4.jpg"]}],
+        title="❄️ Vintern-3B-R-beta Demo ❄️",
         multimodal=True,
         css=CSS,
         js=js,
+        additional_inputs=[gr.Checkbox(label="Think", value=False)],  # Thêm checkbox
         theme='NoCrypt/miku'
     )