prithivMLmods committed on
Commit 7342b9f · verified · 1 Parent(s): c373e16

Update app.py

Files changed (1)
  1. app.py +25 -3
app.py CHANGED
@@ -14,6 +14,23 @@ from PIL import Image
 import requests
 from io import BytesIO
 
+# Helper function to return a progress bar HTML snippet.
+def progress_bar_html(label: str) -> str:
+    return f'''
+    <div style="display: flex; align-items: center;">
+        <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+        <div style="width: 110px; height: 5px; background-color: #f0f0f0; border-radius: 2px; overflow: hidden;">
+            <div style="width: 100%; height: 100%; background-color: #00ff3a; animation: loading 1.5s linear infinite;"></div>
+        </div>
+    </div>
+    <style>
+    @keyframes loading {{
+        0% {{ transform: translateX(-100%); }}
+        100% {{ transform: translateX(100%); }}
+    }}
+    </style>
+    '''
+
 QV_MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
 qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -42,7 +59,7 @@ def model_inference(input_dict, history):
     else:
         # For simplicity, use the first provided image.
         image = load_image(files[0])
-        yield " > Processing with Aya-Vision ███████▒▒▒ 69%"
+        yield progress_bar_html("Processing with Aya-Vision")
         messages = [{
             "role": "user",
             "content": [
@@ -121,7 +138,7 @@ def model_inference(input_dict, history):
     thread.start()
 
     buffer = ""
-    yield " > Processing with Qwen2VL Ocr ███████▒▒▒ 69%"
+    yield progress_bar_html("Processing with Qwen2VL OCR")
     for new_text in streamer:
         buffer += new_text
         buffer = buffer.replace("<|im_end|>", "")
@@ -145,7 +162,12 @@ demo = gr.ChatInterface(
     fn=model_inference,
     description="# **Multimodal OCR `@aya-vision 'prompt..'`**",
     examples=examples,
-    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="By default, it runs Qwen2VL OCR, Tag @aya-vision for Aya Vision 8B"),
+    textbox=gr.MultimodalTextbox(
+        label="Query Input",
+        file_types=["image"],
+        file_count="multiple",
+        placeholder="By default, it runs Qwen2VL OCR, Tag @aya-vision for Aya Vision 8B"
+    ),
     stop_btn="Stop Generation",
     multimodal=True,
     cache_examples=False,
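
Note on how these yields behave: in a gr.ChatInterface backed by a generator function, each successive yield replaces the currently displayed bot message, so the animated HTML bar returned by progress_bar_html stays visible until the first chunk of streamed text arrives and is then overwritten by the growing buffer. Below is a minimal, self-contained sketch of that pattern (an assumption-laden illustration, not the app's actual code): it assumes Gradio 4.x and substitutes a dummy token list for the real Qwen2VL/Aya-Vision streamer.

import time
import gradio as gr

def progress_bar_html(label: str) -> str:
    # Simplified stand-in for the helper added in this commit.
    return f"<div><span>{label}</span> <progress></progress></div>"

def model_inference(message, history):
    # First yield: show the progress bar while the "model" works.
    yield progress_bar_html("Processing with Qwen2VL OCR")
    buffer = ""
    for new_text in ["Recognized ", "text ", "streams ", "in ", "here."]:  # dummy streamer
        time.sleep(0.3)
        buffer += new_text
        # Each later yield replaces the progress bar with the partial output.
        yield buffer

demo = gr.ChatInterface(fn=model_inference, title="Progress-bar streaming sketch")
# demo.launch()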