Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -18,7 +18,6 @@ from transformers import Qwen2_5_VLForConditionalGeneration
|
|
18 |
def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
|
19 |
"""
|
20 |
Returns an HTML snippet for a thin animated progress bar with a label.
|
21 |
-
Colors can be customized; default colors are used for Qwen2VL/Aya‑Vision.
|
22 |
"""
|
23 |
return f'''
|
24 |
<div style="display: flex; align-items: center;">
|
@@ -37,7 +36,7 @@ def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_colo
|
|
37 |
|
38 |
def downsample_video(video_path):
|
39 |
"""
|
40 |
-
Downsamples a video file by extracting
|
41 |
Returns a list of tuples (PIL.Image, timestamp).
|
42 |
"""
|
43 |
vidcap = cv2.VideoCapture(video_path)
|
@@ -132,14 +131,28 @@ def model_inference(input_dict, history, use_rolmocr=False):
|
|
132 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
133 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
134 |
thread.start()
|
|
|
135 |
buffer = ""
|
136 |
yield progress_bar_html(f"Processing with {model_name}")
|
|
|
|
|
137 |
for new_text in streamer:
|
138 |
buffer += new_text
|
139 |
buffer = buffer.replace("<|im_end|>", "")
|
140 |
time.sleep(0.01)
|
141 |
yield buffer
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
# Gradio Interface
|
144 |
examples = [
|
145 |
[{"text": "OCR the Text in the Image", "files": ["rolm/1.jpeg"]}],
|
@@ -149,7 +162,7 @@ examples = [
|
|
149 |
|
150 |
demo = gr.ChatInterface(
|
151 |
fn=model_inference,
|
152 |
-
description="# **Multimodal OCR
|
153 |
examples=examples,
|
154 |
textbox=gr.MultimodalTextbox(
|
155 |
label="Query Input",
|
@@ -163,4 +176,5 @@ demo = gr.ChatInterface(
|
|
163 |
additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
|
164 |
)
|
165 |
|
166 |
-
|
|
|
|
18 |
def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
|
19 |
"""
|
20 |
Returns an HTML snippet for a thin animated progress bar with a label.
|
|
|
21 |
"""
|
22 |
return f'''
|
23 |
<div style="display: flex; align-items: center;">
|
|
|
36 |
|
37 |
def downsample_video(video_path):
|
38 |
"""
|
39 |
+
Downsamples a video file by extracting 25 evenly spaced frames.
|
40 |
Returns a list of tuples (PIL.Image, timestamp).
|
41 |
"""
|
42 |
vidcap = cv2.VideoCapture(video_path)
|
|
|
131 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
132 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
133 |
thread.start()
|
134 |
+
|
135 |
buffer = ""
|
136 |
yield progress_bar_html(f"Processing with {model_name}")
|
137 |
+
|
138 |
+
# Stream tokens
|
139 |
for new_text in streamer:
|
140 |
buffer += new_text
|
141 |
buffer = buffer.replace("<|im_end|>", "")
|
142 |
time.sleep(0.01)
|
143 |
yield buffer
|
144 |
|
145 |
+
# Ensure generation finished
|
146 |
+
thread.join()
|
147 |
+
|
148 |
+
# Write the final response to response.txt (the file is overwritten on each call)
|
149 |
+
try:
|
150 |
+
with open("response.txt", "w", encoding="utf-8") as f:
|
151 |
+
f.write(buffer.strip())
|
152 |
+
except Exception as e:
|
153 |
+
# Writing failed: yield a warning message to the caller instead of raising
|
154 |
+
yield f"Warning: could not write response to file: {e}"
|
155 |
+
|
156 |
# Gradio Interface
|
157 |
examples = [
|
158 |
[{"text": "OCR the Text in the Image", "files": ["rolm/1.jpeg"]}],
|
|
|
162 |
|
163 |
demo = gr.ChatInterface(
|
164 |
fn=model_inference,
|
165 |
+
description="# **Multimodal OCR `RolmOCR and Default Qwen2VL OCR`**",
|
166 |
examples=examples,
|
167 |
textbox=gr.MultimodalTextbox(
|
168 |
label="Query Input",
|
|
|
176 |
additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
|
177 |
)
|
178 |
|
179 |
+
if __name__ == "__main__":
|
180 |
+
demo.launch(debug=True)
|