Spaces:

prithivMLmods
/

Multimodal-OCR

Running on Zero

App Files Community

prithivMLmods committed on May 5

Commit

8bf8d90

verified ·

1 Parent(s): e576635

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -15

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ from transformers import Qwen2_5_VLForConditionalGeneration
 def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
     """
     Returns an HTML snippet for a thin animated progress bar with a label.
-    Colors can be customized; default colors are used for Qwen2VL/Aya-Vision.
     """
     return f'''
 <div style="display: flex; align-items: center;">
@@ -37,7 +37,7 @@ def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_colo
 def downsample_video(video_path):
     """
-    Downsamples a video file by extracting 25 evenly spaced frames.
     Returns a list of tuples (PIL.Image, timestamp).
     """
     vidcap = cv2.VideoCapture(video_path)
@@ -132,23 +132,14 @@ def model_inference(input_dict, history, use_rolmocr=False):
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
     buffer = ""
     yield progress_bar_html(f"Processing with {model_name}")
-    # Stream generation
     for new_text in streamer:
-        buffer += new_text.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer
-    # Once complete, save to response.txt and yield final confirmation
-    with open("response.txt", "w", encoding="utf-8") as f:
-        f.write(buffer)
-    yield f"\n✅ Response saved to `response.txt`:\n\n{buffer}"
-    return
 # Gradio Interface
 examples = [
     [{"text": "OCR the Text in the Image", "files": ["rolm/1.jpeg"]}],
@@ -172,5 +163,4 @@ demo = gr.ChatInterface(
     additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
 )
-if __name__ == "__main__":
-    demo.launch(debug=True)

 def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
     """
     Returns an HTML snippet for a thin animated progress bar with a label.
+    Colors can be customized; default colors are used for Qwen2VL/Aya‑Vision.
     """
     return f'''
 <div style="display: flex; align-items: center;">
 def downsample_video(video_path):
     """
+    Downsamples a video file by extracting 10 evenly spaced frames.
     Returns a list of tuples (PIL.Image, timestamp).
     """
     vidcap = cv2.VideoCapture(video_path)
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
     buffer = ""
     yield progress_bar_html(f"Processing with {model_name}")
     for new_text in streamer:
+        buffer += new_text
+        buffer = buffer.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer
 # Gradio Interface
 examples = [
     [{"text": "OCR the Text in the Image", "files": ["rolm/1.jpeg"]}],
     additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
 )
+demo.launch(debug=True, ssr_mode=False)