prithivMLmods committed on
Commit
f22b5b6
·
verified ·
1 Parent(s): 564e537

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -4
app.py CHANGED
@@ -18,7 +18,6 @@ from transformers import Qwen2_5_VLForConditionalGeneration
18
  def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
19
  """
20
  Returns an HTML snippet for a thin animated progress bar with a label.
21
- Colors can be customized; default colors are used for Qwen2VL/Aya‑Vision.
22
  """
23
  return f'''
24
  <div style="display: flex; align-items: center;">
@@ -37,7 +36,7 @@ def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_colo
37
 
38
  def downsample_video(video_path):
39
  """
40
- Downsamples a video file by extracting 10 evenly spaced frames.
41
  Returns a list of tuples (PIL.Image, timestamp).
42
  """
43
  vidcap = cv2.VideoCapture(video_path)
@@ -132,14 +131,28 @@ def model_inference(input_dict, history, use_rolmocr=False):
132
  generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
133
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
134
  thread.start()
 
135
  buffer = ""
136
  yield progress_bar_html(f"Processing with {model_name}")
 
 
137
  for new_text in streamer:
138
  buffer += new_text
139
  buffer = buffer.replace("<|im_end|>", "")
140
  time.sleep(0.01)
141
  yield buffer
142
 
 
 
 
 
 
 
 
 
 
 
 
143
  # Gradio Interface
144
  examples = [
145
  [{"text": "OCR the Text in the Image", "files": ["rolm/1.jpeg"]}],
@@ -149,7 +162,7 @@ examples = [
149
 
150
  demo = gr.ChatInterface(
151
  fn=model_inference,
152
- description="# **Multimodal OCR `@RolmOCR and Default Qwen2VL OCR`**",
153
  examples=examples,
154
  textbox=gr.MultimodalTextbox(
155
  label="Query Input",
@@ -163,4 +176,5 @@ demo = gr.ChatInterface(
163
  additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
164
  )
165
 
166
- demo.launch(debug=True)
 
 
18
  def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
19
  """
20
  Returns an HTML snippet for a thin animated progress bar with a label.
 
21
  """
22
  return f'''
23
  <div style="display: flex; align-items: center;">
 
36
 
37
  def downsample_video(video_path):
38
  """
39
+ Downsamples a video file by extracting 25 evenly spaced frames.
40
  Returns a list of tuples (PIL.Image, timestamp).
41
  """
42
  vidcap = cv2.VideoCapture(video_path)
 
131
  generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
132
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
133
  thread.start()
134
+
135
  buffer = ""
136
  yield progress_bar_html(f"Processing with {model_name}")
137
+
138
+ # Stream tokens
139
  for new_text in streamer:
140
  buffer += new_text
141
  buffer = buffer.replace("<|im_end|>", "")
142
  time.sleep(0.01)
143
  yield buffer
144
 
145
+ # Ensure generation finished
146
+ thread.join()
147
+
148
+ # Write final response to file
149
+ try:
150
+ with open("response.txt", "w", encoding="utf-8") as f:
151
+ f.write(buffer.strip())
152
+ except Exception as e:
153
+ # If writing fails, you can log or yield an error message
154
+ yield f"Warning: could not write response to file: {e}"
155
+
156
  # Gradio Interface
157
  examples = [
158
  [{"text": "OCR the Text in the Image", "files": ["rolm/1.jpeg"]}],
 
162
 
163
  demo = gr.ChatInterface(
164
  fn=model_inference,
165
+ description="# **Multimodal OCR `RolmOCR and Default Qwen2VL OCR`**",
166
  examples=examples,
167
  textbox=gr.MultimodalTextbox(
168
  label="Query Input",
 
176
  additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
177
  )
178
 
179
+ if __name__ == "__main__":
180
+ demo.launch(debug=True)