prithivMLmods committed on
Commit a8067dc · verified · 1 Parent(s): f22b5b6

Update app.py

Files changed (1)
  1. app.py +13 -10
app.py CHANGED
@@ -15,9 +15,11 @@ from transformers import (
 from transformers import Qwen2_5_VLForConditionalGeneration
 
 # Helper Functions
+
 def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
     """
     Returns an HTML snippet for a thin animated progress bar with a label.
+    Colors can be customized; default colors are used for Qwen2VL/Aya-Vision.
     """
     return f'''
 <div style="display: flex; align-items: center;">
@@ -34,6 +36,7 @@ def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_colo
 </style>
 '''
 
+
 def downsample_video(video_path):
     """
     Downsamples a video file by extracting 25 evenly spaced frames.
@@ -78,7 +81,7 @@ rolmocr_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 # Main Inference Function
 @spaces.GPU
 def model_inference(input_dict, history, use_rolmocr=False):
-    text = input_dict["text"].strip()
+    text = input_dict.get("text", "").strip()
     files = input_dict.get("files", [])
 
     if not text and not files:
@@ -133,25 +136,25 @@ def model_inference(input_dict, history, use_rolmocr=False):
     thread.start()
 
     buffer = ""
+    # Send initial progress bar
     yield progress_bar_html(f"Processing with {model_name}")
 
-    # Stream tokens
+    # Stream generation
     for new_text in streamer:
         buffer += new_text
         buffer = buffer.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer
 
-    # Ensure generation finished
+    # Ensure generation is complete
     thread.join()
 
-    # Write final response to file
+    # Save the full response to response.txt
     try:
         with open("response.txt", "w", encoding="utf-8") as f:
-            f.write(buffer.strip())
+            f.write(buffer)
     except Exception as e:
-        # If writing fails, you can log or yield an error message
-        yield f"Warning: could not write response to file: {e}"
+        yield f"Error saving response: {e}"
 
 # Gradio Interface
 examples = [
@@ -160,9 +163,10 @@ examples = [
     [{"text": "Extract as JSON table from the table", "files": ["examples/4.jpg"]}],
 ]
 
+
 demo = gr.ChatInterface(
     fn=model_inference,
-    description="# **Multimodal OCR `RolmOCR and Default Qwen2VL OCR`**",
+    description="# **Multimodal OCR `@RolmOCR and Default Qwen2VL OCR`**",
     examples=examples,
     textbox=gr.MultimodalTextbox(
         label="Query Input",
@@ -176,5 +180,4 @@ demo = gr.ChatInterface(
     additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
 )
 
-if __name__ == "__main__":
-    demo.launch(debug=True)
+demo.launch(debug=True)
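
Note: the largest hunk above follows the usual transformers streaming pattern: model.generate() runs on a worker thread while the main thread drains a TextIteratorStreamer, and thread.join() guarantees the buffer is complete before it is written to response.txt. A minimal text-only sketch of that pattern, using gpt2 as a stand-in model (an assumption for illustration; the app itself loads Qwen2VL or RolmOCR):

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in model (assumption)
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("OCR output:", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() runs in a background thread; the streamer yields text as it arrives.
thread = Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=32))
thread.start()

buffer = ""
for new_text in streamer:
    buffer += new_text  # accumulate partial output, as the app does before yielding

thread.join()  # ensure generation is complete before saving

with open("response.txt", "w", encoding="utf-8") as f:
    f.write(buffer)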