prithivMLmods committed on
Commit
66f98ad
·
verified ·
1 Parent(s): a71e844

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -6
app.py CHANGED
@@ -38,6 +38,7 @@ for name, model_id in MODEL_OPTIONS.items():
38
  ).to("cuda").eval()
39
  processors[name] = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
40
 
 
41
  image_extensions = Image.registered_extensions()
42
 
43
  def identify_and_save_blob(blob_path):
@@ -69,15 +70,27 @@ def qwen_inference(model_name, media_input, text_input=None):
69
  model = models[model_name]
70
  processor = processors[model_name]
71
 
 
72
  if isinstance(media_input, str):
 
73
  media_path = media_input
74
- if media_path.endswith(tuple([i for i in image_extensions.keys()])):
75
  media_type = "image"
76
  else:
77
  try:
78
  media_path, media_type = identify_and_save_blob(media_input)
79
  except Exception as e:
80
  raise ValueError("Unsupported media type. Please upload a valid image.")
 
 
 
 
 
 
 
 
 
 
81
 
82
  messages = [
83
  {
@@ -154,7 +167,7 @@ def generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, a
154
  "Justified": 4
155
  }[alignment]
156
 
157
- # Register font
158
  font_path = f"font/{font_choice}"
159
  pdfmetrics.registerFont(TTFont(font_choice, font_path))
160
 
@@ -171,8 +184,8 @@ def generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, a
171
  story.append(Spacer(1, 12))
172
 
173
  # Add plain text output
174
- text = Paragraph(plain_text, styles["Normal"])
175
- story.append(text)
176
 
177
  doc.build(story)
178
  return filename
@@ -243,8 +256,9 @@ with gr.Blocks(css=css) as demo:
243
  choices=list(MODEL_OPTIONS.keys()),
244
  value="Latex OCR"
245
  )
246
- input_media = gr.File(
247
- label="Upload Image", type="filepath"
 
248
  )
249
  text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
250
  submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
 
38
  ).to("cuda").eval()
39
  processors[name] = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
40
 
41
+ # Get valid image extensions from PIL
42
  image_extensions = Image.registered_extensions()
43
 
44
  def identify_and_save_blob(blob_path):
 
70
  model = models[model_name]
71
  processor = processors[model_name]
72
 
73
+ # Determine media type and obtain a file path if needed
74
  if isinstance(media_input, str):
75
+ # If the input is a file path, check extension
76
  media_path = media_input
77
+ if media_path.endswith(tuple(image_extensions.keys())):
78
  media_type = "image"
79
  else:
80
  try:
81
  media_path, media_type = identify_and_save_blob(media_input)
82
  except Exception as e:
83
  raise ValueError("Unsupported media type. Please upload a valid image.")
84
+ else:
85
+ # media_input is a PIL image (or numpy array) coming from gr.Image
86
+ if not isinstance(media_input, Image.Image):
87
+ # In case gr.Image returns a numpy array, convert it.
88
+ media_input = Image.fromarray(media_input)
89
+ # Save the image temporarily to disk
90
+ temp_filename = f"temp_{uuid.uuid4()}.png"
91
+ media_input.save(temp_filename)
92
+ media_path = temp_filename
93
+ media_type = "image"
94
 
95
  messages = [
96
  {
 
167
  "Justified": 4
168
  }[alignment]
169
 
170
+ # Register font (assumes font files are available in a folder named "font")
171
  font_path = f"font/{font_choice}"
172
  pdfmetrics.registerFont(TTFont(font_choice, font_path))
173
 
 
184
  story.append(Spacer(1, 12))
185
 
186
  # Add plain text output
187
+ text_para = Paragraph(plain_text, styles["Normal"])
188
+ story.append(text_para)
189
 
190
  doc.build(story)
191
  return filename
 
256
  choices=list(MODEL_OPTIONS.keys()),
257
  value="Latex OCR"
258
  )
259
+ # Using gr.Image instead of gr.File for image upload
260
+ input_media = gr.Image(
261
+ label="Upload Image", type="pil"
262
  )
263
  text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
264
  submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")