prithivMLmods committed on
Commit
8de4827
·
verified ·
1 Parent(s): 808f211

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -16
app.py CHANGED
@@ -10,7 +10,6 @@ import io
10
  from threading import Thread
11
  from reportlab.lib.pagesizes import A4
12
  from reportlab.lib.styles import getSampleStyleSheet
13
- from reportlab.lib import colors
14
  from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
15
  from reportlab.lib.units import inch
16
  from reportlab.pdfbase import pdfmetrics
@@ -64,6 +63,21 @@ def identify_and_save_blob(blob_path):
64
  except Exception as e:
65
  raise ValueError(f"An error occurred while processing the file: {e}")
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  @spaces.GPU
68
  def qwen_inference(model_name, media_input, text_input=None):
69
  """Handles inference for the selected model."""
@@ -72,7 +86,6 @@ def qwen_inference(model_name, media_input, text_input=None):
72
 
73
  # Determine media type and obtain a file path if needed
74
  if isinstance(media_input, str):
75
- # If the input is a file path, check extension
76
  media_path = media_input
77
  if media_path.endswith(tuple(image_extensions.keys())):
78
  media_type = "image"
@@ -83,13 +96,7 @@ def qwen_inference(model_name, media_input, text_input=None):
83
  raise ValueError("Unsupported media type. Please upload a valid image.")
84
  else:
85
  # media_input is a PIL image (or numpy array) coming from gr.Image
86
- if not isinstance(media_input, Image.Image):
87
- # In case gr.Image returns a numpy array, convert it.
88
- media_input = Image.fromarray(media_input)
89
- # Save the image temporarily to disk
90
- temp_filename = f"temp_{uuid.uuid4()}.png"
91
- media_input.save(temp_filename)
92
- media_path = temp_filename
93
  media_type = "image"
94
 
95
  messages = [
@@ -133,12 +140,13 @@ def qwen_inference(model_name, media_input, text_input=None):
133
 
134
  def format_plain_text(output_text):
135
  """Formats the output text as plain text without LaTeX delimiters."""
136
- # Remove LaTeX delimiters and convert to plain text
137
  plain_text = output_text.replace("\\(", "").replace("\\)", "").replace("\\[", "").replace("\\]", "")
138
  return plain_text
139
 
140
- def generate_document(media_path, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size):
141
  """Generates a document with the input image and plain text output."""
 
 
142
  plain_text = format_plain_text(output_text)
143
  if file_format == "pdf":
144
  return generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, alignment, image_size)
@@ -248,7 +256,6 @@ with gr.Blocks(css=css) as demo:
248
  gr.Markdown("# Qwen2VL: Compact Vision & Language Processing")
249
 
250
  with gr.Tab(label="Image Input"):
251
-
252
  with gr.Row():
253
  with gr.Column():
254
  model_choice = gr.Dropdown(
@@ -262,7 +269,6 @@ with gr.Blocks(css=css) as demo:
262
  )
263
  text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
264
  submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
265
-
266
  with gr.Column():
267
  output_text = gr.Textbox(label="Output Text", lines=10)
268
  plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=10)
@@ -347,12 +353,12 @@ with gr.Blocks(css=css) as demo:
347
  label="Image Size"
348
  )
349
  file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
350
-
351
  with gr.Row():
352
  get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")
353
-
354
  get_document_btn.click(
355
- generate_document, [input_media, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size], gr.File(label="Download Document")
 
 
356
  )
357
 
358
  demo.launch(debug=True)
 
10
  from threading import Thread
11
  from reportlab.lib.pagesizes import A4
12
  from reportlab.lib.styles import getSampleStyleSheet
 
13
  from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
14
  from reportlab.lib.units import inch
15
  from reportlab.pdfbase import pdfmetrics
 
63
  except Exception as e:
64
  raise ValueError(f"An error occurred while processing the file: {e}")
65
 
66
def get_media_file(media_input):
    """Return a file path for ``media_input``, writing it to disk if needed.

    Args:
        media_input: Either a string, which is returned unchanged as the
            path, or an in-memory image. A ``PIL.Image.Image`` is saved
            directly; any other object is first passed to
            ``Image.fromarray`` (e.g. a numpy array).

    Returns:
        The original string, or the name of a newly written
        ``temp_<uuid4>.png`` file (a relative path, so it lands in the
        current working directory).

    Raises:
        ValueError: If ``media_input`` is ``None``, i.e. no image was given.
    """
    if media_input is None:
        # Fail with a readable message instead of the opaque error that
        # Image.fromarray(None) would otherwise raise.
        raise ValueError("No media provided. Please upload a valid image.")

    if isinstance(media_input, str):
        return media_input  # Already a file path

    if not isinstance(media_input, Image.Image):
        # Convert numpy array to PIL image if needed
        media_input = Image.fromarray(media_input)

    # NOTE(review): the file is not deleted here because callers use the
    # returned path afterwards; cleanup must happen elsewhere.
    temp_filename = f"temp_{uuid.uuid4()}.png"
    media_input.save(temp_filename)
    return temp_filename
80
+
81
  @spaces.GPU
82
  def qwen_inference(model_name, media_input, text_input=None):
83
  """Handles inference for the selected model."""
 
86
 
87
  # Determine media type and obtain a file path if needed
88
  if isinstance(media_input, str):
 
89
  media_path = media_input
90
  if media_path.endswith(tuple(image_extensions.keys())):
91
  media_type = "image"
 
96
  raise ValueError("Unsupported media type. Please upload a valid image.")
97
  else:
98
  # media_input is a PIL image (or numpy array) coming from gr.Image
99
+ media_path = get_media_file(media_input)
 
 
 
 
 
 
100
  media_type = "image"
101
 
102
  messages = [
 
140
 
141
  def format_plain_text(output_text):
142
  """Formats the output text as plain text without LaTeX delimiters."""
 
143
  plain_text = output_text.replace("\\(", "").replace("\\)", "").replace("\\[", "").replace("\\]", "")
144
  return plain_text
145
 
146
+ def generate_document(media_input, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size):
147
  """Generates a document with the input image and plain text output."""
148
+ # Ensure media_input is a file path.
149
+ media_path = get_media_file(media_input)
150
  plain_text = format_plain_text(output_text)
151
  if file_format == "pdf":
152
  return generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, alignment, image_size)
 
256
  gr.Markdown("# Qwen2VL: Compact Vision & Language Processing")
257
 
258
  with gr.Tab(label="Image Input"):
 
259
  with gr.Row():
260
  with gr.Column():
261
  model_choice = gr.Dropdown(
 
269
  )
270
  text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
271
  submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
 
272
  with gr.Column():
273
  output_text = gr.Textbox(label="Output Text", lines=10)
274
  plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=10)
 
353
  label="Image Size"
354
  )
355
  file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
 
356
  with gr.Row():
357
  get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")
 
358
  get_document_btn.click(
359
+ generate_document,
360
+ [input_media, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size],
361
+ gr.File(label="Download Document")
362
  )
363
 
364
  demo.launch(debug=True)