Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,6 @@ import io
|
|
10 |
from threading import Thread
|
11 |
from reportlab.lib.pagesizes import A4
|
12 |
from reportlab.lib.styles import getSampleStyleSheet
|
13 |
-
from reportlab.lib import colors
|
14 |
from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
|
15 |
from reportlab.lib.units import inch
|
16 |
from reportlab.pdfbase import pdfmetrics
|
@@ -64,6 +63,21 @@ def identify_and_save_blob(blob_path):
|
|
64 |
except Exception as e:
|
65 |
raise ValueError(f"An error occurred while processing the file: {e}")
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
@spaces.GPU
|
68 |
def qwen_inference(model_name, media_input, text_input=None):
|
69 |
"""Handles inference for the selected model."""
|
@@ -72,7 +86,6 @@ def qwen_inference(model_name, media_input, text_input=None):
|
|
72 |
|
73 |
# Determine media type and obtain a file path if needed
|
74 |
if isinstance(media_input, str):
|
75 |
-
# If the input is a file path, check extension
|
76 |
media_path = media_input
|
77 |
if media_path.endswith(tuple(image_extensions.keys())):
|
78 |
media_type = "image"
|
@@ -83,13 +96,7 @@ def qwen_inference(model_name, media_input, text_input=None):
|
|
83 |
raise ValueError("Unsupported media type. Please upload a valid image.")
|
84 |
else:
|
85 |
# media_input is a PIL image (or numpy array) coming from gr.Image
|
86 |
-
|
87 |
-
# In case gr.Image returns a numpy array, convert it.
|
88 |
-
media_input = Image.fromarray(media_input)
|
89 |
-
# Save the image temporarily to disk
|
90 |
-
temp_filename = f"temp_{uuid.uuid4()}.png"
|
91 |
-
media_input.save(temp_filename)
|
92 |
-
media_path = temp_filename
|
93 |
media_type = "image"
|
94 |
|
95 |
messages = [
|
@@ -133,12 +140,13 @@ def qwen_inference(model_name, media_input, text_input=None):
|
|
133 |
|
134 |
def format_plain_text(output_text):
    """Formats the output text as plain text without LaTeX delimiters.

    Args:
        output_text: Text to clean. None or an empty string is treated as
            "no text".

    Returns:
        The text with the inline-math delimiters (backslash followed by a
        parenthesis) and display-math delimiters (backslash followed by a
        square bracket) removed; an empty string when there is no text.
    """
    if not output_text:
        # Nothing to strip; also avoids AttributeError on None.
        return ""
    plain_text = output_text
    # Strip the delimiters one at a time, in a fixed order.
    for delimiter in ("\\(", "\\)", "\\[", "\\]"):
        plain_text = plain_text.replace(delimiter, "")
    return plain_text
|
139 |
|
140 |
-
def generate_document(
|
141 |
"""Generates a document with the input image and plain text output."""
|
|
|
|
|
142 |
plain_text = format_plain_text(output_text)
|
143 |
if file_format == "pdf":
|
144 |
return generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, alignment, image_size)
|
@@ -248,7 +256,6 @@ with gr.Blocks(css=css) as demo:
|
|
248 |
gr.Markdown("# Qwen2VL: Compact Vision & Language Processing")
|
249 |
|
250 |
with gr.Tab(label="Image Input"):
|
251 |
-
|
252 |
with gr.Row():
|
253 |
with gr.Column():
|
254 |
model_choice = gr.Dropdown(
|
@@ -262,7 +269,6 @@ with gr.Blocks(css=css) as demo:
|
|
262 |
)
|
263 |
text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
|
264 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
265 |
-
|
266 |
with gr.Column():
|
267 |
output_text = gr.Textbox(label="Output Text", lines=10)
|
268 |
plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=10)
|
@@ -347,12 +353,12 @@ with gr.Blocks(css=css) as demo:
|
|
347 |
label="Image Size"
|
348 |
)
|
349 |
file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
|
350 |
-
|
351 |
with gr.Row():
|
352 |
get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")
|
353 |
-
|
354 |
get_document_btn.click(
|
355 |
-
generate_document,
|
|
|
|
|
356 |
)
|
357 |
|
358 |
demo.launch(debug=True)
|
|
|
10 |
from threading import Thread
|
11 |
from reportlab.lib.pagesizes import A4
|
12 |
from reportlab.lib.styles import getSampleStyleSheet
|
|
|
13 |
from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
|
14 |
from reportlab.lib.units import inch
|
15 |
from reportlab.pdfbase import pdfmetrics
|
|
|
63 |
except Exception as e:
|
64 |
raise ValueError(f"An error occurred while processing the file: {e}")
|
65 |
|
66 |
+
def get_media_file(media_input):
    """
    Ensures that the media input is a file path.

    Args:
        media_input: A file path (str), a PIL image, or any other object
            accepted by ``Image.fromarray`` (e.g. a numpy array).

    Returns:
        ``media_input`` unchanged when it is already a string; otherwise the
        name of a newly written ``temp_<uuid>.png`` file (relative path).

    Raises:
        ValueError: If ``media_input`` is None, i.e. no image was provided.
    """
    if isinstance(media_input, str):
        return media_input  # Already a file path
    if media_input is None:
        # Fail with a clear message instead of an opaque error from inside PIL.
        raise ValueError("No media provided. Please upload a valid image.")
    if not isinstance(media_input, Image.Image):
        # Convert numpy array to PIL image if needed
        media_input = Image.fromarray(media_input)
    # A random UUID keeps file names from colliding between calls. The file
    # is intentionally not deleted here because the returned path is what
    # the caller goes on to use.
    temp_filename = f"temp_{uuid.uuid4()}.png"
    media_input.save(temp_filename)
    return temp_filename
|
80 |
+
|
81 |
@spaces.GPU
|
82 |
def qwen_inference(model_name, media_input, text_input=None):
|
83 |
"""Handles inference for the selected model."""
|
|
|
86 |
|
87 |
# Determine media type and obtain a file path if needed
|
88 |
if isinstance(media_input, str):
|
|
|
89 |
media_path = media_input
|
90 |
if media_path.endswith(tuple(image_extensions.keys())):
|
91 |
media_type = "image"
|
|
|
96 |
raise ValueError("Unsupported media type. Please upload a valid image.")
|
97 |
else:
|
98 |
# media_input is a PIL image (or numpy array) coming from gr.Image
|
99 |
+
media_path = get_media_file(media_input)
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
media_type = "image"
|
101 |
|
102 |
messages = [
|
|
|
140 |
|
141 |
def format_plain_text(output_text):
    """Formats the output text as plain text without LaTeX delimiters.

    Args:
        output_text: Text to clean. None or an empty string is treated as
            "no text".

    Returns:
        The text with the inline-math delimiters (backslash followed by a
        parenthesis) and display-math delimiters (backslash followed by a
        square bracket) removed; an empty string when there is no text.
    """
    if not output_text:
        # Nothing to strip; also avoids AttributeError on None.
        return ""
    plain_text = output_text
    # Strip the delimiters one at a time, in a fixed order.
    for delimiter in ("\\(", "\\)", "\\[", "\\]"):
        plain_text = plain_text.replace(delimiter, "")
    return plain_text
|
145 |
|
146 |
+
def generate_document(media_input, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size):
|
147 |
"""Generates a document with the input image and plain text output."""
|
148 |
+
# Ensure media_input is a file path.
|
149 |
+
media_path = get_media_file(media_input)
|
150 |
plain_text = format_plain_text(output_text)
|
151 |
if file_format == "pdf":
|
152 |
return generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, alignment, image_size)
|
|
|
256 |
gr.Markdown("# Qwen2VL: Compact Vision & Language Processing")
|
257 |
|
258 |
with gr.Tab(label="Image Input"):
|
|
|
259 |
with gr.Row():
|
260 |
with gr.Column():
|
261 |
model_choice = gr.Dropdown(
|
|
|
269 |
)
|
270 |
text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
|
271 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
|
|
272 |
with gr.Column():
|
273 |
output_text = gr.Textbox(label="Output Text", lines=10)
|
274 |
plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=10)
|
|
|
353 |
label="Image Size"
|
354 |
)
|
355 |
file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
|
|
|
356 |
with gr.Row():
|
357 |
get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")
|
|
|
358 |
get_document_btn.click(
|
359 |
+
generate_document,
|
360 |
+
[input_media, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size],
|
361 |
+
gr.File(label="Download Document")
|
362 |
)
|
363 |
|
364 |
demo.launch(debug=True)
|