prithivMLmods committed on
Commit
c60dba3
·
verified ·
1 Parent(s): 852f994

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -155
app.py CHANGED
@@ -71,7 +71,7 @@ def qwen_inference(model_name, media_input, text_input=None):
71
 
72
  if isinstance(media_input, str):
73
  media_path = media_input
74
- if media_path.endswith(tuple([i for i in image_extensions.keys()])):
75
  media_type = "image"
76
  else:
77
  try:
@@ -83,10 +83,7 @@ def qwen_inference(model_name, media_input, text_input=None):
83
  {
84
  "role": "user",
85
  "content": [
86
- {
87
- "type": media_type,
88
- media_type: media_path
89
- },
90
  {"type": "text", "text": text_input},
91
  ],
92
  }
@@ -114,7 +111,6 @@ def qwen_inference(model_name, media_input, text_input=None):
114
  buffer = ""
115
  for new_text in streamer:
116
  buffer += new_text
117
- # Remove <|im_end|> or similar tokens from the output
118
  buffer = buffer.replace("<|im_end|>", "")
119
  yield buffer
120
 
@@ -146,33 +142,19 @@ def generate_pdf(media_path, plain_text, font_choice, font_size, line_spacing, a
146
  styles["Normal"].fontName = font_choice
147
  styles["Normal"].fontSize = int(font_size)
148
  styles["Normal"].leading = int(font_size) * line_spacing
149
- styles["Normal"].alignment = {
150
- "Left": 0,
151
- "Center": 1,
152
- "Right": 2,
153
- "Justified": 4
154
- }[alignment]
155
 
156
  # Register font
157
  font_path = f"font/{font_choice}"
158
  pdfmetrics.registerFont(TTFont(font_choice, font_path))
159
 
160
  story = []
161
-
162
- # Add image with size adjustment
163
- image_sizes = {
164
- "Small": (200, 200),
165
- "Medium": (400, 400),
166
- "Large": (600, 600)
167
- }
168
  img = RLImage(media_path, width=image_sizes[image_size][0], height=image_sizes[image_size][1])
169
  story.append(img)
170
  story.append(Spacer(1, 12))
171
-
172
- # Add plain text output
173
  text = Paragraph(plain_text, styles["Normal"])
174
  story.append(text)
175
-
176
  doc.build(story)
177
  return filename
178
 
@@ -180,17 +162,9 @@ def generate_docx(media_path, plain_text, font_choice, font_size, line_spacing,
180
  """Generates a DOCX document."""
181
  filename = f"output_{uuid.uuid4()}.docx"
182
  doc = docx.Document()
183
-
184
- # Add image with size adjustment
185
- image_sizes = {
186
- "Small": docx.shared.Inches(2),
187
- "Medium": docx.shared.Inches(4),
188
- "Large": docx.shared.Inches(6)
189
- }
190
  doc.add_picture(media_path, width=image_sizes[image_size])
191
  doc.add_paragraph()
192
-
193
- # Add plain text output
194
  paragraph = doc.add_paragraph()
195
  paragraph.paragraph_format.line_spacing = line_spacing
196
  paragraph.paragraph_format.alignment = {
@@ -202,138 +176,84 @@ def generate_docx(media_path, plain_text, font_choice, font_size, line_spacing,
202
  run = paragraph.add_run(plain_text)
203
  run.font.name = font_choice
204
  run.font.size = docx.shared.Pt(int(font_size))
205
-
206
  doc.save(filename)
207
  return filename
208
 
209
- # Updated CSS for output styling
210
  css = """
211
- #output_text, #plain_text_output {
212
- height: 200px;
213
- overflow: auto;
214
- border: 1px solid #ccc;
215
- }
216
- .submit-btn {
217
- background-color: #cf3434 !important;
218
- color: white !important;
219
- }
220
- .submit-btn:hover {
221
- background-color: #ff2323 !important;
222
- }
223
- .download-btn {
224
- background-color: #35a6d6 !important;
225
- color: white !important;
226
- }
227
- .download-btn:hover {
228
- background-color: #22bcff !important;
229
- }
230
  """
231
 
232
- # Gradio app setup with optimized UI
233
  with gr.Blocks(css=css) as demo:
234
- gr.Markdown("# Qwen2VL Models: Vision and Language Processing")
235
 
236
- with gr.Tab(label="Image Input"):
237
- with gr.Row():
238
- with gr.Column():
239
- model_choice = gr.Dropdown(
240
- label="Model Selection",
241
- choices=list(MODEL_OPTIONS.keys()),
242
- value="Latex OCR"
243
- )
244
- input_media = gr.File(
245
- label="Upload Image", type="filepath"
246
- )
247
- text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
248
- submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
249
-
250
- with gr.Column():
251
- output_text = gr.Textbox(label="Output Text", lines=5, elem_id="output_text")
252
- plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=5, elem_id="plain_text_output")
253
-
254
- with gr.Row():
255
- gr.Examples(
256
- examples=[
257
- ["examples/1.png", "summarize the letter", "Text Analogy Ocrtest"],
258
- ["examples/2.jpg", "Summarize the full image in detail", "Latex OCR"],
259
- ["examples/3.png", "Describe the photo", "Qwen2VL Base"],
260
- ["examples/4.png", "summarize and solve the problem", "Math Prase"],
261
- ],
262
- inputs=[input_media, text_input, model_choice],
263
- outputs=[output_text, plain_text_output],
264
- fn=lambda img, question, model: qwen_inference(model, img, question),
265
- cache_examples=False,
266
- )
267
-
268
- with gr.Accordion("Document Generation Options", open=False):
269
- line_spacing = gr.Dropdown(
270
- choices=[0.5, 1.0, 1.15, 1.5, 2.0, 2.5, 3.0],
271
- value=1.5,
272
- label="Line Spacing"
273
- )
274
- font_size = gr.Dropdown(
275
- choices=["8", "10", "12", "14", "16", "18", "20", "22", "24"],
276
- value="18",
277
- label="Font Size"
278
- )
279
- font_choice = gr.Dropdown(
280
- choices=[
281
- "DejaVuMathTeXGyre.ttf",
282
- "FiraCode-Medium.ttf",
283
- "InputMono-Light.ttf",
284
- "JetBrainsMono-Thin.ttf",
285
- "ProggyCrossed Regular Mac.ttf",
286
- "SourceCodePro-Black.ttf",
287
- "arial.ttf",
288
- "calibri.ttf",
289
- "mukta-malar-extralight.ttf",
290
- "noto-sans-arabic-medium.ttf",
291
- "times new roman.ttf",
292
- "ANGSA.ttf",
293
- "Book-Antiqua.ttf",
294
- "CONSOLA.TTF",
295
- "COOPBL.TTF",
296
- "Rockwell-Bold.ttf",
297
- "Candara Light.TTF",
298
- "Carlito-Regular.ttf Carlito-Regular.ttf",
299
- "Castellar.ttf",
300
- "Courier New.ttf",
301
- "LSANS.TTF",
302
- "Lucida Bright Regular.ttf",
303
- "TRTempusSansITC.ttf",
304
- "Verdana.ttf",
305
- "bell-mt.ttf",
306
- "eras-itc-light.ttf",
307
- "fonnts.com-aptos-light.ttf",
308
- "georgia.ttf",
309
- "segoeuithis.ttf",
310
- "youyuan.TTF",
311
- "TfPonetoneExpanded-7BJZA.ttf",
312
- ],
313
- value="youyuan.TTF",
314
- label="Font Choice"
315
- )
316
- alignment = gr.Dropdown(
317
- choices=["Left", "Center", "Right", "Justified"],
318
- value="Justified",
319
- label="Text Alignment"
320
- )
321
- image_size = gr.Dropdown(
322
- choices=["Small", "Medium", "Large"],
323
- value="Small",
324
- label="Image Size"
325
- )
326
- file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
327
- get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")
328
-
329
- submit_btn.click(
330
- qwen_inference, [model_choice, input_media, text_input], [output_text]
331
- ).then(
332
- lambda output_text: format_plain_text(output_text), [output_text], [plain_text_output]
333
- )
334
 
335
- get_document_btn.click(
336
- generate_document, [input_media, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size], gr.File(label="Download Document")
337
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
  demo.launch(debug=True)
 
71
 
72
  if isinstance(media_input, str):
73
  media_path = media_input
74
+ if media_path.endswith(tuple(image_extensions.keys())):
75
  media_type = "image"
76
  else:
77
  try:
 
83
  {
84
  "role": "user",
85
  "content": [
86
+ { "type": media_type, media_type: media_path },
 
 
 
87
  {"type": "text", "text": text_input},
88
  ],
89
  }
 
111
  buffer = ""
112
  for new_text in streamer:
113
  buffer += new_text
 
114
  buffer = buffer.replace("<|im_end|>", "")
115
  yield buffer
116
 
 
142
  styles["Normal"].fontName = font_choice
143
  styles["Normal"].fontSize = int(font_size)
144
  styles["Normal"].leading = int(font_size) * line_spacing
145
+ styles["Normal"].alignment = {"Left": 0, "Center": 1, "Right": 2, "Justified": 4}[alignment]
 
 
 
 
 
146
 
147
  # Register font
148
  font_path = f"font/{font_choice}"
149
  pdfmetrics.registerFont(TTFont(font_choice, font_path))
150
 
151
  story = []
152
+ image_sizes = {"Small": (200, 200), "Medium": (400, 400), "Large": (600, 600)}
 
 
 
 
 
 
153
  img = RLImage(media_path, width=image_sizes[image_size][0], height=image_sizes[image_size][1])
154
  story.append(img)
155
  story.append(Spacer(1, 12))
 
 
156
  text = Paragraph(plain_text, styles["Normal"])
157
  story.append(text)
 
158
  doc.build(story)
159
  return filename
160
 
 
162
  """Generates a DOCX document."""
163
  filename = f"output_{uuid.uuid4()}.docx"
164
  doc = docx.Document()
165
+ image_sizes = {"Small": docx.shared.Inches(2), "Medium": docx.shared.Inches(4), "Large": docx.shared.Inches(6)}
 
 
 
 
 
 
166
  doc.add_picture(media_path, width=image_sizes[image_size])
167
  doc.add_paragraph()
 
 
168
  paragraph = doc.add_paragraph()
169
  paragraph.paragraph_format.line_spacing = line_spacing
170
  paragraph.paragraph_format.alignment = {
 
176
  run = paragraph.add_run(plain_text)
177
  run.font.name = font_choice
178
  run.font.size = docx.shared.Pt(int(font_size))
 
179
  doc.save(filename)
180
  return filename
181
 
182
+ # Minimal CSS for compact output
183
  css = """
184
+ #output { height: 400px; overflow: auto; border: 1px solid #ccc; }
185
+ .submit-btn { background-color: #cf3434 !important; color: white !important; }
186
+ .submit-btn:hover { background-color: #ff2323 !important; }
187
+ .download-btn { background-color: #35a6d6 !important; color: white !important; }
188
+ .download-btn:hover { background-color: #22bcff !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  """
190
 
 
191
  with gr.Blocks(css=css) as demo:
192
+ gr.Markdown("# Qwen2VL: Vision & Language Processing")
193
 
194
+ # Single column layout for compact UI
195
+ model_choice = gr.Dropdown(
196
+ label="Model Selection",
197
+ choices=list(MODEL_OPTIONS.keys()),
198
+ value="Latex OCR"
199
+ )
200
+ input_media = gr.File(label="Upload Image", type="filepath")
201
+ text_input = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
202
+ submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
203
+ output_text = gr.Textbox(label="Output Text", lines=8)
204
+ plain_text_output = gr.Textbox(label="Standardized Plain Text", lines=8)
205
+
206
+ submit_btn.click(
207
+ qwen_inference, [model_choice, input_media, text_input], [output_text]
208
+ ).then(
209
+ lambda output_text: format_plain_text(output_text),
210
+ [output_text],
211
+ [plain_text_output]
212
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
+ gr.Markdown("### Document Generation Options")
215
+ line_spacing = gr.Dropdown(
216
+ choices=[0.5, 1.0, 1.15, 1.5, 2.0, 2.5, 3.0],
217
+ value=1.5,
218
+ label="Line Spacing"
219
+ )
220
+ font_size = gr.Dropdown(
221
+ choices=["8", "10", "12", "14", "16", "18", "20", "22", "24"],
222
+ value="18",
223
+ label="Font Size"
224
+ )
225
+ font_choice = gr.Dropdown(
226
+ choices=[
227
+ "DejaVuMathTeXGyre.ttf", "FiraCode-Medium.ttf", "InputMono-Light.ttf",
228
+ "JetBrainsMono-Thin.ttf", "ProggyCrossed Regular Mac.ttf", "SourceCodePro-Black.ttf",
229
+ "arial.ttf", "calibri.ttf", "mukta-malar-extralight.ttf", "noto-sans-arabic-medium.ttf",
230
+ "times new roman.ttf", "ANGSA.ttf", "Book-Antiqua.ttf", "CONSOLA.TTF", "COOPBL.TTF",
231
+ "Rockwell-Bold.ttf", "Candara Light.TTF", "Carlito-Regular.ttf", "Castellar.ttf",
232
+ "Courier New.ttf", "LSANS.TTF", "Lucida Bright Regular.ttf", "TRTempusSansITC.ttf",
233
+ "Verdana.ttf", "bell-mt.ttf", "eras-itc-light.ttf", "fonnts.com-aptos-light.ttf",
234
+ "georgia.ttf", "segoeuithis.ttf", "youyuan.TTF", "TfPonetoneExpanded-7BJZA.ttf"
235
+ ],
236
+ value="youyuan.TTF",
237
+ label="Font Choice"
238
+ )
239
+ alignment = gr.Dropdown(
240
+ choices=["Left", "Center", "Right", "Justified"],
241
+ value="Justified",
242
+ label="Text Alignment"
243
+ )
244
+ image_size = gr.Dropdown(
245
+ choices=["Small", "Medium", "Large"],
246
+ value="Small",
247
+ label="Image Size"
248
+ )
249
+ file_format = gr.Radio(["pdf", "docx"], label="File Format", value="pdf")
250
+ get_document_btn = gr.Button(value="Get Document", elem_classes="download-btn")
251
+ document_output = gr.File(label="Download Document")
252
+
253
+ get_document_btn.click(
254
+ generate_document,
255
+ [input_media, output_text, file_format, font_choice, font_size, line_spacing, alignment, image_size],
256
+ document_output
257
+ )
258
 
259
  demo.launch(debug=True)