prithivMLmods committed on
Commit
62a17ee
·
verified ·
1 Parent(s): 1a3e75e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -120
app.py CHANGED
@@ -129,98 +129,6 @@ def fetch_image(image_input, min_pixels: int = None, max_pixels: int = None):
129
  image = image.resize((width, height), Image.LANCZOS)
130
  return image
131
 
132
def is_arabic_text(text: str) -> bool:
    """Return True when most letters in the markdown body are Arabic script.

    Only headers and plain paragraphs are inspected; images, code fences,
    table rows and list items are ignored so that markup does not skew the
    ratio. A text counts as Arabic when strictly more than half of its
    alphabetic characters fall into one of the Arabic Unicode blocks.

    Args:
        text: Markdown text to classify. May be empty or None.

    Returns:
        True if more than 50% of the alphabetic characters found in headers
        and paragraphs are Arabic, otherwise False (including when there is
        no inspectable content at all).
    """
    if not text:
        return False

    # Compiled once per call instead of being looked up for every line.
    # Each line is matched on its own, so no MULTILINE flag is needed.
    header_re = re.compile(r'^#{1,6}\s+(.+)$')
    paragraph_re = re.compile(r'^(?!#{1,6}\s|!\[|```|\||\s*[-*+]\s|\s*\d+\.\s)(.+)$')

    # Arabic, Arabic Supplement, Arabic Extended-A, plus the two
    # Presentation Forms blocks that pre-shaped glyphs are encoded in.
    arabic_ranges = (
        ('\u0600', '\u06FF'),
        ('\u0750', '\u077F'),
        ('\u08A0', '\u08FF'),
        ('\uFB50', '\uFDFF'),
        ('\uFE70', '\uFEFF'),
    )

    content = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        header = header_re.match(line)
        if header:
            # Keep only the header text, not the leading '#' markers.
            content.append(header.group(1))
        elif paragraph_re.match(line):
            content.append(line)

    letters = [ch for ch in ' '.join(content) if ch.isalpha()]
    if not letters:
        return False

    arabic_count = sum(
        1 for ch in letters
        if any(low <= ch <= high for low, high in arabic_ranges)
    )
    return arabic_count / len(letters) > 0.5
159
-
160
def layoutjson2md(image: "Image.Image", layout_data: List[Dict], text_key: str = 'text') -> str:
    """Convert layout-analysis items into a markdown document.

    Items are emitted in reading order (top-to-bottom, then left-to-right by
    their ``bbox``). Each item's ``category`` selects the markdown form:
    titles and section headers become headings, list items become bullets,
    pictures are cropped from ``image`` and embedded as base64 PNG data URIs,
    and page headers/footers are dropped. Items without text (other than
    pictures) are skipped.

    Args:
        image: Source page image; only accessed when a 'Picture' item has a
            four-element bbox (its ``width``, ``height``, ``crop`` are used).
        layout_data: List of dicts with optional 'bbox', 'category' and
            ``text_key`` entries.
        text_key: Key under which each item's text is stored.

    Returns:
        The markdown string. If conversion fails for any reason, the error
        is printed and ``str(layout_data)`` is returned instead.
    """
    import base64
    from io import BytesIO

    # category -> (prefix, suffix) wrapped around the text, before the newline.
    simple_formats = {
        'Title': ('# ', ''),
        'Section-header': ('## ', ''),
        'Text': ('', ''),
        'List-item': ('- ', ''),
        'Caption': ('*', '*'),
        'Footnote': ('^', '^'),
    }
    skipped_categories = ('Page-header', 'Page-footer')

    def sort_key(item):
        # A missing, empty or truncated bbox sorts as (0, 0) instead of
        # raising and discarding the whole document.
        bbox = item.get('bbox') or ()
        if len(bbox) >= 2:
            return (bbox[1], bbox[0])
        return (0, 0)

    def render_picture(bbox):
        # Embed the cropped region, falling back to a placeholder when the
        # box is unusable or the crop/encode step fails.
        if not bbox or len(bbox) != 4:
            return "![Image](Image detected)\n"
        try:
            x1, y1, x2, y2 = bbox
            # Clamp the box to the image bounds before cropping.
            x1, y1 = max(0, int(x1)), max(0, int(y1))
            x2, y2 = min(image.width, int(x2)), min(image.height, int(y2))
            if x2 <= x1 or y2 <= y1:
                return "![Image](Image region detected)\n"
            buffer = BytesIO()
            image.crop((x1, y1, x2, y2)).save(buffer, format='PNG')
            img_data = base64.b64encode(buffer.getvalue()).decode()
            return f"![Image](data:image/png;base64,{img_data})\n"
        except Exception as e:
            print(f"Error processing image region: {e}")
            return "![Image](Image detected)\n"

    def render_text(category, text):
        if category == 'Table':
            # Tables that already look like HTML are passed through as-is.
            if str(text).strip().startswith('<'):
                return f"{text}\n"
            return f"**Table:** {text}\n"
        if category == 'Formula':
            # Anything that looks like LaTeX is wrapped in a display block.
            if str(text).strip().startswith('$') or '\\' in str(text):
                return f"$$\n{text}\n$$\n"
            return f"**Formula:** {text}\n"
        prefix, suffix = simple_formats.get(category, ('', ''))
        return f"{prefix}{text}{suffix}\n"

    markdown_lines = []
    try:
        for item in sorted(layout_data, key=sort_key):
            category = item.get('category', '')
            text = item.get(text_key, '')
            if category == 'Picture':
                markdown_lines.append(render_picture(item.get('bbox', [])))
            elif not text or category in skipped_categories:
                continue
            else:
                markdown_lines.append(render_text(category, text))
            # Blank entry so consecutive blocks are separated in the output.
            markdown_lines.append("")
    except Exception as e:
        print(f"Error converting to markdown: {e}")
        return str(layout_data)
    return "\n".join(markdown_lines)
222
-
223
-
224
  @spaces.GPU
225
  def inference(model_name: str, image: Image.Image, text: str, max_new_tokens: int = 1024) -> str:
226
  try:
@@ -288,28 +196,23 @@ def process_image(
288
  'original_image': image,
289
  'raw_output': "",
290
  'layout_result': None,
291
- 'markdown_content': None
292
  }
293
  buffer = ""
294
  for raw_output, _ in inference(model_name, image, prompt, max_new_tokens):
295
  buffer = raw_output
296
  result['raw_output'] = buffer
297
  yield result
298
- try:
299
  json_match = re.search(r'```json\s*([\s\S]+?)\s*```', buffer)
300
  json_str = json_match.group(1) if json_match else buffer
301
- layout_data = json.loads(json_str)
302
-
303
  result['layout_result'] = layout_data
304
- try:
305
- markdown_content = layoutjson2md(image, layout_data, text_key='text')
306
- result['markdown_content'] = markdown_content
307
- except Exception as e:
308
- print(f"Error generating markdown: {e}")
309
- result['markdown_content'] = buffer
310
  except json.JSONDecodeError:
311
- print("Failed to parse JSON output, using raw output")
312
- result['markdown_content'] = buffer
 
 
 
313
  yield result
314
  except Exception as e:
315
  print(f"Error processing image: {e}")
@@ -317,8 +220,7 @@ def process_image(
317
  result = {
318
  'original_image': image,
319
  'raw_output': f"Error processing image: {str(e)}",
320
- 'layout_result': None,
321
- 'markdown_content': f"Error processing image: {str(e)}"
322
  }
323
  yield result
324
 
@@ -386,46 +288,40 @@ def create_gradio_interface():
386
  with gr.Tabs():
387
  with gr.Tab("📝 Extracted Content"):
388
  output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2, show_copy_button=True)
389
- with gr.Accordion("(Result.md)", open=False):
390
- markdown_output = gr.Markdown(label="Formatted Result (Result.Md)")
391
  with gr.Tab("📋 Layout JSON"):
392
  json_output = gr.JSON(label="Layout Analysis Results", value=None)
393
  def process_document(model_name, file_path, max_tokens, min_pix, max_pix):
394
  try:
395
  if not file_path:
396
- return "Please upload an image.", "Please upload an image.", None
397
  image, status = load_file_for_preview(file_path)
398
  if image is None:
399
- return status, status, None
400
  for result in process_image(model_name, image, min_pixels=int(min_pix) if min_pix else None, max_pixels=int(max_pix) if max_pix else None, max_new_tokens=max_tokens):
401
  raw_output = result['raw_output']
402
- markdown_content = result['markdown_content'] or raw_output
403
- if is_arabic_text(markdown_content):
404
- markdown_update = gr.update(value=markdown_content, rtl=True)
405
- else:
406
- markdown_update = markdown_content
407
- yield raw_output, markdown_update, result['layout_result']
408
  except Exception as e:
409
  error_msg = f"Error processing document: {str(e)}"
410
  print(error_msg)
411
  traceback.print_exc()
412
- yield error_msg, error_msg, None
413
  def handle_file_upload(file_path):
414
  if not file_path:
415
  return None, "No file loaded"
416
  image, page_info = load_file_for_preview(file_path)
417
  return image, page_info
418
  def clear_all():
419
- return None, None, "No file loaded", "", "Click 'Process Document' to see extracted content...", None
420
  file_input.change(handle_file_upload, inputs=[file_input], outputs=[image_preview, output])
421
  process_btn.click(
422
  process_document,
423
  inputs=[model_choice, file_input, max_new_tokens, min_pixels, max_pixels],
424
- outputs=[output, markdown_output, json_output]
425
  )
426
  clear_btn.click(
427
  clear_all,
428
- outputs=[file_input, image_preview, output, markdown_output, json_output]
429
  )
430
  return demo
431
 
 
129
  image = image.resize((width, height), Image.LANCZOS)
130
  return image
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  @spaces.GPU
133
  def inference(model_name: str, image: Image.Image, text: str, max_new_tokens: int = 1024) -> str:
134
  try:
 
196
  'original_image': image,
197
  'raw_output': "",
198
  'layout_result': None,
 
199
  }
200
  buffer = ""
201
  for raw_output, _ in inference(model_name, image, prompt, max_new_tokens):
202
  buffer = raw_output
203
  result['raw_output'] = buffer
204
  yield result
205
+ try:
206
  json_match = re.search(r'```json\s*([\s\S]+?)\s*```', buffer)
207
  json_str = json_match.group(1) if json_match else buffer
208
+ layout_data = json.loads(json_str)
 
209
  result['layout_result'] = layout_data
 
 
 
 
 
 
210
  except json.JSONDecodeError:
211
+ print("Failed to parse JSON output")
212
+ result['layout_result'] = {"error": "Failed to parse JSON"}
213
+ except Exception as e:
214
+ print(f"Error processing layout: {e}")
215
+ result['layout_result'] = {"error": str(e)}
216
  yield result
217
  except Exception as e:
218
  print(f"Error processing image: {e}")
 
220
  result = {
221
  'original_image': image,
222
  'raw_output': f"Error processing image: {str(e)}",
223
+ 'layout_result': {"error": str(e)}
 
224
  }
225
  yield result
226
 
 
288
  with gr.Tabs():
289
  with gr.Tab("📝 Extracted Content"):
290
  output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2, show_copy_button=True)
 
 
291
  with gr.Tab("📋 Layout JSON"):
292
  json_output = gr.JSON(label="Layout Analysis Results", value=None)
293
  def process_document(model_name, file_path, max_tokens, min_pix, max_pix):
294
  try:
295
  if not file_path:
296
+ return "Please upload an image.", None
297
  image, status = load_file_for_preview(file_path)
298
  if image is None:
299
+ return status, None
300
  for result in process_image(model_name, image, min_pixels=int(min_pix) if min_pix else None, max_pixels=int(max_pix) if max_pix else None, max_new_tokens=max_tokens):
301
  raw_output = result['raw_output']
302
+ layout_result = result['layout_result']
303
+ yield raw_output, layout_result
 
 
 
 
304
  except Exception as e:
305
  error_msg = f"Error processing document: {str(e)}"
306
  print(error_msg)
307
  traceback.print_exc()
308
+ yield error_msg, {"error": str(e)}
309
  def handle_file_upload(file_path):
310
  if not file_path:
311
  return None, "No file loaded"
312
  image, page_info = load_file_for_preview(file_path)
313
  return image, page_info
314
  def clear_all():
315
+ return None, None, "No file loaded", None
316
  file_input.change(handle_file_upload, inputs=[file_input], outputs=[image_preview, output])
317
  process_btn.click(
318
  process_document,
319
  inputs=[model_choice, file_input, max_new_tokens, min_pixels, max_pixels],
320
+ outputs=[output, json_output]
321
  )
322
  clear_btn.click(
323
  clear_all,
324
+ outputs=[file_input, image_preview, output, json_output]
325
  )
326
  return demo
327