xfey committed on
Commit
ab40c3e
·
1 Parent(s): 19d9428

[fix] update markdown utilities

Browse files
Files changed (2) hide show
  1. app.py +34 -24
  2. utils/markdown_utils.py +10 -3
app.py CHANGED
@@ -15,6 +15,7 @@ from PIL import Image
15
  from transformers import AutoProcessor, VisionEncoderDecoderModel
16
 
17
  from utils.utils import prepare_image, parse_layout_string, process_coordinates, ImageDimensions
 
18
 
19
  # 读取外部CSS文件
20
  def load_css():
@@ -428,15 +429,36 @@ def process_elements(layout_results, padded_image, dims, max_batch_size=16):
428
  cropped = padded_image[y1:y2, x1:x2]
429
  if cropped.size > 0:
430
  if label == "fig":
431
- # 对于图像区域,直接添加空文本结果
432
- figure_results.append(
433
- {
434
- "label": label,
435
- "bbox": [orig_x1, orig_y1, orig_x2, orig_y2],
436
- "text": "",
437
- "reading_order": reading_order,
438
- }
439
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  else:
441
  # 准备元素进行解析
442
  pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
@@ -479,21 +501,9 @@ def process_elements(layout_results, padded_image, dims, max_batch_size=16):
479
 
480
  def generate_markdown(recognition_results):
481
  """从识别结果生成Markdown内容"""
482
- markdown_parts = []
483
-
484
- for result in recognition_results:
485
- text = result.get("text", "").strip()
486
- label = result.get("label", "")
487
-
488
- if text:
489
- if label == "tab":
490
- # 表格内容
491
- markdown_parts.append(f"\n{text}\n")
492
- else:
493
- # 普通文本内容
494
- markdown_parts.append(text)
495
-
496
- return "\n\n".join(markdown_parts)
497
 
498
  # LaTeX 渲染配置
499
  latex_delimiters = [
 
15
  from transformers import AutoProcessor, VisionEncoderDecoderModel
16
 
17
  from utils.utils import prepare_image, parse_layout_string, process_coordinates, ImageDimensions
18
+ from utils.markdown_utils import MarkdownConverter
19
 
20
  # 读取外部CSS文件
21
  def load_css():
 
429
  cropped = padded_image[y1:y2, x1:x2]
430
  if cropped.size > 0:
431
  if label == "fig":
432
+ # 对于图像区域,提取图像的base64编码
433
+ try:
434
+ # 将裁剪的图像转换为PIL图像
435
+ pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
436
+
437
+ # 转换为base64
438
+ import io
439
+ import base64
440
+ buffered = io.BytesIO()
441
+ pil_crop.save(buffered, format="PNG")
442
+ img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
443
+
444
+ figure_results.append(
445
+ {
446
+ "label": label,
447
+ "bbox": [orig_x1, orig_y1, orig_x2, orig_y2],
448
+ "text": img_base64, # 存储base64编码而不是空字符串
449
+ "reading_order": reading_order,
450
+ }
451
+ )
452
+ except Exception as e:
453
+ logger.error(f"Error encoding figure to base64: {e}")
454
+ figure_results.append(
455
+ {
456
+ "label": label,
457
+ "bbox": [orig_x1, orig_y1, orig_x2, orig_y2],
458
+ "text": "", # 如果编码失败,使用空字符串
459
+ "reading_order": reading_order,
460
+ }
461
+ )
462
  else:
463
  # 准备元素进行解析
464
  pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
 
501
 
502
  def generate_markdown(recognition_results):
503
  """从识别结果生成Markdown内容"""
504
+ # 使用MarkdownConverter来处理所有类型的内容,包括图片
505
+ converter = MarkdownConverter()
506
+ return converter.convert(recognition_results)
 
 
 
 
 
 
 
 
 
 
 
 
507
 
508
  # LaTeX 渲染配置
509
  latex_delimiters = [
utils/markdown_utils.py CHANGED
@@ -226,6 +226,10 @@ class MarkdownConverter:
226
  Convert base64 encoded image to markdown image syntax
227
  """
228
  try:
 
 
 
 
229
  # Determine image format (assuming PNG if not specified)
230
  img_format = "png"
231
  if text.startswith("data:image/"):
@@ -336,7 +340,12 @@ class MarkdownConverter:
336
  label = result.get('label', '')
337
  text = result.get('text', '').strip()
338
 
339
- # Skip empty text
 
 
 
 
 
340
  if not text:
341
  continue
342
 
@@ -345,8 +354,6 @@ class MarkdownConverter:
345
  markdown_content.append(self._handle_heading(text, label))
346
  elif label == 'list':
347
  markdown_content.append(self._handle_list_item(text))
348
- elif label == 'fig':
349
- markdown_content.append(self._handle_figure(text, section_count))
350
  elif label == 'tab':
351
  markdown_content.append(self._handle_table(text))
352
  elif label == 'alg':
 
226
  Convert base64 encoded image to markdown image syntax
227
  """
228
  try:
229
+ # Check if text is empty (fallback case)
230
+ if not text.strip():
231
+ return f"![Figure {section_count}](data:image/png;base64,)\n\n"
232
+
233
  # Determine image format (assuming PNG if not specified)
234
  img_format = "png"
235
  if text.startswith("data:image/"):
 
340
  label = result.get('label', '')
341
  text = result.get('text', '').strip()
342
 
343
+ # 处理图片,即使文本为空也要处理
344
+ if label == 'fig':
345
+ markdown_content.append(self._handle_figure(text, section_count))
346
+ continue
347
+
348
+ # Skip empty text for non-figure elements
349
  if not text:
350
  continue
351
 
 
354
  markdown_content.append(self._handle_heading(text, label))
355
  elif label == 'list':
356
  markdown_content.append(self._handle_list_item(text))
 
 
357
  elif label == 'tab':
358
  markdown_content.append(self._handle_table(text))
359
  elif label == 'alg':