[fix] update markdown utilities
Browse files
- app.py +34 -24
- utils/markdown_utils.py +10 -3
app.py
CHANGED
@@ -15,6 +15,7 @@ from PIL import Image
|
|
15 |
from transformers import AutoProcessor, VisionEncoderDecoderModel
|
16 |
|
17 |
from utils.utils import prepare_image, parse_layout_string, process_coordinates, ImageDimensions
|
|
|
18 |
|
19 |
# 读取外部CSS文件
|
20 |
def load_css():
|
@@ -428,15 +429,36 @@ def process_elements(layout_results, padded_image, dims, max_batch_size=16):
|
|
428 |
cropped = padded_image[y1:y2, x1:x2]
|
429 |
if cropped.size > 0:
|
430 |
if label == "fig":
|
431 |
-
#
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
440 |
else:
|
441 |
# 准备元素进行解析
|
442 |
pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
|
@@ -479,21 +501,9 @@ def process_elements(layout_results, padded_image, dims, max_batch_size=16):
|
|
479 |
|
480 |
def generate_markdown(recognition_results):
|
481 |
"""从识别结果生成Markdown内容"""
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
text = result.get("text", "").strip()
|
486 |
-
label = result.get("label", "")
|
487 |
-
|
488 |
-
if text:
|
489 |
-
if label == "tab":
|
490 |
-
# 表格内容
|
491 |
-
markdown_parts.append(f"\n{text}\n")
|
492 |
-
else:
|
493 |
-
# 普通文本内容
|
494 |
-
markdown_parts.append(text)
|
495 |
-
|
496 |
-
return "\n\n".join(markdown_parts)
|
497 |
|
498 |
# LaTeX 渲染配置
|
499 |
latex_delimiters = [
|
|
|
15 |
from transformers import AutoProcessor, VisionEncoderDecoderModel
|
16 |
|
17 |
from utils.utils import prepare_image, parse_layout_string, process_coordinates, ImageDimensions
|
18 |
+
from utils.markdown_utils import MarkdownConverter
|
19 |
|
20 |
# 读取外部CSS文件
|
21 |
def load_css():
|
|
|
429 |
cropped = padded_image[y1:y2, x1:x2]
|
430 |
if cropped.size > 0:
|
431 |
if label == "fig":
|
432 |
+
# 对于图像区域,提取图像的base64编码
|
433 |
+
try:
|
434 |
+
# 将裁剪的图像转换为PIL图像
|
435 |
+
pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
|
436 |
+
|
437 |
+
# 转换为base64
|
438 |
+
import io
|
439 |
+
import base64
|
440 |
+
buffered = io.BytesIO()
|
441 |
+
pil_crop.save(buffered, format="PNG")
|
442 |
+
img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
443 |
+
|
444 |
+
figure_results.append(
|
445 |
+
{
|
446 |
+
"label": label,
|
447 |
+
"bbox": [orig_x1, orig_y1, orig_x2, orig_y2],
|
448 |
+
"text": img_base64, # 存储base64编码而不是空字符串
|
449 |
+
"reading_order": reading_order,
|
450 |
+
}
|
451 |
+
)
|
452 |
+
except Exception as e:
|
453 |
+
logger.error(f"Error encoding figure to base64: {e}")
|
454 |
+
figure_results.append(
|
455 |
+
{
|
456 |
+
"label": label,
|
457 |
+
"bbox": [orig_x1, orig_y1, orig_x2, orig_y2],
|
458 |
+
"text": "", # 如果编码失败,使用空字符串
|
459 |
+
"reading_order": reading_order,
|
460 |
+
}
|
461 |
+
)
|
462 |
else:
|
463 |
# 准备元素进行解析
|
464 |
pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
|
|
|
501 |
|
502 |
def generate_markdown(recognition_results):
|
503 |
"""从识别结果生成Markdown内容"""
|
504 |
+
# 使用MarkdownConverter来处理所有类型的内容,包括图片
|
505 |
+
converter = MarkdownConverter()
|
506 |
+
return converter.convert(recognition_results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
|
508 |
# LaTeX 渲染配置
|
509 |
latex_delimiters = [
|
utils/markdown_utils.py
CHANGED
@@ -226,6 +226,10 @@ class MarkdownConverter:
|
|
226 |
Convert base64 encoded image to markdown image syntax
|
227 |
"""
|
228 |
try:
|
|
|
|
|
|
|
|
|
229 |
# Determine image format (assuming PNG if not specified)
|
230 |
img_format = "png"
|
231 |
if text.startswith("data:image/"):
|
@@ -336,7 +340,12 @@ class MarkdownConverter:
|
|
336 |
label = result.get('label', '')
|
337 |
text = result.get('text', '').strip()
|
338 |
|
339 |
-
#
|
|
|
|
|
|
|
|
|
|
|
340 |
if not text:
|
341 |
continue
|
342 |
|
@@ -345,8 +354,6 @@ class MarkdownConverter:
|
|
345 |
markdown_content.append(self._handle_heading(text, label))
|
346 |
elif label == 'list':
|
347 |
markdown_content.append(self._handle_list_item(text))
|
348 |
-
elif label == 'fig':
|
349 |
-
markdown_content.append(self._handle_figure(text, section_count))
|
350 |
elif label == 'tab':
|
351 |
markdown_content.append(self._handle_table(text))
|
352 |
elif label == 'alg':
|
|
|
226 |
Convert base64 encoded image to markdown image syntax
|
227 |
"""
|
228 |
try:
|
229 |
+
# Check if text is empty (fallback case)
|
230 |
+
if not text.strip():
|
231 |
+
return f"\n\n"
|
232 |
+
|
233 |
# Determine image format (assuming PNG if not specified)
|
234 |
img_format = "png"
|
235 |
if text.startswith("data:image/"):
|
|
|
340 |
label = result.get('label', '')
|
341 |
text = result.get('text', '').strip()
|
342 |
|
343 |
+
# 处理图片,即使文本为空也要处理
|
344 |
+
if label == 'fig':
|
345 |
+
markdown_content.append(self._handle_figure(text, section_count))
|
346 |
+
continue
|
347 |
+
|
348 |
+
# Skip empty text for non-figure elements
|
349 |
if not text:
|
350 |
continue
|
351 |
|
|
|
354 |
markdown_content.append(self._handle_heading(text, label))
|
355 |
elif label == 'list':
|
356 |
markdown_content.append(self._handle_list_item(text))
|
|
|
|
|
357 |
elif label == 'tab':
|
358 |
markdown_content.append(self._handle_table(text))
|
359 |
elif label == 'alg':
|