Spaces:

PaddlePaddle
/

PP-StructureV3_Online_Demo

Running

App Files Community

XinyueZhou committed on May 11

Commit

14c5401

verified ·

1 Parent(s): 4898b9f

Update app.py

Browse files

Files changed (1) hide show

app.py +231 -51

app.py CHANGED Viewed

@@ -5,17 +5,55 @@ import os
 import re
 import tempfile
 import zipfile
 from datetime import datetime
 from pathlib import Path
 import gradio as gr
 import requests
 from PIL import Image
-import pdf2image
 API_URL = "https://t707h6d9q6oftbx3.aistudio-app.com/layout-parsing"
 TOKEN = os.getenv("API_TOKEN")
 CSS = """
 :root {
@@ -178,29 +216,106 @@ button:hover {
     text-align: center;
     margin: 20px 0;
 }
 """
 def clean_markdown_text(text):
     if not text:
         return ""
     text = re.sub(r'<[^>]+>', '', text)
     text = re.sub(r'\n{3,}', '\n\n', text)
     return text.strip()
-def pdf_to_images(pdf_path):
     try:
-        images = pdf2image.convert_from_path(pdf_path)
-        return [image for image in images]
-    except:
-        return None
 def process_file(file_path, file_type):
     try:
         with open(file_path, "rb") as f:
             file_bytes = f.read()
         file_data = base64.b64encode(file_bytes).decode("ascii")
         headers = {
             "Authorization": f"token {TOKEN}",
@@ -215,6 +330,7 @@ def process_file(file_path, file_type):
         )
         response.raise_for_status()
         result = response.json()
         layout_results = result.get("result", {}).get("layoutParsingResults", [])
@@ -222,69 +338,94 @@ def process_file(file_path, file_type):
         clean_markdown_contents = []
         for res in layout_results:
             markdown = res.get("markdown", {})
-            if isinstance(markdown, str):
-                original = markdown
-            elif isinstance(markdown, dict):
-                original = markdown.get("text", "")
             markdown_contents.append(original)
             clean_markdown_contents.append(clean_markdown_text(original))
         if file_type == "pdf":
             images = pdf_to_images(file_path)
         else:
             images = [Image.open(file_path)]
         return {
             "original_file": file_path,
             "markdown_contents": markdown_contents,
             "clean_markdown_contents": clean_markdown_contents,
             "pdf_images": images,
             "api_response": result
         }
     except Exception as e:
         raise gr.Error(f"Error processing file: {str(e)}")
 def create_zip_file(results):
     try:
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         zip_filename = f"analysis_results_{timestamp}.zip"
         temp_dir = tempfile.mkdtemp()
         zip_path = os.path.join(temp_dir, zip_filename)
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
             original_path = results.get("original_file", "")
             if original_path and Path(original_path).exists():
                 zipf.write(original_path, f"original/{Path(original_path).name}")
-            markdowns = results.get("markdown_contents", [])
-            for i, md_content in enumerate(markdowns):
-                if md_content:
-                    zipf.writestr(f"markdown/original/markdown_{i + 1}.md", md_content)
             api_response = results.get("api_response", {})
             zipf.writestr("api_response.json", json.dumps(api_response, indent=2, ensure_ascii=False))
         return zip_path
     except Exception as e:
         raise gr.Error(f"Error creating ZIP file: {str(e)}")
 def export_markdown(results):
     try:
         markdowns = results.get("markdown_contents", [])
         if not markdowns:
             raise gr.Error("No markdown content to export")
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        filename = f"original_markdown_{timestamp}.md"
         content = "\n\n".join(markdowns)
         temp_dir = tempfile.mkdtemp()
         file_path = os.path.join(temp_dir, filename)
         with open(file_path, 'w', encoding='utf-8') as f:
@@ -298,11 +439,11 @@ def export_markdown(results):
 with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
     results_state = gr.State()
-    # 添加logo图片
     with gr.Column(elem_classes=["logo-container"]):
-        gr.Image("pp-structurev3.png", elem_classes=["logo-img"], show_label=False)
-    # 添加导航栏链接
     with gr.Row(elem_classes=["nav-bar"]):
         gr.HTML("""
         <div class="nav-links">
@@ -311,19 +452,20 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
         </div>
         """)
     with gr.Column(elem_classes=["upload-section"]):
         file_type = gr.Radio(
             ["pdf", "image"],
-            label="File type",
             value="pdf",
             interactive=True
         )
         file_input = gr.File(
-            label="Upload document",
             file_types=[".pdf", ".jpg", ".jpeg", ".png"],
             type="filepath"
         )
-        process_btn = gr.Button("Analyze document", variant="primary")
         loading_spinner = gr.Column(
             visible=False,
@@ -332,16 +474,21 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
         with loading_spinner:
             gr.HTML("""
             <div class="loader"></div>
-            <p>Wait...</p>
             """)
     with gr.Row(elem_classes=["result-container"]):
         with gr.Column(elem_classes=["pdf-preview"]):
-            gr.Markdown("### Original document preview")
-            pdf_display = gr.Gallery(
-                label="PDF page",
                 show_label=False,
-                elem_classes=["gallery-container"]
             )
         with gr.Column(elem_classes=["markdown-result"]):
@@ -352,21 +499,49 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
                     value="Original Markdown",
                     interactive=True
                 )
-            markdown_display = gr.HTML(label="Analysis Results")
     with gr.Column(elem_classes=["download-section"]):
         gr.Markdown("### Result Export")
         with gr.Row():
-            download_md_btn = gr.Button("Download Original Markdown", variant="secondary")
-            download_all_btn = gr.Button("Download Complete Analysis Results (ZIP)", variant="primary")
-        download_file = gr.File(visible=False, label="Download file", elem_classes=["file-download"])
     def toggle_spinner():
         return gr.update(visible=True)
     def hide_spinner():
         return gr.update(visible=False)
     process_btn.click(
         toggle_spinner,
@@ -378,21 +553,17 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
     ).then(
         hide_spinner,
         outputs=[loading_spinner]
-    ).success(
-        lambda res: res["pdf_images"] if res and res.get("pdf_images") else [],
-        inputs=[results_state],
-        outputs=[pdf_display]
-    ).success(
-        lambda res: res["markdown_contents"][0] if res and res.get("markdown_contents") else "",
         inputs=[results_state],
-        outputs=[markdown_display]
     )
     display_mode.change(
         lambda mode, res: (
-            res["markdown_contents"][0] if mode == "原始Markdown"
             else res["clean_markdown_contents"][0]
-        ) if res else "",
         inputs=[display_mode, results_state],
         outputs=[markdown_display]
     )
@@ -402,8 +573,7 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
         inputs=[results_state],
         outputs=[download_file]
     ).then(
-        lambda x: gr.update(visible=True),
-        inputs=[download_file],
         outputs=[download_file]
     )
@@ -412,10 +582,20 @@ with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
         inputs=[results_state],
         outputs=[download_file]
     ).then(
-        lambda x: gr.update(visible=True),
-        inputs=[download_file],
         outputs=[download_file]
     )
 if __name__ == "__main__":
-    demo.launch(share=True)

 import re
 import tempfile
 import zipfile
+import shutil
+import atexit
 from datetime import datetime
 from pathlib import Path
 import gradio as gr
 import requests
 from PIL import Image
+try:
+    import pdf2image
+    PDF2IMAGE_AVAILABLE = True
+except ImportError:
+    PDF2IMAGE_AVAILABLE = False
+try:
+    import fitz  # PyMuPDF
+    PYGMUPDF_AVAILABLE = True
+except ImportError:
+    PYGMUPDF_AVAILABLE = False
+# API Configuration
 API_URL = "https://t707h6d9q6oftbx3.aistudio-app.com/layout-parsing"
 TOKEN = os.getenv("API_TOKEN")
# Temporary directory management
# Paths of temporary directories created by this module; removed at exit.
temp_dirs = []


def cleanup():
    """Remove every registered temporary directory (best effort).

    Each path is popped from ``temp_dirs`` before removal, so the registry
    is empty afterwards and calling this function again is a no-op.
    Filesystem errors (missing directory, permission problems) are ignored
    on purpose: this runs as an ``atexit`` hook and must not fail shutdown.
    """
    while temp_dirs:
        # ignore_errors=True keeps the best-effort behaviour without a bare
        # ``except`` that would also swallow KeyboardInterrupt/SystemExit.
        shutil.rmtree(temp_dirs.pop(), ignore_errors=True)


atexit.register(cleanup)
def image_to_base64(image_path):
    """Return the image at *image_path* as a base64 ``data:`` URI.

    The MIME type is guessed from the file extension so that JPEG and other
    image files are labelled correctly; when the extension is unknown or is
    not an image type, ``image/png`` is used.

    Args:
        image_path: Path (``str`` or ``Path``) of the image file.

    Returns:
        A ``data:<mime>;base64,<payload>`` string, or ``""`` when
        *image_path* is empty or does not point to an existing file.
    """
    import mimetypes

    if not image_path or not Path(image_path).exists():
        return ""
    guessed_type, _ = mimetypes.guess_type(str(image_path))
    if not guessed_type or not guessed_type.startswith("image/"):
        guessed_type = "image/png"
    with open(image_path, "rb") as image_file:
        payload = base64.b64encode(image_file.read()).decode("utf-8")
    return f"data:{guessed_type};base64,{payload}"
+# Get current directory
+current_dir = Path(__file__).parent
+logo_path = current_dir / "pp-structurev3.png"
+logo_base64 = image_to_base64(logo_path)
 CSS = """
 :root {
     text-align: center;
     margin: 20px 0;
 }
+/* PDF Viewer specific styles */
+.pdf-viewer-container {
+    width: 100%;
+    height: 600px;
+    border: 1px solid #ddd;
+    margin-top: 15px;
+    background-color: #f9f9f9;
+    display: flex;
+    justify-content: center;
+    align-items: center;
+}
+.pdf-viewer-container embed {
+    width: 100%;
+    height: 100%;
+}
+.no-preview-message {
+    color: #666;
+    font-size: 16px;
+    text-align: center;
+    padding: 20px;
+}
 """
 def clean_markdown_text(text):
+    """Clean markdown text from HTML tags and excessive newlines"""
     if not text:
         return ""
     text = re.sub(r'<[^>]+>', '', text)
     text = re.sub(r'\n{3,}', '\n\n', text)
     return text.strip()
def pdf_to_images(pdf_path, dpi=150):
    """Render the pages of a PDF as images, trying each available backend.

    ``pdf2image`` is tried first when it was imported successfully; if it is
    unavailable or raises, PyMuPDF (``fitz``) is tried next. A failure in
    either backend is printed and does not propagate.

    Args:
        pdf_path: Path of the PDF file to render.
        dpi: Rendering resolution passed to the backend.

    Returns:
        A list of page images from the first backend that succeeds, or
        ``None`` when no backend is available or every backend failed.
    """
    if PDF2IMAGE_AVAILABLE:
        try:
            return pdf2image.convert_from_path(pdf_path, dpi=dpi)
        except Exception as e:
            print(f"pdf2image conversion failed: {str(e)}")
    if PYGMUPDF_AVAILABLE:
        try:
            # The context manager closes the document even when a page fails
            # to render, so the file handle is not leaked.
            with fitz.open(pdf_path) as doc:
                pixmaps = (page.get_pixmap(dpi=dpi) for page in doc)
                # Built as a fresh list so a partial result from an earlier
                # attempt can never be mixed into this one.
                return [
                    Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                    for pix in pixmaps
                ]
        except Exception as e:
            print(f"PyMuPDF conversion failed: {str(e)}")
    return None
def create_pdf_preview(pdf_path):
    """Build the HTML snippet that previews a PDF inside the page.

    The file is read in full and inlined as a base64 ``data:`` URI in an
    ``<embed>`` element. When *pdf_path* is empty or the file does not exist
    a "no PDF" placeholder is returned; when reading or encoding fails the
    error is printed and a "generation failed" placeholder is returned.
    """
    if not (pdf_path and Path(pdf_path).exists()):
        return '<div class="no-preview-message">No PDF file available</div>'

    try:
        with open(pdf_path, "rb") as handle:
            raw = handle.read()
        pdf_base64 = base64.b64encode(raw).decode("ascii")
    except Exception as e:
        print(f"Failed to create PDF preview: {str(e)}")
        return '<div class="no-preview-message">PDF preview generation failed</div>'

    return f"""
        <div class="pdf-viewer-container">
            <embed
                src="data:application/pdf;base64,{pdf_base64}"
                type="application/pdf"
                width="100%"
                height="100%"
            >
        </div>
        """
 def process_file(file_path, file_type):
+    """Process uploaded file with API"""
     try:
+        if not file_path:
+            raise ValueError("Please upload a file first")
+        if file_type == "pdf" and not str(file_path).lower().endswith('.pdf'):
+            raise ValueError("Please upload a valid PDF file")
+        if file_type == "image" and not str(file_path).lower().endswith(('.jpg', '.jpeg', '.png')):
+            raise ValueError("Please upload a valid image file (JPG/JPEG/PNG)")
+        # Read file content
         with open(file_path, "rb") as f:
             file_bytes = f.read()
+        # Call API for processing
         file_data = base64.b64encode(file_bytes).decode("ascii")
         headers = {
             "Authorization": f"token {TOKEN}",
         )
         response.raise_for_status()
+        # Parse API response
         result = response.json()
         layout_results = result.get("result", {}).get("layoutParsingResults", [])
         clean_markdown_contents = []
         for res in layout_results:
             markdown = res.get("markdown", {})
+            original = markdown if isinstance(markdown, str) else markdown.get("text", "")
             markdown_contents.append(original)
             clean_markdown_contents.append(clean_markdown_text(original))
+        # Generate preview content
         if file_type == "pdf":
             images = pdf_to_images(file_path)
+            pdf_preview = create_pdf_preview(file_path)
         else:
             images = [Image.open(file_path)]
+            pdf_preview = '<div class="no-preview-message">Image file preview</div>'
         return {
             "original_file": file_path,
+            "file_type": file_type,
             "markdown_contents": markdown_contents,
             "clean_markdown_contents": clean_markdown_contents,
             "pdf_images": images,
+            "pdf_preview": pdf_preview,
             "api_response": result
         }
+    except requests.exceptions.RequestException as e:
+        raise gr.Error(f"API request failed: {str(e)}")
     except Exception as e:
         raise gr.Error(f"Error processing file: {str(e)}")
 def create_zip_file(results):
+    """Create ZIP file with all analysis results"""
     try:
+        if not results:
+            raise ValueError("No results to export")
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         zip_filename = f"analysis_results_{timestamp}.zip"
         temp_dir = tempfile.mkdtemp()
+        temp_dirs.append(temp_dir)
         zip_path = os.path.join(temp_dir, zip_filename)
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            # Add original file
             original_path = results.get("original_file", "")
             if original_path and Path(original_path).exists():
                 zipf.write(original_path, f"original/{Path(original_path).name}")
+            # Add markdown content
+            for i, (orig_md, clean_md) in enumerate(zip(
+                results.get("markdown_contents", []),
+                results.get("clean_markdown_contents", [])
+            )):
+                if orig_md:
+                    zipf.writestr(f"markdown/original/page_{i+1}.md", orig_md)
+                if clean_md:
+                    zipf.writestr(f"markdown/clean/page_{i+1}.md", clean_md)
+            # Add API response
             api_response = results.get("api_response", {})
             zipf.writestr("api_response.json", json.dumps(api_response, indent=2, ensure_ascii=False))
+            # Add PDF images if available
+            if results.get("file_type") == "pdf" and results.get("pdf_images"):
+                for i, img in enumerate(results["pdf_images"]):
+                    img_path = os.path.join(temp_dir, f"page_{i+1}.jpg")
+                    img.save(img_path, "JPEG", quality=85)
+                    zipf.write(img_path, f"images/page_{i+1}.jpg")
         return zip_path
     except Exception as e:
         raise gr.Error(f"Error creating ZIP file: {str(e)}")
 def export_markdown(results):
+    """Export markdown content to file"""
     try:
+        if not results:
+            raise ValueError("No results to export")
         markdowns = results.get("markdown_contents", [])
         if not markdowns:
             raise gr.Error("No markdown content to export")
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"markdown_export_{timestamp}.md"
         content = "\n\n".join(markdowns)
         temp_dir = tempfile.mkdtemp()
+        temp_dirs.append(temp_dir)
         file_path = os.path.join(temp_dir, filename)
         with open(file_path, 'w', encoding='utf-8') as f:
 with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
     results_state = gr.State()
+    # Header with logo
     with gr.Column(elem_classes=["logo-container"]):
+        gr.HTML(f'<img src="{logo_base64}" class="logo-img">')
+    # Navigation bar
     with gr.Row(elem_classes=["nav-bar"]):
         gr.HTML("""
         <div class="nav-links">
         </div>
         """)
+    # Upload section
     with gr.Column(elem_classes=["upload-section"]):
         file_type = gr.Radio(
             ["pdf", "image"],
+            label="File Type",
             value="pdf",
             interactive=True
         )
         file_input = gr.File(
+            label="Upload Document",
             file_types=[".pdf", ".jpg", ".jpeg", ".png"],
             type="filepath"
         )
+        process_btn = gr.Button("Analyze Document", variant="primary")
         loading_spinner = gr.Column(
             visible=False,
         with loading_spinner:
             gr.HTML("""
             <div class="loader"></div>
+            <p>Processing, please wait...</p>
             """)
+    # Results display section
     with gr.Row(elem_classes=["result-container"]):
         with gr.Column(elem_classes=["pdf-preview"]):
+            gr.Markdown("### Original Document Preview")
+            pdf_preview = gr.HTML(label="PDF Preview")
+            pdf_gallery = gr.Gallery(
+                label="PDF Pages",
                 show_label=False,
+                elem_classes=["gallery-container"],
+                columns=[1],
+                object_fit="contain",
+                visible=False
             )
         with gr.Column(elem_classes=["markdown-result"]):
                     value="Original Markdown",
                     interactive=True
                 )
+            markdown_display = gr.Markdown(label="Analysis Results")
+    # Download section
     with gr.Column(elem_classes=["download-section"]):
         gr.Markdown("### Result Export")
         with gr.Row():
+            download_md_btn = gr.Button("Download Markdown", variant="secondary")
+            download_all_btn = gr.Button("Download Full Results (ZIP)", variant="primary")
+        download_file = gr.File(visible=False, label="Download File")
+    # Interaction logic
     def toggle_spinner():
         return gr.update(visible=True)
     def hide_spinner():
         return gr.update(visible=False)
+    def update_display(results):
+        """Update all display components with processed results"""
+        if not results:
+            return [
+                gr.update(value='<div class="no-preview-message">No file to display</div>'),
+                gr.update(visible=False),
+                gr.update(value="No content"),
+                gr.update(value=[])
+            ]
+        # PDF preview
+        pdf_preview = results.get("pdf_preview", '<div class="no-preview-message">Preview generation failed</div>')
+        # Image gallery
+        images = results.get("pdf_images", [])
+        show_gallery = bool(images) and results.get("file_type") == "pdf"
+        # Markdown content
+        display_content = results["markdown_contents"][0] if results.get("markdown_contents") else "No content"
+        return [
+            gr.update(value=pdf_preview),
+            gr.update(visible=show_gallery),
+            gr.update(value=display_content),
+            gr.update(value=images if show_gallery else [])
+        ]
     process_btn.click(
         toggle_spinner,
     ).then(
         hide_spinner,
         outputs=[loading_spinner]
+    ).then(
+        update_display,
         inputs=[results_state],
+        outputs=[pdf_preview, pdf_gallery, markdown_display, pdf_gallery]
     )
     display_mode.change(
         lambda mode, res: (
+            res["markdown_contents"][0] if mode == "Original Markdown"
             else res["clean_markdown_contents"][0]
+        ) if res and res.get("markdown_contents") else "No content",
         inputs=[display_mode, results_state],
         outputs=[markdown_display]
     )
         inputs=[results_state],
         outputs=[download_file]
     ).then(
+        lambda: gr.update(visible=True),
         outputs=[download_file]
     )
         inputs=[results_state],
         outputs=[download_file]
     ).then(
+        lambda: gr.update(visible=True),
         outputs=[download_file]
     )
 if __name__ == "__main__":
+    # Check dependencies
+    if not PDF2IMAGE_AVAILABLE:
+        print("Warning: pdf2image not available, PDF to image conversion limited")
+    if not PYGMUPDF_AVAILABLE:
+        print("Warning: PyMuPDF not available, PDF fallback conversion disabled")
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        favicon_path=str(logo_path) if logo_path.exists() else None
+    )