"""Gradio front end for a remote layout-parsing API.

The app uploads a PDF or image to ``API_URL``, shows the markdown returned for
each page next to a preview of the document, and lets the user export the
markdown alone or a ZIP archive with every artefact of the analysis.
"""
import atexit
import base64
import io
import json
import os
import re
import shutil
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path

import gradio as gr
import requests
from PIL import Image

try:
    import pdf2image
    PDF2IMAGE_AVAILABLE = True
except ImportError:
    PDF2IMAGE_AVAILABLE = False

try:
    import fitz  # PyMuPDF
    PYGMUPDF_AVAILABLE = True
except ImportError:
    PYGMUPDF_AVAILABLE = False

# API Configuration
API_URL = "https://t707h6d9q6oftbx3.aistudio-app.com/layout-parsing"
TOKEN = os.getenv("API_TOKEN")

# Temporary directory management: every directory created for an export is
# recorded here and removed when the interpreter exits.
temp_dirs = []


def cleanup():
    """Remove every temporary directory recorded in ``temp_dirs``."""
    for dir_path in temp_dirs:
        # Best effort: a directory that is already gone (or still in use)
        # must not raise during interpreter shutdown.
        shutil.rmtree(dir_path, ignore_errors=True)


atexit.register(cleanup)


def image_to_base64(image_path):
    """Return the file at *image_path* as a ``data:image/png`` URI.

    Returns an empty string when the path is empty or does not exist.
    """
    if not image_path or not Path(image_path).exists():
        return ""
    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"


# Get current directory
current_dir = Path(__file__).parent
logo_path = current_dir / "pp-structurev3.png"
logo_base64 = image_to_base64(logo_path)

CSS = """
:root {
    --sand-color: #FAF9F6;
    --white: #ffffff;
    --shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    --text-color: #F3F4F7;
    --black:#000000;
    --link-hover: #2b6cb0;
    --content-width: 1200px;
}
body {
    display: flex;
    justify-content: center;
    background-color: var(--sand-color);
    color: var(--text-color);
    font-family: Arial, sans-serif;
}
.gradio-container {
    max-width: var(--content-width) !important;
    width: 100% !important;
    margin: 20px auto;
    padding: 20px;
    background-color: var(--white);
}
#component-0, #tabs, #settings {
    background-color: var(--white) !important;
    padding: 15px;
}
.upload-section {
    width: 100%;
    margin: 0 auto 30px;
    padding: 20px;
    background-color: var(--sand-color) !important;
    border-radius: 8px;
    box-shadow: var(--shadow);
}
.center-content {
    display: flex;
    flex-direction: column;
    align-items: center;
    text-align: center;
    margin-bottom: 20px;
}
.header {
    margin-bottom: 30px;
    width: 100%;
}
.logo-container {
    width: 100%;
    margin-bottom: 20px;
}
.logo-img {
    width: 100%;
    max-width: var(--content-width);
    margin: 0 auto;
    display: block;
}
.nav-bar {
    display: flex;
    justify-content: center;
    background-color: var(--white);
    padding: 15px 0;
    box-shadow: var(--shadow);
    margin-bottom: 20px;
}
.nav-links {
    display: flex;
    gap: 30px;
    width: 100%;
    justify-content: center;
}
.nav-link {
    color: var(--black);
    text-decoration: none;
    font-weight: bold;
    font-size: 24px;
    transition: color 0.2s;
}
.nav-link:hover {
    color: var(--link-hover);
    text-decoration: none;
}
.result-container {
    display: flex;
    gap: 20px;
    margin-bottom: 30px;
    width: 100%;
}
.pdf-preview {
    flex: 1;
    min-width: 0;
}
.markdown-result {
    flex: 1;
    min-width: 0;
}
.gallery-container {
    width: 100% !important;
}
.gallery-item {
    width: 100% !important;
    height: auto !important;
    aspect-ratio: auto !important;
}
button {
    background-color: var(--text-color) !important;
    color: var(--black) !important;
    border: none !important;
    border-radius: 4px;
    padding: 8px 16px;
}
button:hover {
    opacity: 0.8 !important;
}
.radio-group {
    margin-bottom: 15px !important;
}
.file-download {
    margin-top: 15px !important;
}
.loader {
    border: 5px solid #f3f3f3;
    border-top: 5px solid #3498db;
    border-radius: 50%;
    width: 50px;
    height: 50px;
    animation: spin 1s linear infinite;
    margin: 20px auto;
}
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}
.loader-container {
    text-align: center;
    margin: 20px 0;
}
/* PDF Viewer specific styles */
.pdf-viewer-container {
    width: 100%;
    height: 600px;
    border: 1px solid #ddd;
    margin-top: 15px;
    background-color: #f9f9f9;
    display: flex;
    justify-content: center;
    align-items: center;
}
.pdf-viewer-container embed {
    width: 100%;
    height: 100%;
}
.no-preview-message {
    color: #666;
    font-size: 16px;
    text-align: center;
    padding: 20px;
}
"""

# Labels of the "Display Mode" radio; the first one selects the raw markdown.
ORIGINAL_MODE = "Original Markdown"
CLEANED_MODE = "Cleaned Text"


def _message_html(text):
    """Wrap *text* in the ``.no-preview-message`` placeholder styled by CSS."""
    return f'<div class="no-preview-message">{text}</div>'


def clean_markdown_text(text):
    """Strip HTML tags from *text* and collapse runs of 3+ newlines to 2.

    Returns an empty string for empty or ``None`` input.
    """
    if not text:
        return ""
    text = re.sub(r'<[^>]+>', '', text)
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()


def pdf_to_images(pdf_path, dpi=150):
    """Render every page of a PDF to a PIL image.

    pdf2image is tried first; PyMuPDF is used when pdf2image is missing or
    fails.

    Args:
        pdf_path: Path of the PDF file.
        dpi: Rendering resolution passed to the conversion backend.

    Returns:
        A list of PIL images, or ``None`` when no backend is available or
        every backend failed.
    """
    if PDF2IMAGE_AVAILABLE:
        try:
            return pdf2image.convert_from_path(pdf_path, dpi=dpi)
        except Exception as e:
            print(f"pdf2image conversion failed: {e}")

    if PYGMUPDF_AVAILABLE:
        try:
            # The context manager closes the document even if a page fails to
            # render; Image.frombytes copies the pixel data, so the images
            # stay valid after the document is closed.
            with fitz.open(pdf_path) as doc:
                images = []
                for page in doc:
                    pix = page.get_pixmap(dpi=dpi)
                    images.append(
                        Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    )
                return images
        except Exception as e:
            print(f"PyMuPDF conversion failed: {e}")

    return None


def create_pdf_preview(pdf_path):
    """Return HTML that embeds the PDF at *pdf_path* as a base64 data URI.

    A placeholder message is returned when the file is missing or cannot be
    read.
    """
    if not pdf_path or not Path(pdf_path).exists():
        return _message_html("No PDF file available")

    try:
        # Convert PDF to base64 for embedding
        with open(pdf_path, "rb") as f:
            pdf_base64 = base64.b64encode(f.read()).decode("ascii")
        # NOTE(review): the original markup was lost; this embed is rebuilt
        # from the `.pdf-viewer-container embed` rule in CSS - confirm.
        return (
            '<div class="pdf-viewer-container">'
            f'<embed src="data:application/pdf;base64,{pdf_base64}" '
            'type="application/pdf">'
            '</div>'
        )
    except Exception as e:
        print(f"Failed to create PDF preview: {e}")
        return _message_html("PDF preview generation failed")


def process_file(file_path, file_type):
    """Send the uploaded file to the layout-parsing API and collect results.

    Args:
        file_path: Path of the uploaded file.
        file_type: ``"pdf"`` or ``"image"``, as selected in the UI.

    Returns:
        A dict with the keys ``original_file``, ``file_type``,
        ``markdown_contents``, ``clean_markdown_contents``, ``pdf_images``,
        ``pdf_preview`` and ``api_response``.

    Raises:
        gr.Error: On invalid input, a failed API request, or any other
            processing error.
    """
    try:
        if not file_path:
            raise ValueError("Please upload a file first")

        lowered_path = str(file_path).lower()
        if file_type == "pdf" and not lowered_path.endswith('.pdf'):
            raise ValueError("Please upload a valid PDF file")
        if file_type == "image" and not lowered_path.endswith(('.jpg', '.jpeg', '.png')):
            raise ValueError("Please upload a valid image file (JPG/JPEG/PNG)")

        # Fail early with a clear message instead of sending "token None".
        if not TOKEN:
            raise ValueError("API_TOKEN environment variable is not set")

        # Read file content
        with open(file_path, "rb") as f:
            file_bytes = f.read()

        # Call API for processing
        file_data = base64.b64encode(file_bytes).decode("ascii")
        headers = {
            "Authorization": f"token {TOKEN}",
            "Content-Type": "application/json",
        }
        response = requests.post(
            API_URL,
            json={"file": file_data, "fileType": 0 if file_type == "pdf" else 1},
            headers=headers,
            timeout=60,
        )
        response.raise_for_status()

        # Parse API response; `or` guards against keys present with a null
        # value, which `.get(key, default)` would pass through as None.
        result = response.json()
        layout_results = (result.get("result") or {}).get("layoutParsingResults") or []

        markdown_contents = []
        clean_markdown_contents = []
        for res in layout_results:
            markdown = res.get("markdown") or {}
            if isinstance(markdown, str):
                original = markdown
            else:
                original = markdown.get("text") or ""
            markdown_contents.append(original)
            clean_markdown_contents.append(clean_markdown_text(original))

        # Generate preview content
        if file_type == "pdf":
            images = pdf_to_images(file_path)
            pdf_preview = create_pdf_preview(file_path)
        else:
            # Copy the pixels and close the file so the state does not keep
            # a lazily-loaded image bound to an open file handle.
            with Image.open(file_path) as img:
                images = [img.copy()]
            pdf_preview = _message_html("Image file preview")

        return {
            "original_file": file_path,
            "file_type": file_type,
            "markdown_contents": markdown_contents,
            "clean_markdown_contents": clean_markdown_contents,
            "pdf_images": images,
            "pdf_preview": pdf_preview,
            "api_response": result,
        }

    except requests.exceptions.RequestException as e:
        raise gr.Error(f"API request failed: {e}") from e
    except Exception as e:
        raise gr.Error(f"Error processing file: {e}") from e


def create_zip_file(results):
    """Bundle the analysis results into a ZIP archive in a temp directory.

    The archive contains the original file, per-page original and cleaned
    markdown, the raw API response as JSON and, for PDFs, the rendered page
    images as JPEG.

    Args:
        results: The dict produced by ``process_file``.

    Returns:
        Path of the created ZIP file.

    Raises:
        gr.Error: When there is nothing to export or the archive cannot be
            written.
    """
    try:
        if not results:
            raise ValueError("No results to export")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"analysis_results_{timestamp}.zip"
        temp_dir = tempfile.mkdtemp()
        temp_dirs.append(temp_dir)
        zip_path = os.path.join(temp_dir, zip_filename)

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            # Add original file
            original_path = results.get("original_file", "")
            if original_path and Path(original_path).exists():
                zipf.write(original_path, f"original/{Path(original_path).name}")

            # Add markdown content
            page_pairs = zip(
                results.get("markdown_contents") or [],
                results.get("clean_markdown_contents") or [],
            )
            for page_no, (orig_md, clean_md) in enumerate(page_pairs, start=1):
                if orig_md:
                    zipf.writestr(f"markdown/original/page_{page_no}.md", orig_md)
                if clean_md:
                    zipf.writestr(f"markdown/clean/page_{page_no}.md", clean_md)

            # Add API response
            api_response = results.get("api_response", {})
            zipf.writestr(
                "api_response.json",
                json.dumps(api_response, indent=2, ensure_ascii=False),
            )

            # Add PDF images if available
            if results.get("file_type") == "pdf" and results.get("pdf_images"):
                for page_no, img in enumerate(results["pdf_images"], start=1):
                    # Encode in memory: no stray JPEG files are left next to
                    # the archive, and JPEG cannot store non-RGB modes.
                    buffer = io.BytesIO()
                    rgb_img = img if img.mode == "RGB" else img.convert("RGB")
                    rgb_img.save(buffer, "JPEG", quality=85)
                    zipf.writestr(f"images/page_{page_no}.jpg", buffer.getvalue())

        return zip_path

    except Exception as e:
        raise gr.Error(f"Error creating ZIP file: {e}") from e


def export_markdown(results):
    """Write all original markdown pages to one ``.md`` file.

    Pages are joined with a blank line between them.

    Args:
        results: The dict produced by ``process_file``.

    Returns:
        Path of the written markdown file.

    Raises:
        gr.Error: When there is nothing to export or the file cannot be
            written.
    """
    try:
        if not results:
            raise ValueError("No results to export")

        markdowns = results.get("markdown_contents") or []
        if not markdowns:
            # Raised as ValueError (not gr.Error) so it is wrapped exactly
            # once by the handler below, like every other failure here.
            raise ValueError("No markdown content to export")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"markdown_export_{timestamp}.md"
        content = "\n\n".join(page or "" for page in markdowns)

        temp_dir = tempfile.mkdtemp()
        temp_dirs.append(temp_dir)
        file_path = os.path.join(temp_dir, filename)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)

        return file_path

    except Exception as e:
        raise gr.Error(f"Error exporting markdown: {e}") from e


def render_markdown(mode, results):
    """Return the markdown to display for *mode*, joining every page.

    Args:
        mode: Selected label of the "Display Mode" radio.
        results: The dict produced by ``process_file`` (may be ``None``).

    Returns:
        The joined page texts, or ``"No content"`` when nothing is available.
    """
    if not results or not results.get("markdown_contents"):
        return "No content"
    key = "markdown_contents" if mode == ORIGINAL_MODE else "clean_markdown_contents"
    pages = [page for page in (results.get(key) or []) if page]
    return "\n\n".join(pages) if pages else "No content"


# Interaction logic
def toggle_spinner():
    """Show the loading spinner."""
    return gr.update(visible=True)


def hide_spinner():
    """Hide the loading spinner."""
    return gr.update(visible=False)


def update_display(results, mode=ORIGINAL_MODE):
    """Build the updates for the preview, the gallery and the markdown pane.

    Args:
        results: The dict produced by ``process_file`` (may be ``None``).
        mode: Selected label of the "Display Mode" radio.

    Returns:
        Three ``gr.update`` objects, in the order
        ``[pdf_preview, pdf_gallery, markdown_display]``.
    """
    if not results:
        return [
            gr.update(value=_message_html("No file to display")),
            gr.update(value=[], visible=False),
            gr.update(value="No content"),
        ]

    images = results.get("pdf_images") or []
    if images:
        preview_html = _message_html("Preview rendered as images")
    else:
        # No page images (e.g. no PDF backend installed): fall back to the
        # preview HTML stored by process_file.
        preview_html = results.get("pdf_preview") or _message_html("No file to display")

    return [
        gr.update(value=preview_html),
        gr.update(value=images, visible=bool(images)),
        gr.update(value=render_markdown(mode, results)),
    ]


with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
    results_state = gr.State()

    # Header with logo
    with gr.Column(elem_classes=["logo-container"]):
        # NOTE(review): original markup was lost; rebuilt from the
        # `.logo-img` CSS rule and the otherwise unused `logo_base64`.
        gr.HTML(
            f'<img src="{logo_base64}" class="logo-img" alt="Logo">'
            if logo_base64 else ""
        )

    # Navigation bar
    with gr.Row(elem_classes=["nav-bar"]):
        # NOTE(review): the navigation links were not recoverable from the
        # source; add `<a class="nav-link" href="...">` entries here.
        gr.HTML('<div class="nav-links"></div>')

    # Upload section
    with gr.Column(elem_classes=["upload-section"]):
        file_type = gr.Radio(
            ["pdf", "image"],
            label="File Type",
            value="pdf",
            interactive=True,
        )
        file_input = gr.File(
            label="Upload Document",
            file_types=[".pdf", ".jpg", ".jpeg", ".png"],
            type="filepath",
        )
        process_btn = gr.Button("Analyze Document", variant="primary")

        loading_spinner = gr.Column(
            visible=False,
            elem_classes=["loader-container"],
        )
        with loading_spinner:
            gr.HTML(
                '<div class="loader"></div>'
                '<p>Processing, please wait...</p>'
            )

    # Results display section
    with gr.Row(elem_classes=["result-container"]):
        with gr.Column(elem_classes=["pdf-preview"]):
            gr.Markdown("### Original Document Preview")
            pdf_preview = gr.HTML(label="PDF Preview")
            pdf_gallery = gr.Gallery(
                label="PDF Pages",
                show_label=False,
                elem_classes=["gallery-container"],
                columns=[1],
                object_fit="contain",
                visible=False,
            )

        with gr.Column(elem_classes=["markdown-result"]):
            with gr.Row(elem_classes=["radio-group"]):
                display_mode = gr.Radio(
                    [ORIGINAL_MODE, CLEANED_MODE],
                    label="Display Mode",
                    value=ORIGINAL_MODE,
                    interactive=True,
                )
            markdown_display = gr.Markdown(label="Analysis Results")

    # Download section
    with gr.Column(elem_classes=["download-section"]):
        gr.Markdown("### Result Export")
        with gr.Row():
            download_md_btn = gr.Button("Download Markdown", variant="secondary")
            download_all_btn = gr.Button("Download Full Results (ZIP)", variant="primary")
        download_file = gr.File(visible=False, label="Download File")

    # `.then` runs even when the previous step failed, so the spinner is
    # always hidden again after process_file raises.
    process_btn.click(
        toggle_spinner,
        outputs=[loading_spinner],
    ).then(
        process_file,
        inputs=[file_input, file_type],
        outputs=[results_state],
    ).then(
        hide_spinner,
        outputs=[loading_spinner],
    ).then(
        update_display,
        inputs=[results_state, display_mode],
        outputs=[pdf_preview, pdf_gallery, markdown_display],
    )

    display_mode.change(
        render_markdown,
        inputs=[display_mode, results_state],
        outputs=[markdown_display],
    )

    # Reveal the download component only when the export actually succeeded.
    download_md_btn.click(
        export_markdown,
        inputs=[results_state],
        outputs=[download_file],
    ).success(
        lambda: gr.update(visible=True),
        outputs=[download_file],
    )

    download_all_btn.click(
        create_zip_file,
        inputs=[results_state],
        outputs=[download_file],
    ).success(
        lambda: gr.update(visible=True),
        outputs=[download_file],
    )


if __name__ == "__main__":
    # Check dependencies
    if not PDF2IMAGE_AVAILABLE:
        print("Warning: pdf2image not available, PDF to image conversion limited")
    if not PYGMUPDF_AVAILABLE:
        print("Warning: PyMuPDF not available, PDF fallback conversion disabled")

    # NOTE(review): binds to all interfaces and requests a public share
    # link - confirm this exposure is intended for the deployment.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        favicon_path=str(logo_path) if logo_path.exists() else None,
    )