"""Gradio front end that uploads a PDF catalog and runs it through ``main``."""

import os
import shutil

import gradio as gr

import main


def _uploaded_path(pdf_file):
    """Return the filesystem path behind a Gradio upload, or None if absent.

    Gradio hands the callback either a path string (possibly a ``str``
    subclass) or a tempfile-like object exposing the path as ``.name``,
    depending on version and component configuration; both are accepted.
    """
    if pdf_file is None:
        return None
    if isinstance(pdf_file, (str, os.PathLike)):
        return os.fspath(pdf_file)
    return getattr(pdf_file, "name", None)


def predict_from_pdf(pdf_file):
    """Copy an uploaded PDF into ``./catalogue/`` and process it.

    Args:
        pdf_file: The value Gradio passes for the ``gr.File`` input: a path
            string, an object with a ``.name`` path attribute, or ``None``
            when nothing was uploaded.

    Returns:
        A 2-tuple. On success, the two values returned by
        ``main.process_pdf_catalog`` for the copied file. On any failure,
        ``(None, error_message)`` so the UI shows the error as text instead
        of crashing the request.
    """
    src_path = _uploaded_path(pdf_file)
    if not src_path:
        # Submitting the form with no file gives None; report it instead of
        # raising AttributeError on ``pdf_file.name``.
        return None, "Error: No PDF file was provided."

    # Persistent directory where uploaded catalogs are kept for processing.
    upload_dir = "./catalogue/"
    dest_file_path = os.path.join(upload_dir, os.path.basename(src_path))

    try:
        os.makedirs(upload_dir, exist_ok=True)

        try:
            shutil.copy(src_path, dest_file_path)
        except shutil.SameFileError:
            # The upload already lives in the catalogue directory (e.g. a
            # bundled example); nothing to copy, process it in place.
            pass

        # Check that the file was saved successfully.
        if not os.path.exists(dest_file_path):
            return None, f"Error: The file {dest_file_path} could not be found or opened."

        # Process the PDF and retrieve the product info.
        df, response = main.process_pdf_catalog(dest_file_path)
        return df, response
    except Exception as e:
        # UI boundary: surface every failure as a message in the text output.
        return None, f"Error processing PDF: {str(e)}"


# Example PDFs offered in the UI (paths relative to the working directory).
pdf_examples = [
    ["catalogue/flexpocket.pdf"],
    ["catalogue/ASICS_Catalog.pdf"],
]

demo = gr.Interface(
    fn=predict_from_pdf,
    inputs=gr.File(label="Upload PDF Catalog"),
    outputs=["json", "text"],
    examples=pdf_examples,
    title="Open Source PDF Catalog Parser",
    description="Efficient PDF catalog processing using fitz and OpenLLM",
    article="Uses PyMuPDF for layout analysis and Llama-CPP for structured extraction",
)

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=True)