"""Gradio app that extracts the text of an uploaded PDF using PyPDF2."""

import io
import os

import gradio as gr
from PyPDF2 import PdfReader


def _resolve_pdf_source(file_input):
    """Turn the value handed over by the UI into something PdfReader can open.

    Raw ``bytes``/``bytearray`` are wrapped in an in-memory stream, path-like
    values are returned as a plain path, and any other object is expected to
    expose its on-disk location through a ``name`` attribute (as the original
    code assumed for file uploads).
    """
    if isinstance(file_input, (bytes, bytearray)):
        return io.BytesIO(bytes(file_input))
    if isinstance(file_input, (str, os.PathLike)):
        # NOTE(review): newer Gradio versions appear to pass the upload as a
        # path string rather than a tempfile wrapper -- confirm against the
        # installed Gradio version.
        return os.fspath(file_input)
    return file_input.name


def pdf_to_text(file_input):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_input: The PDF to read. May be raw PDF bytes, a filesystem path,
            or an upload object exposing the file path via ``.name``.
            ``None`` (nothing uploaded) is reported as an error message.

    Returns:
        The concatenated text of all pages, or a string starting with
        ``"Error while processing the PDF: "`` describing what went wrong.
        Failures are returned rather than raised so the UI can display them.
    """
    if file_input is None:
        return "Error while processing the PDF: no file was provided."

    try:
        # Resolving the source happens inside the ``try`` so that an
        # unsupported input type is reported like any other failure instead
        # of escaping as an unhandled AttributeError.
        reader = PdfReader(_resolve_pdf_source(file_input))
        # ``or ""`` guards against a page whose extraction yields no value
        # (presumably possible on some PyPDF2 versions -- harmless otherwise).
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:  # UI boundary: surface the failure as text.
        return f"Error while processing the PDF: {str(e)}"


# Gradio interface: a single file input wired to ``pdf_to_text``, with the
# extracted text shown as plain text output.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=gr.File(label="Upload PDF or send Byte data"),
    outputs="text",
    title="PDF to Text Conversion",
    description="Upload a PDF file or send byte data (Base64 encoded) to extract its text.",
)

if __name__ == "__main__":
    iface.launch()