DeepDiveDev's picture
Update app.py
5523faf verified
raw
history blame
1.29 kB
import gradio as gr
from PyPDF2 import PdfReader
import io
# Function to convert PDF to text (handles both byte data and file uploads)
def pdf_to_text(file_input):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_input: The PDF to read. ``bytes``/``bytearray`` are wrapped in
            an in-memory stream. A path string or path-like object is passed
            to ``PdfReader`` as-is. Any other object is read through its
            ``name`` attribute when it has one (e.g. an uploaded-file
            wrapper), otherwise it is handed to ``PdfReader`` unchanged
            (e.g. an already-open binary stream).

    Returns:
        The concatenated page text, or a message beginning with
        ``"Error while processing the PDF: "`` when no input was supplied
        or reading/extraction failed.
    """
    # With no upload the input is None; report it in the same style as other
    # failures instead of crashing on the `.name` lookup below.
    if file_input is None:
        return "Error while processing the PDF: no file was provided."

    if isinstance(file_input, (bytes, bytearray)):
        # Raw bytes need a file-like wrapper before they can be parsed.
        pdf_source = io.BytesIO(file_input)
    elif isinstance(file_input, str) or hasattr(file_input, "__fspath__"):
        # Already a filesystem path; `.name` on a path-like object would
        # only give the final component, so use the full path untouched.
        pdf_source = file_input
    else:
        # Upload wrappers expose the on-disk location as `.name`; objects
        # without it (such as in-memory streams) are used directly.
        pdf_source = getattr(file_input, "name", file_input)

    try:
        reader = PdfReader(pdf_source)
        # `or ""` keeps the join safe should a page yield None instead of a
        # string; join also avoids repeated string concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # UI boundary: surface the failure as text rather than raising.
        return f"Error while processing the PDF: {str(e)}"
# Build the web UI: one file-upload input wired to pdf_to_text, with the
# extracted text shown as plain-text output.
iface = gr.Interface(
    pdf_to_text,
    gr.File(label="Upload PDF or send Byte data"),
    "text",
    title="PDF to Text Conversion",
    description="Upload a PDF file or send byte data (Base64 encoded) to extract its text.",
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()