Spaces:
Sleeping
Sleeping
import gradio as gr | |
from PyPDF2 import PdfReader | |
import io | |
# Convert a PDF to text (handles raw bytes, file paths and uploaded file objects)
def pdf_to_text(file_input):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_input: The PDF to read. Accepted forms:
            * ``bytes`` / ``bytearray`` holding the raw PDF data,
            * a ``str`` file path,
            * an upload object exposing the file path as ``.name``,
            * any other object, which is handed to ``PdfReader`` unchanged
              (e.g. an already-open binary stream).

    Returns:
        The concatenated text of all pages, or a message starting with
        ``"Error while processing the PDF: "`` when no input was supplied or
        reading the PDF failed. Errors are reported in the return value
        rather than raised.
    """
    # Nothing was uploaded: report it the same way as any other failure
    # instead of crashing on the attribute access below.
    if file_input is None:
        return "Error while processing the PDF: no file was provided"

    if isinstance(file_input, (bytes, bytearray)):
        # Raw PDF data: wrap it in an in-memory file-like object.
        pdf_source = io.BytesIO(file_input)
    elif isinstance(file_input, str):
        # Already a file path.
        pdf_source = file_input
    else:
        # Upload wrapper: use its path when it has one, otherwise pass the
        # object itself through and let PdfReader decide.
        pdf_source = getattr(file_input, "name", file_input)

    try:
        reader = PdfReader(pdf_source)
        # `or ""` keeps the join safe should a page yield no text at all.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        return f"Error while processing the PDF: {str(e)}"
# Build the Gradio UI: one file-upload widget whose value is passed to
# pdf_to_text, with the returned string shown as plain-text output.
iface = gr.Interface(
    title="PDF to Text Conversion",
    description="Upload a PDF file or send byte data (Base64 encoded) to extract its text.",
    fn=pdf_to_text,
    inputs=gr.File(label="Upload PDF or send Byte data"),
    outputs="text",
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()