# Source: Hugging Face Space (status when captured: sleeping).
# File size: 1,287 bytes.
import gradio as gr
from PyPDF2 import PdfReader
import io
# Function to convert PDF to text (handles both byte data and file uploads)
def pdf_to_text(file_input):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_input: The PDF to read. Accepted forms are raw ``bytes`` or
            ``bytearray`` content, a filesystem path given as ``str``, or an
            upload object that exposes the file path through a ``.name``
            attribute. ``None`` (nothing uploaded) is reported as an error.

    Returns:
        The concatenated text of all pages, or a message starting with
        ``"Error while processing the PDF: "`` when the input is missing or
        cannot be read. Errors are returned rather than raised so the UI can
        show them in the output box.
    """
    # Guard first: without this, a missing upload crashes on ``.name`` below.
    if file_input is None:
        return "Error while processing the PDF: no file was provided"

    if isinstance(file_input, (bytes, bytearray)):
        # Raw content: wrap it in an in-memory file-like object for the reader.
        pdf_file = io.BytesIO(file_input)
    elif isinstance(file_input, str):
        # Already a filesystem path; hand it to the reader unchanged.
        pdf_file = file_input
    else:
        # Upload wrappers carry the temp-file path in ``.name``; objects
        # without that attribute (e.g. an in-memory stream) are used as is.
        pdf_file = getattr(file_input, "name", file_input)

    # Broad catch is deliberate: this is the UI boundary, and any parsing
    # failure should surface as a readable message instead of a traceback.
    try:
        reader = PdfReader(pdf_file)
        # A page without a text layer may give None or "" depending on the
        # PyPDF2 version; ``or ""`` keeps the join from failing on None.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        return f"Error while processing the PDF: {e}"
# Web UI definition: one file-upload input is passed to pdf_to_text and the
# string it returns is shown in a plain text output.
iface = gr.Interface(
    title="PDF to Text Conversion",
    description="Upload a PDF file or send byte data (Base64 encoded) to extract its text.",
    fn=pdf_to_text,
    inputs=gr.File(label="Upload PDF or send Byte data"),
    outputs="text",
)
# Launch the interface only when this file is executed as a script, so that
# importing the module does not start the app.
if __name__ == "__main__":
    iface.launch()