File size: 1,287 Bytes
ddf7acc
5523faf
e638a74
300310b
5523faf
 
 
 
 
 
e638a74
5523faf
 
300310b
5523faf
 
 
 
 
 
 
 
300310b
5523faf
 
 
 
 
 
 
 
ddf7acc
e638a74
5523faf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
from PyPDF2 import PdfReader
import io

# Function to convert PDF to text (handles both byte data and file uploads)
def pdf_to_text(file_input):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_input: The PDF to read. Accepted forms:
            * ``bytes`` / ``bytearray`` -- raw PDF content, wrapped in an
              in-memory stream.
            * ``str`` -- used as a filesystem path to the PDF.
            * any object with a ``name`` attribute (e.g. an uploaded
              temp-file wrapper) -- ``name`` is used as the path.
            * any other object -- handed to ``PdfReader`` unchanged.
            * ``None`` -- nothing was supplied; an error message is returned.

    Returns:
        The concatenated page text, or a string starting with
        ``"Error while processing the PDF: "`` if the input is missing or the
        PDF cannot be read. Errors are returned as text, not raised, so the
        UI can display them.
    """
    # Nothing supplied (e.g. the form was submitted without a file): report
    # it in the same style as other failures instead of crashing on `.name`.
    if file_input is None:
        return "Error while processing the PDF: no file was provided."

    if isinstance(file_input, (bytes, bytearray)):
        # Raw PDF content: PdfReader needs a seekable file-like object.
        pdf_file = io.BytesIO(file_input)
    elif isinstance(file_input, str):
        # Already a path string (NOTE(review): some Gradio versions pass the
        # upload as a path rather than a file wrapper -- confirm for the
        # deployed version).
        pdf_file = file_input
    else:
        # Upload wrapper: use its path; objects without `.name` (e.g. an
        # in-memory stream) are handed to PdfReader unchanged.
        pdf_file = getattr(file_input, "name", file_input)

    try:
        reader = PdfReader(pdf_file)
        # `or ""` guards against a page yielding no text (None), which would
        # otherwise make string concatenation fail for the whole document.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # UI boundary: surface any parsing failure as text for the user.
        return f"Error while processing the PDF: {str(e)}"

# Web UI definition: one file-upload widget whose value is passed to
# pdf_to_text, with the function's return value rendered as plain text.
iface = gr.Interface(
    title="PDF to Text Conversion",
    description="Upload a PDF file or send byte data (Base64 encoded) to extract its text.",
    fn=pdf_to_text,  # Callback that performs the extraction
    inputs=gr.File(label="Upload PDF or send Byte data"),  # Upload widget
    outputs="text",  # Show the returned string as-is
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()