pdf-convert / app.py
sblumenf's picture
Update app.py
8b0be64 verified
raw
history blame
692 Bytes
import gradio as gr
import PyMuPDF as fitz # Importing PyMuPDF as fitz
# Function to extract text from a PDF
def extract_pdf_text(file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        file: The uploaded file, given either as a path string or as an
            object whose ``name`` attribute holds the path. ``None`` is
            treated as "no file selected".

    Returns:
        The text of all pages joined in page order, or an empty string
        when ``file`` is ``None``.
    """
    # NOTE(review): the upload widget presumably passes None when no file is
    # selected or the upload is cleared; return early instead of crashing.
    if file is None:
        return ""

    # Accept both a plain path and a file-like wrapper exposing ``.name``.
    path = file if isinstance(file, str) else file.name

    # The context manager closes the document even if extraction raises.
    with fitz.open(path) as doc:
        return "".join(page.get_text() for page in doc)
# Gradio interface
# Output-format selector. It is defined here but not wired into the
# interface: extract_pdf_text returns only the extracted text, so no format
# conversion happens yet.
output_format_dropdown = gr.Dropdown(
    choices=["txt", "pdf", "docx"],
    label="Output Format",
    value="txt",  # initial selection; Gradio components take `value`, not `default`
)

iface = gr.Interface(
    fn=extract_pdf_text,
    inputs=gr.File(label="Upload PDF File"),
    # Exactly one output component, matching the single string that
    # extract_pdf_text returns.
    outputs=gr.Textbox(label="Extracted Text"),
    live=True,
)

if __name__ == "__main__":
    iface.launch()