|
import gradio as gr |
|
import PyPDF2 |
|
import re |
|
|
|
def read_pdf(file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        file: The uploaded file: either a filesystem path string, or an
            object whose ``name`` attribute holds the path of the file on
            disk. ``None`` (nothing uploaded) is accepted.

    Returns:
        The text of all pages joined by a blank line. Within each page,
        runs of consecutive newlines are collapsed to a single newline and
        surrounding whitespace is stripped. Returns an empty string when
        ``file`` is ``None``.
    """
    # Nothing was uploaded: return empty text instead of failing on
    # ``file.name``.
    if file is None:
        return ""

    # Accept both a plain path string and a file-like wrapper exposing the
    # on-disk path through ``.name``.
    path = file if isinstance(file, str) else file.name

    with open(path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        paragraphs = []
        for page in reader.pages:
            # Defensive: fall back to "" so ``re.sub`` never receives None
            # if a page yields no text object.
            extracted_text = page.extract_text() or ""
            formatted_text = re.sub(r"\n+", "\n", extracted_text)
            paragraphs.append(formatted_text.strip())

    return "\n\n".join(paragraphs)
|
|
|
# Build the web UI: one file-upload input wired to ``read_pdf``, with the
# returned text shown in a textbox.
iface = gr.Interface(
    fn=read_pdf,
    # Use the top-level component classes; the ``gr.inputs`` / ``gr.outputs``
    # namespaces are deprecated and absent from newer Gradio releases.
    inputs=gr.File(label="Upload a PDF file"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="PDF Text Extractor",
    description="A smooth app that gets text from PDF files🧠",
    theme="ParityError/Anime",
)

# Start the web server for the interface.
iface.launch()
|
|