# Scraped page header, not part of the program: "Spaces: Sleeping".
import gradio as gr
import PyPDF2
from transformers import pipeline
# Load the summarization pipeline once, at module level, so the same model
# object is reused by every call to the summarizing function below.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def summarize_pdf(pdf_file):
    """Extract the text of a PDF and return a summary of it.

    Args:
        pdf_file: The uploaded PDF, passed unchanged to ``PyPDF2.PdfReader``.
            ``None`` is treated as "nothing was uploaded".

    Returns:
        The summary text on success. Otherwise a human-readable message saying
        that no text could be extracted or describing the error that occurred;
        failures are reported as strings rather than raised so the UI can
        display them.
    """
    # NOTE(review): the leading "β" in both messages looks like a mis-decoded
    # emoji; it is kept byte-for-byte until the intended character is confirmed.
    no_text_message = (
        "β Could not extract text from the PDF. Please upload a valid document."
    )

    # Guard the empty submission explicitly so the user gets the friendly
    # message instead of the text of an internal exception.
    if pdf_file is None:
        return no_text_message

    try:
        # Extract text from the uploaded PDF. `or ""` protects the join in
        # case a page yields None instead of a string.
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

        # Check if text was extracted (e.g. scanned, image-only documents).
        if not text.strip():
            return no_text_message

        # Summarize the extracted text. truncation=True clips input that
        # exceeds the model's maximum length instead of failing inside the
        # model; for very long PDFs only the leading part is summarized.
        summary = summarizer(
            text,
            max_length=300,
            min_length=50,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        # UI boundary: surface any failure as a message rather than a traceback.
        return f"β An error occurred: {str(e)}"
# Create the Gradio interface: one PDF upload in, the summary text out.
# Components come from the top-level namespace (gr.File, gr.Textbox) because
# the legacy gr.inputs / gr.outputs namespaces are no longer available in
# current Gradio releases.
interface = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.Textbox(label="Summary"),
    title="PDF Summarizer",
    description="Upload a PDF file to extract and summarize its content using state-of-the-art AI.",
)
interface.launch()