File size: 1,419 Bytes
29c7654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39c23a8
29c7654
39c23a8
 
 
 
 
 
29c7654
 
 
 
 
 
7f633bf
 
 
29c7654
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import re

import gradio as gr
import PyPDF2
from transformers import BartTokenizer, BartForConditionalGeneration

# Load the BART tokenizer and model once, at module import time; the
# summarize_pdf function in this file reads both module-level objects on
# every call instead of reloading them.
# Checkpoint identifier passed to both loaders so tokenizer and model match.
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Function to summarize PDF text
def summarize_pdf(pdf_file):
    """Summarize the text of an uploaded PDF as a bullet-point list.

    Args:
        pdf_file: The upload handed over by the Gradio file input. Either an
            object exposing the on-disk path as ``.name`` or a plain path
            string. ``None`` means there is nothing to summarize.

    Returns:
        A string with one "• "-prefixed sentence per line. An empty string
        when no file was supplied, and a short notice when the PDF contains
        no extractable text.
    """
    # Nothing uploaded (e.g. the file input was cleared): avoid crashing on
    # attribute access and simply show an empty result.
    if pdf_file is None:
        return ""

    # Accept both file-like wrappers (path in .name) and plain path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    pdf_reader = PyPDF2.PdfReader(pdf_path)

    # Join page texts with a newline so the last word of one page is not
    # glued to the first word of the next; `or ""` guards against a page
    # that yields no text at all.
    text = "\n".join(
        page.extract_text() or "" for page in pdf_reader.pages
    ).strip()
    if not text:
        # Without any text the model would only invent content.
        return "No extractable text was found in this PDF."

    # The document text is passed as-is: a "summarize: " task prefix is a T5
    # convention and would only add noise to a BART input.
    # Input is truncated to the first 1024 tokens of the document.
    inputs = tokenizer.encode(
        text, return_tensors="pt", max_length=1024, truncation=True
    )
    summary_ids = model.generate(
        inputs,
        max_length=300,
        min_length=80,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Split after sentence-ending punctuation (keeping it) so every bullet
    # is punctuated consistently, not only the last one.
    sentences = re.split(r"(?<=[.!?])\s+", summary.strip())
    return "\n".join(f"• {sentence}" for sentence in sentences if sentence)

# Gradio interface: one file upload in, plain-text bullet summary out.
iface = gr.Interface(
    fn=summarize_pdf,
    inputs="file",
    outputs="text",
    title="Live and free PDF Summarizer for your documents",
    description="Upload a PDF file here and get a bullet-point summary of its content.",
    theme="Monochrome",
    # Live mode: the summary is recomputed when the input changes rather
    # than on an explicit submit.
    live=True,
    # NOTE: the former `capture_session=True` argument was dropped. It is a
    # deprecated TensorFlow 1-era flag with no effect on this PyTorch model,
    # and newer Gradio releases reject it as an unknown keyword.
)

# Launch the Gradio interface (kept at module level so running the script
# starts the app exactly as before).
iface.launch()