import gradio as gr
import PyPDF2
from transformers import BartForConditionalGeneration, BartTokenizer

# Load the BART model and tokenizer once at import time so that every
# request handled by the interface reuses the same objects.
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)


def _extract_pdf_text(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, newline-joined."""
    pdf_reader = PyPDF2.PdfReader(pdf_path)
    # A page may yield no text (e.g. scanned images); `or ""` keeps the join
    # safe in that case. Joining with a newline stops the last word of one
    # page from fusing with the first word of the next.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)


def _to_bullet_points(summary):
    """Split *summary* on sentence boundaries and format one bullet per line."""
    sentences = (part.strip() for part in summary.split(". "))
    return "\n".join(f"• {sentence}" for sentence in sentences if sentence)


def summarize_pdf(pdf_file):
    """Summarize an uploaded PDF as a bullet-point list.

    Args:
        pdf_file: The upload handed over by Gradio's "file" input. Either an
            object exposing the file path as ``.name`` or a plain path
            string is accepted; ``None`` (no file selected) is tolerated.

    Returns:
        A string with one "• "-prefixed bullet per summary sentence, an empty
        string when no file is given, or a short notice when the PDF has no
        extractable text.

    Note:
        The tokenizer truncates the input to 1024 tokens, so only the
        beginning of long documents contributes to the summary.
    """
    # With live=True the callback may fire when the input is cleared.
    if pdf_file is None:
        return ""

    # NOTE(review): the shape of the upload appears to differ between Gradio
    # versions (tempfile-like object vs. path string) - support both.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    text = _extract_pdf_text(pdf_path).strip()
    if not text:
        # Nothing to summarize; do not feed an empty prompt to the model.
        return "No extractable text found in the PDF."

    # BART is given the raw document text; the "summarize: " task prefix is a
    # T5 convention and would only be treated as part of the document here.
    inputs = tokenizer.encode(
        text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    summary_ids = model.generate(
        inputs,
        max_length=300,
        min_length=80,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return _to_bullet_points(summary)


# Interface for Gradio.
# NOTE(review): the former `capture_session=True` argument was dropped; it
# looks like a deprecated TensorFlow 1.x-only option that has no effect on
# this PyTorch model - confirm against the pinned Gradio version.
iface = gr.Interface(
    fn=summarize_pdf,
    inputs="file",
    outputs="text",
    title="Live and free PDF Summarizer for your documents",
    description="Upload a PDF file here and get a bullet-point summary of its content.",
    theme="Monochrome",
    live=True,
)

# Launch the Gradio interface
iface.launch()