Anupam251272's picture
Update app.py
a1b258c verified
import nltk
nltk.download('punkt')
# Third cell - Main implementation
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from newspaper import Article
import gradio as gr
import warnings
warnings.filterwarnings('ignore')
# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# Initialize model and tokenizer
model_name = "google/pegasus-large"
try:
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)
print("Model loaded successfully!")
except Exception as e:
print(f"Error loading model: {e}")
def fetch_article_text(url):
"""Fetch and extract text from a given URL"""
try:
article = Article(url)
article.download()
article.parse()
return article.text
except Exception as e:
return f"Error fetching article: {e}"
def summarize_text(text, max_length=150, min_length=40):
"""Generate summary using the Pegasus model"""
try:
# Tokenize with padding and truncation
inputs = tokenizer(
text,
max_length=1024,
truncation=True,
padding="max_length",
return_tensors="pt"
).to(device)
# Generate summary
summary_ids = model.generate(
inputs["input_ids"],
max_length=max_length,
min_length=min_length,
length_penalty=2.0,
num_beams=4,
early_stopping=True
)
# Decode and return summary
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
return summary
except Exception as e:
return f"Error generating summary: {e}"
def process_input(input_text, input_type, max_length=150, min_length=40):
"""Process either URL or direct text input"""
try:
if input_type == "URL":
text = fetch_article_text(input_text)
if "Error" in text:
return text
else:
text = input_text
if not text or len(text.strip()) < 100:
return "Error: Input text is too short or empty."
return summarize_text(text, max_length, min_length)
except Exception as e:
return f"Error processing input: {e}"
# Create Gradio interface
def create_interface():
with gr.Blocks(title="Research Article Summarizer") as interface:
gr.Markdown("# Research Article Summarizer")
gr.Markdown("Enter either a URL or paste the article text directly.")
with gr.Row():
input_type = gr.Radio(
choices=["URL", "Text"],
value="URL",
label="Input Type"
)
with gr.Row():
input_text = gr.Textbox(
lines=5,
placeholder="Enter URL or paste article text here...",
label="Input"
)
with gr.Row():
max_length = gr.Slider(
minimum=50,
maximum=500,
value=150,
step=10,
label="Maximum Summary Length"
)
min_length = gr.Slider(
minimum=20,
maximum=200,
value=40,
step=10,
label="Minimum Summary Length"
)
with gr.Row():
submit_btn = gr.Button("Generate Summary")
with gr.Row():
output = gr.Textbox(
lines=5,
label="Generated Summary"
)
submit_btn.click(
fn=process_input,
inputs=[input_text, input_type, max_length, min_length],
outputs=output
)
return interface
# Launch the interface
demo = create_interface()
demo.launch(debug=True, share=True)