Article_Summarizer / Article_summarizer.py
Muhammad Murtaza Naqi (Assistant Manager - Data Analyst)
supporting files
712d86b
raw
history blame
3.99 kB
import streamlit as st
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
from transformers import AutoModel
#from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM
from Scrapper_Summarizer import get_full_article_dawn, get_full_article_tnews, get_full_article_brecorder, summarizer
# Alternative summarization checkpoint (currently disabled):
# summarizer = pipeline("summarization", model="mrm8488/bert-mini2bert-mini-finetuned-cnn_daily_mail-summarization")
# Module-level summarization pipeline shared by the functions below. It is built
# when this module is imported.
# NOTE(review): this assignment rebinds the `summarizer` name that is also
# imported from Scrapper_Summarizer above, so code in this module uses the
# pipeline created here rather than the imported object - confirm that this
# is intended.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Function to scrape full article and summarize it
def get_full_article(url, *, timeout=30, max_chars=1020):
    """Download an article page and return a summary of its body text.

    The page is fetched with ``requests``; the text of every ``<p>`` element
    inside the ``div`` with class ``story__content`` is joined into a single
    string, and its first ``max_chars`` characters are passed to the
    module-level ``summarizer``. Success and failure are also reported to the
    user through Streamlit status messages.

    Args:
        url: Address of the article page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.
        max_chars: Number of leading characters of the article text handed to
            the summarizer.

    Returns:
        The generated summary (possibly an empty string when the summarizer
        returns nothing), ``"Content not found."`` when the page has no
        ``story__content`` container or the container holds no paragraph
        text, or ``"Error fetching the article."`` when any step raises.
    """
    try:
        # NOTE(review): verify=False turns off TLS certificate verification.
        # It is preserved here to keep existing behaviour, but it exposes the
        # request to man-in-the-middle tampering - confirm it is required.
        response = requests.get(url, verify=False, timeout=timeout)
        # Treat HTTP error pages (4xx/5xx) as failures instead of parsing them.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        content_div = soup.find('div', class_='story__content')
        if content_div is None:
            st.error("Content not found in the article.")
            return "Content not found."

        full_text = ' '.join(
            para.get_text(strip=True) for para in content_div.find_all('p')
        )
        if not full_text.strip():
            # Nothing to summarize; avoid calling the model on empty input.
            st.error("Content not found in the article.")
            return "Content not found."

        # Limit the text length handed to the summarization model.
        summary_obj = summarizer(full_text[:max_chars])
        # The pipeline returns a list of dicts; pull out the summary string.
        summary = summary_obj[0]['summary_text'] if summary_obj else ""
        st.success("Summary generated successfully!")
        return summary
    except Exception as e:
        # UI boundary: surface any failure to the user rather than crashing.
        st.error(f"Error fetching the article: {e}")
        return "Error fetching the article."
def _show_summary(full_text):
    """Summarize ``full_text`` with the module-level pipeline and display it."""
    if not full_text:
        # The scraper returned nothing usable; do not call the model on it.
        st.error("No article text could be extracted from the given URL.")
        return
    # Limit the text length handed to the summarization model.
    summary_obj = summarizer(full_text[:1020])
    # The pipeline returns a list of dicts; pull out the summary string.
    summary = summary_obj[0]['summary_text'] if summary_obj else ""
    st.write(summary)


def article_sum():
    """Render the Article Summarizer page.

    Shows a URL input in the main area and one sidebar button per supported
    news source. When a button is pressed and a URL has been entered, the
    matching scraper imported from ``Scrapper_Summarizer`` is called with the
    URL, its result (presumably the article body text - verify against the
    scraper module) is truncated to 1020 characters, summarized, and written
    to the page. When no URL has been entered, an error is shown in the
    sidebar instead. The sidebar also carries an "About" section.

    Returns:
        None. All output goes through Streamlit widgets.
    """
    # App title
    st.title("📰 Article Summarizer")
    st.write("Provide the URL of the article you'd like summarized below, and we'll fetch and summarize it for you!")

    # Input URL from user; surrounding whitespace from copy/paste is dropped.
    url = st.text_input("Enter the article URL:", "").strip()

    # Sidebar with buttons for different sources
    st.sidebar.title("Choose a Source")

    # (button label, scraper) pairs, rendered in this order.
    sources = (
        ("The News", get_full_article_tnews),
        ("The Dawn", get_full_article_dawn),
        ("Business Recorder", get_full_article_brecorder),
    )
    for label, fetch_article in sources:
        # Every button must be rendered on each run, so test it first.
        if not st.sidebar.button(label):
            continue
        if not url:
            st.sidebar.error(f"Please enter the URL of an article from {label}.")
            continue
        with st.spinner(f'Fetching and summarizing the article from {label}...'):
            _show_summary(fetch_article(url))

    # Sidebar details and credits
    st.sidebar.title("About")
    st.sidebar.write(
        "This utility fetches articles from a given URL and summarizes them using a pre-trained summarization model.")
    st.sidebar.markdown("### Model Used")
    # Keep this in sync with the checkpoint passed to pipeline() at module level.
    st.sidebar.info("Model: `facebook/bart-large-cnn` (BART-based summarizer)")
    st.sidebar.markdown("---")
    st.sidebar.write("Created by Strategy")