Spaces:
Sleeping
Sleeping
import streamlit as st | |
import requests | |
from bs4 import BeautifulSoup | |
from transformers import pipeline | |
from transformers import AutoModel | |
#from peft import PeftModel, PeftConfig | |
from transformers import AutoModelForCausalLM | |
from Scrapper_Summarizer import get_full_article_dawn, get_full_article_tnews, get_full_article_brecorder, summarizer | |
# Alternative, smaller summarization model kept for reference:
# summarizer = pipeline("summarization", model="mrm8488/bert-mini2bert-mini-finetuned-cnn_daily_mail-summarization")
# Module-level summarization pipeline shared by every function in this file.
# NOTE: this assignment rebinds the `summarizer` name that is also imported
# from Scrapper_Summarizer above, so the imported object is not the one used
# below. The pipeline is constructed when this module is first executed.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Function to scrape full article and summarize it | |
def get_full_article(url, *, timeout=10, verify=True, max_chars=1020):
    """Fetch an article page, extract its body text and return a summary.

    The article body is taken from the ``<p>`` elements inside the first
    ``<div class="story__content">`` of the page. Progress and failures are
    also reported to the user through Streamlit (``st.success`` /
    ``st.error``).

    Args:
        url: Address of the article page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.
        verify: Whether to verify the server's TLS certificate. Defaults to
            ``True``; pass ``False`` only for a host whose certificate is
            known to be broken, since that disables protection against
            tampered responses.
        max_chars: Maximum number of characters of article text handed to
            the summarization model.

    Returns:
        The summary text on success, ``"Content not found."`` when the page
        has no usable article body, or ``"Error fetching the article."`` when
        downloading, parsing or summarizing fails. The function never raises.
    """
    try:
        response = requests.get(url, timeout=timeout, verify=verify)
        # Treat HTTP error pages (404, 500, ...) as failures instead of
        # trying to scrape them as if they were articles.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        content_div = soup.find('div', class_='story__content')
        paragraphs = content_div.find_all('p') if content_div else []

        # Drop empty paragraphs so a container holding only blank <p> tags
        # is reported as "not found" rather than summarized.
        texts = [para.get_text(strip=True) for para in paragraphs]
        full_text = ' '.join(text for text in texts if text)
        if not full_text:
            st.error("Content not found in the article.")
            return "Content not found."

        # Limit the text length before handing it to the model.
        summary_obj = summarizer(full_text[:max_chars])
        summary = summary_obj[0]['summary_text'] if summary_obj else ""
        st.success("Summary generated successfully!")
        return summary
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing
        # the Streamlit script.
        st.error(f"Error fetching the article: {e}")
        return "Error fetching the article."
def article_sum():
    """Render the "Article Summarizer" Streamlit page.

    The page shows a URL input plus one sidebar button per supported news
    source. Clicking a button downloads the article with that source's
    scraper, summarizes the first 1020 characters of its text with the
    module-level ``summarizer`` pipeline and writes the summary to the page.
    If no URL has been entered, an error is shown in the sidebar instead.
    """
    # App title ("📰" restored from its mis-decoded form "π°").
    st.title("📰 Article Summarizer")
    st.write("Provide the URL of the article you'd like summarized below, and we'll fetch and summarize it for you!")

    # Input URL from user; surrounding whitespace from copy/paste is dropped.
    url = st.text_input("Enter the article URL:", "").strip()

    # Sidebar with buttons for different sources
    st.sidebar.title("Choose a Source")

    # (button label, scraper) pairs. The tuple order is the order in which
    # the buttons are rendered in the sidebar.
    sources = (
        ("The News", get_full_article_tnews),
        ("The Dawn", get_full_article_dawn),
        ("Business Recorder", get_full_article_brecorder),
    )
    for label, fetch_article in sources:
        if not st.sidebar.button(label):
            continue
        if not url:
            st.sidebar.error(f"Please enter the URL of an article from {label}.")
            continue
        with st.spinner(f'Fetching and summarizing the article from {label}...'):
            full_text = fetch_article(url)
            if not full_text:
                # Guard against a scraper returning None or an empty string,
                # which would otherwise fail on slicing or reach the model.
                st.error("No article text could be extracted from the given URL.")
                continue
            summary_obj = summarizer(full_text[:1020])
            # Convert the summary object to a string
            summary = summary_obj[0]['summary_text'] if summary_obj else ""
            st.write(summary)

    # Sidebar details and credits
    st.sidebar.title("About")
    st.sidebar.write(
        "This utility fetches articles from a given URL and summarizes them using a pre-trained summarization model.")
    st.sidebar.markdown("### Model Used")
    # Keep this label in sync with the model passed to `pipeline(...)` at
    # module level.
    st.sidebar.info("Model: `facebook/bart-large-cnn` (BART-based summarizer)")
    st.sidebar.markdown("---")
    st.sidebar.write("Created by Strategy")