Spaces:
Sleeping
Sleeping
import requests | |
import streamlit as st | |
from bs4 import BeautifulSoup | |
from transformers import pipeline | |
# Streamlit re-executes this script from the top on every widget interaction.
# Caching the pipeline as a shared resource means the model is built once per
# server process instead of on every rerun.
# NOTE(review): st.cache_resource needs a Streamlit release that provides it
# (1.18+) — confirm against the deployed version.
@st.cache_resource
def load_model():
    """Build the zero-shot classification pipeline used to tag articles.

    Returns:
        The ``transformers`` pipeline created for the
        ``"zero-shot-classification"`` task with the
        ``facebook/bart-large-mnli`` checkpoint.
    """
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Module-level handle used by the button handler further down the script.
model = load_model()
def extract_article_text(url):
    """Download *url* and return the text of its article body.

    The article body is the first ``<div class="article__content">`` element
    of the page.

    Args:
        url: Address of the page to fetch.

    Returns:
        The article text on success, ``"Article not found."`` when the page
        has no matching ``div``, or ``"Error: <details>"`` when fetching or
        parsing fails.  The function never raises; callers tell the outcomes
        apart by inspecting the returned string.
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on a host
        # that accepts the connection but never answers.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        article = soup.find("div", class_="article__content")
        if article is None:
            return "Article not found."
        # Join the stripped text nodes with a space; with the default empty
        # separator, words on either side of an inline tag are fused together
        # ("saidJohntoday"), which degrades the text fed to the classifier.
        return article.get_text(separator=" ", strip=True)
    except Exception as e:
        # Deliberately broad: the caller relies on a string result for every
        # failure mode (network, HTTP status, parsing).
        return f"Error: {e}"
# Page header, a short instruction line, and the single text box the user
# pastes the article address into.
st.title("Tag Detection from CNN News articles")
st.write("Enter a CNN News article URL.")

# Greyed-out hint shown inside the text box while it is empty.
_url_placeholder = (
    "Example: https://edition.cnn.com/2024/12/19/science/"
    "stonehenge-monument-early-farmers/index.html"
)
news_url = st.text_input("CNN Article URL:", placeholder=_url_placeholder)
# Labels handed to the zero-shot classifier; each one is scored against the
# article text when the user requests tags.
categories = [
    "Politics", "Economy", "Sports",
    "Weather", "Health", "Technology",
    "Culture", "Science", "Education",
    "Entertainment", "Business", "Environment",
    "Crime", "Travel", "International Relations",
]
# Runs when the user presses the button: fetch the article, show a preview,
# classify it against `categories`, and list every label with its score.
if st.button("Get tags"):
    url = news_url.strip()
    if not url:
        st.write("Please enter a valid news URL.")
    else:
        article_text = extract_article_text(url)
        # extract_article_text reports failures as plain strings rather than
        # raising.  "Article not found." must be treated as a failure as well,
        # otherwise that sentence itself would be classified as the article.
        # NOTE: a genuine article whose text begins with one of these prefixes
        # is indistinguishable from a failure under this string convention.
        failure_prefixes = ("Error", "Could not", "Article not found")
        if article_text.startswith(failure_prefixes):
            st.write(article_text)
        elif not article_text:
            # The article container exists but holds no text; there is
            # nothing to hand to the classifier.
            st.write("Article not found.")
        else:
            preview_limit = 500
            preview = article_text[:preview_limit]
            if len(article_text) > preview_limit:
                # Only signal truncation when text was actually cut off.
                preview += "..."
            st.write("**Extracted Article Text:**")
            st.write(preview)

            with st.spinner("Analyzing... Please wait."):
                # Only the first 2000 characters are passed to the model.
                result = model(article_text[:2000], categories, multi_label=True)

            st.write("**Predicted Tags:**")
            for label, score in zip(result["labels"], result["scores"]):
                st.write(f"- {label}: {score:.2f}")