"""Streamlit app that suggests topic tags for a CNN news article.

The script downloads the page at a user-supplied URL, extracts the text of
the ``div.article__content`` element, and scores it against a fixed list of
categories with a zero-shot classification pipeline.
"""

import requests
import streamlit as st
from bs4 import BeautifulSoup
from transformers import pipeline

# Without an explicit timeout ``requests`` may wait on an unresponsive server
# indefinitely, which would freeze the whole script run.
REQUEST_TIMEOUT_SECONDS = 10

# Message used both as the exception text and as the legacy return value of
# ``extract_article_text`` when the page has no usable article container.
ARTICLE_NOT_FOUND_MESSAGE = "Article not found."

# Number of characters shown in the on-page preview.
PREVIEW_CHARS = 500

# Number of leading characters of the article passed to the classifier.
MODEL_INPUT_CHARS = 2000


class ArticleNotFoundError(LookupError):
    """Raised when the page has no ``div.article__content`` with text in it."""


@st.cache_resource
def load_model():
    """Build the zero-shot classification pipeline.

    Wrapped in ``st.cache_resource`` so the pipeline object is created once
    and reused instead of being rebuilt on every script rerun.

    Returns:
        The ``transformers`` pipeline for ``facebook/bart-large-mnli``.
    """
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


model = load_model()


def _fetch_article_text(url):
    """Download *url* and return the text of its article container.

    Args:
        url: Address of the article page.

    Returns:
        The non-empty text of the first ``div.article__content`` element,
        with text fragments joined by single spaces.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an HTTP error status.
        ArticleNotFoundError: The page has no ``div.article__content``
            element, or that element contains no text.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    article = soup.find("div", class_="article__content")
    if article is None:
        raise ArticleNotFoundError(ARTICLE_NOT_FOUND_MESSAGE)

    # A separator keeps words from adjacent tags apart; with no separator
    # "…end.</p><p>Next…" collapses to "…end.Next…" and pollutes the model
    # input.
    text = article.get_text(separator=" ", strip=True)
    if not text:
        # An empty container is as useless as a missing one, and the
        # classifier cannot score an empty sequence.
        raise ArticleNotFoundError(ARTICLE_NOT_FOUND_MESSAGE)
    return text


def extract_article_text(url):
    """Return the article text for *url*, or a message string on failure.

    Kept for backward compatibility with callers that expect this function
    never to raise. New code should prefer ``_fetch_article_text``, which
    signals failures with exceptions instead of sentinel strings.

    Args:
        url: Address of the article page.

    Returns:
        The article text on success; ``"Article not found."`` when the page
        has no usable article container; ``"Error: <details>"`` for any
        other failure.
    """
    try:
        return _fetch_article_text(url)
    except ArticleNotFoundError:
        return ARTICLE_NOT_FOUND_MESSAGE
    except Exception as e:  # Boundary: preserve the legacy never-raises contract.
        return f"Error: {e}"


st.title("Tag Detection from CNN News articles")
st.write("Enter a CNN News article URL.")
news_url = st.text_input(
    "CNN Article URL:",
    placeholder="Example: https://edition.cnn.com/2024/12/19/science/stonehenge-monument-early-farmers/index.html",
)

categories = [
    "Politics",
    "Economy",
    "Sports",
    "Weather",
    "Health",
    "Technology",
    "Culture",
    "Science",
    "Education",
    "Entertainment",
    "Business",
    "Environment",
    "Crime",
    "Travel",
    "International Relations",
]

if st.button("Get tags"):
    # Use the stripped value for the request too, not only for validation.
    url = news_url.strip()
    if not url:
        st.write("Please enter a valid news URL.")
    else:
        try:
            article_text = _fetch_article_text(url)
        except ArticleNotFoundError as exc:
            # Previously this case fell through and the "not found" message
            # itself was classified as if it were the article.
            st.write(str(exc))
        except requests.RequestException as exc:
            st.write(f"Error: {exc}")
        else:
            preview = article_text[:PREVIEW_CHARS]
            if len(article_text) > PREVIEW_CHARS:
                # Only signal truncation when text was actually cut off.
                preview += "..."
            st.write("**Extracted Article Text:**")
            st.write(preview)

            with st.spinner("Analyzing... Please wait."):
                result = model(
                    article_text[:MODEL_INPUT_CHARS],
                    categories,
                    multi_label=True,
                )

            st.write("**Predicted Tags:**")
            for label, score in zip(result["labels"], result["scores"]):
                st.write(f"- {label}: {score:.2f}")