import requests
import streamlit as st
from bs4 import BeautifulSoup
from transformers import pipeline


@st.cache_resource
def load_model():
    """Build the zero-shot classification pipeline used for tagging.

    The ``st.cache_resource`` decorator lets Streamlit keep the returned
    pipeline and hand it back on later calls instead of rebuilding it.

    Returns:
        A ``transformers`` zero-shot-classification pipeline backed by the
        ``facebook/bart-large-mnli`` checkpoint.
    """
    classifier = pipeline(
        task="zero-shot-classification",
        model="facebook/bart-large-mnli",
    )
    return classifier

# Module-level classifier handle; the script body below calls it on the
# extracted article text together with the candidate categories.
model = load_model()

def extract_article_text(url, timeout=10):
    """Download an article page and return the text of its main content.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        One of three strings (this function reports problems through its
        return value rather than by raising):

        * the text found inside ``<div class="article__content">``, with
          each fragment stripped and the fragments joined by single spaces;
        * ``"Article not found."`` when the page has no such element;
        * ``"Error: <details>"`` when fetching or parsing fails.
    """
    try:
        # requests applies no timeout by default, so without one an
        # unresponsive host would block the app indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        article = soup.find('div', class_='article__content')
        if article is None:
            return "Article not found."

        # A separator keeps words from adjacent elements (e.g. consecutive
        # paragraphs) from being fused together when the tags are dropped.
        return article.get_text(separator=" ", strip=True)
    except Exception as e:
        # Deliberately broad: callers expect a string describing the
        # failure, never an exception.
        return f"Error: {e}"

# Page header followed by the single text box that collects the article URL.
st.title("Tag Detection from CNN News articles")
st.write("Enter a CNN News article URL.")

news_url = st.text_input(
    label="CNN Article URL:",
    placeholder=(
        "Example: "
        "https://edition.cnn.com/2024/12/19/science/"
        "stonehenge-monument-early-farmers/index.html"
    ),
)
# Candidate labels handed to the zero-shot classifier; each one is scored
# against the article text and shown as a predicted tag.
categories = [
    "Politics",
    "Economy",
    "Sports",
    "Weather",
    "Health",
    "Technology",
    "Culture",
    "Science",
    "Education",
    "Entertainment",
    "Business",
    "Environment",
    "Crime",
    "Travel",
    "International Relations",
]

# Fetch -> preview -> classify flow, executed when the button is pressed.
if st.button("Get tags"):
    cleaned_url = news_url.strip()

    if not cleaned_url:
        st.write("Please enter a valid news URL.")
    else:
        article_text = extract_article_text(cleaned_url)

        # extract_article_text reports failures as sentinel strings. The
        # "Article not found" prefix must be checked too, otherwise that
        # message is shown and classified as if it were article text.
        # NOTE(review): prefix matching would also reject a real article that
        # happens to begin with one of these words.
        if article_text.startswith(("Error", "Could not", "Article not found")):
            st.write(article_text)
        elif not article_text:
            # Nothing to preview or classify; skip the model call rather
            # than hand the classifier an empty sequence.
            st.write("No article text could be extracted from this page.")
        else:
            st.write("**Extracted Article Text:**")
            # Only add an ellipsis when the preview is actually truncated.
            preview = article_text[:500]
            if len(article_text) > 500:
                preview += "..."
            st.write(preview)

            with st.spinner("Analyzing... Please wait."):
                # Only the first 2000 characters are sent to the model;
                # multi_label=True scores every category independently.
                result = model(article_text[:2000], categories, multi_label=True)

            st.write("**Predicted Tags:**")
            for label, score in zip(result["labels"], result["scores"]):
                st.write(f"- {label}: {score:.2f}")