# final-project / app.py
# herztard's picture
# add limits
# 46060d2
# raw
# history blame
# 1.89 kB
import requests
import streamlit as st
from bs4 import BeautifulSoup
from transformers import pipeline
@st.cache_resource
def load_model():
    """Build the zero-shot classification pipeline used to tag articles.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    reuse the returned pipeline object instead of constructing it again.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``zero-shot-classification`` task with the
        ``facebook/bart-large-mnli`` model.
    """
    task = "zero-shot-classification"
    checkpoint = "facebook/bart-large-mnli"
    classifier = pipeline(task, model=checkpoint)
    return classifier
# Module-level classifier shared by the rest of the script; it is called
# further down with the article text and the candidate categories.
model = load_model()
def extract_article_text(url, timeout=10):
    """Download an article page and return the text of its body container.

    The body is taken from the first ``<div>`` whose class is
    ``article__content``.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        str: The article text on success; ``"Article not found."`` when the
        page has no matching container; or a message starting with
        ``"Error: "`` when fetching or parsing raises an exception.
        Failures are reported through the return value rather than raised,
        so callers must inspect the string.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        article = soup.find('div', class_='article__content')
        if article is None:
            return "Article not found."
        # With strip=True each text node is stripped before joining, so the
        # default empty separator glues words together across inline tags
        # (e.g. around links). Joining with a space keeps words apart.
        return article.get_text(separator=" ", strip=True)
    except Exception as e:
        # Deliberately broad: this helper is the UI boundary and reports any
        # failure as a displayable message instead of crashing the app.
        return f"Error: {e}"
# Page header followed by the text box in which the user pastes the article
# URL; the entered value is kept in ``news_url`` for the button handler below.
st.title("Tag Detection from CNN News articles")
st.write("Enter a CNN News article URL.")
news_url = st.text_input(
    label="CNN Article URL:",
    placeholder="Example: https://edition.cnn.com/2024/12/19/science/stonehenge-monument-early-farmers/index.html",
)
# Candidate labels handed to the zero-shot classifier; every label is scored
# for the article and shown to the user as a predicted tag.
categories = [
"Politics",
"Economy",
"Sports",
"Weather",
"Health",
"Technology",
"Culture",
"Science",
"Education",
"Entertainment",
"Business",
"Environment",
"Crime",
"Travel",
"International Relations",
]
# Runs when the user clicks the button: fetch the article, show a short
# preview, then score it against every candidate category.
if st.button("Get tags"):
    url = news_url.strip()
    if url:
        article_text = extract_article_text(url)
        # extract_article_text reports failures as plain strings. The
        # "Article not found" prefix must be checked too, otherwise that
        # message would be displayed as article text and classified.
        failure_prefixes = ("Error", "Could not", "Article not found")
        if article_text.startswith(failure_prefixes):
            st.write(article_text)
        elif not article_text:
            # An empty string gives the classifier nothing to score.
            st.write("No article text could be extracted.")
        else:
            st.write("**Extracted Article Text:**")
            # Show at most 500 characters; add the ellipsis only when the
            # text was actually cut.
            preview = article_text[:500]
            if len(article_text) > 500:
                preview += "..."
            st.write(preview)
            with st.spinner("Analyzing... Please wait."):
                # Only the first 2000 characters are sent to the model.
                result = model(article_text[:2000], categories, multi_label=True)
            st.write("**Predicted Tags:**")
            for label, score in zip(result["labels"], result["scores"]):
                st.write(f"- {label}: {score:.2f}")
    else:
        st.write("Please enter a valid news URL.")