Spaces:
Sleeping
Sleeping
File size: 1,892 Bytes
1e5a29f b2204f0 1e5a29f 12854bd 1e5a29f 12854bd b2204f0 12854bd 1e5a29f 12854bd 1505e07 697dc58 1e5a29f cc238ea c1f2ce7 46060d2 c1f2ce7 b7edde4 1e5a29f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import requests
import streamlit as st
from bs4 import BeautifulSoup
from transformers import pipeline
@st.cache_resource
def load_model():
    """Create the zero-shot classification pipeline used for tagging.

    The function is decorated with ``st.cache_resource`` so Streamlit can
    reuse the returned pipeline object instead of rebuilding it.

    Returns:
        The object produced by ``transformers.pipeline`` for the
        ``"zero-shot-classification"`` task with the
        ``facebook/bart-large-mnli`` checkpoint.
    """
    classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
    )
    return classifier


# Module-level handle to the classifier, used by the tagging step below.
model = load_model()
def extract_article_text(url, timeout=10):
    """Download a news page and return the text of its article body.

    Failures are reported as strings rather than raised, so callers must
    inspect the prefix of the returned value.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        One of:
        * the text of the first ``<div class="article__content">`` element,
          with text fragments joined by single spaces;
        * ``"Article not found."`` when the page has no such element;
        * ``"Error: <details>"`` when fetching or parsing raised an exception.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        article = soup.find('div', class_='article__content')
        if article is None:
            return "Article not found."
        # A space separator keeps words from adjacent tags apart; with the
        # default empty separator, stripped fragments are glued together.
        return article.get_text(separator=" ", strip=True)
    except Exception as e:
        # Deliberate best-effort boundary: the UI displays this message
        # instead of crashing on a bad URL or network failure.
        return f"Error: {e}"
# Page header and the URL input field.
st.title("Tag Detection from CNN News articles")
st.write("Enter a CNN News article URL.")
news_url = st.text_input(
    "CNN Article URL:",
    placeholder=(
        "Example: https://edition.cnn.com/2024/12/19/science/"
        "stonehenge-monument-early-farmers/index.html"
    ),
)

# Candidate labels handed to the classifier when tags are requested.
categories = [
    "Politics", "Economy", "Sports",
    "Weather", "Health", "Technology",
    "Culture", "Science", "Education",
    "Entertainment", "Business", "Environment",
    "Crime", "Travel", "International Relations",
]
# Run the extraction + classification flow when the user clicks the button.
if st.button("Get tags"):
    cleaned_url = news_url.strip()
    if not cleaned_url:
        st.write("Please enter a valid news URL.")
    else:
        article_text = extract_article_text(cleaned_url)
        # extract_article_text reports failures as strings starting with
        # "Error" or "Article not found"; show those instead of classifying
        # them. "Could not" is kept for backward compatibility.
        if article_text.startswith(("Error", "Article not found", "Could not")):
            st.write(article_text)
        elif not article_text.strip():
            # The article container exists but holds no text: nothing to tag.
            st.write("No article text could be extracted.")
        else:
            st.write("**Extracted Article Text:**")
            # Only add an ellipsis when the preview is actually truncated.
            preview = article_text[:500]
            if len(article_text) > 500:
                preview += "..."
            st.write(preview)
            with st.spinner("Analyzing... Please wait."):
                # Only the first 2000 characters are sent to the classifier.
                result = model(article_text[:2000], categories, multi_label=True)
            st.write("**Predicted Tags:**")
            for label, score in zip(result["labels"], result["scores"]):
                st.write(f"- {label}: {score:.2f}")