Spaces:

randeom
/

word_cloud_news

Sleeping

App Files Files Community

word_cloud_news / app.py

randeom

Create app.py

dbec3ef verified about 1 year ago

raw

history blame

3.19 kB

	import html
	import xml.etree.ElementTree as ET
	from datetime import datetime, timedelta
	from email.utils import parsedate_to_datetime
	from io import BytesIO

	import requests
	import streamlit as st
	from wordcloud import WordCloud

	# Page configuration has to be the first Streamlit call in the script.
	st.set_page_config(
	    page_title="Word Cloud from News Headlines",
	    layout="wide",
	)

	# Stylesheet injected into the page: gradient background, translucent
	# panels, button styling, and a rule hiding the white bar at the top.
	_PAGE_CSS = """
	<style>
	.stApp {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	}
	.css-18e3th9 {
	background-color: rgba(255, 255, 255, 0.1);
	padding: 20px;
	border-radius: 10px;
	}
	.css-1d391kg {
	background-color: rgba(255, 255, 255, 0.1);
	padding: 20px;
	border-radius: 10px;
	}
	.st-emotion-cache-18ni7ap {
	display: none;
	}
	.stButton button {
	color: black !important;
	background-color: #667eea;
	border-radius: 10px;
	padding: 10px 20px;
	font-weight: bold;
	}
	.headline-container {
	background-color: rgba(255, 255, 255, 0.1);
	padding: 20px;
	border-radius: 10px;
	margin-top: 20px;
	}
	.st-emotion-cache-7ym5gk {
	color: black !important;
	}
	</style>
	"""
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# Page heading and subtitle
	st.title("Word Cloud from News Headlines")
	st.markdown("### Generating a word cloud from live news headlines")

	# User-adjustable settings live in the sidebar
	sidebar = st.sidebar
	sidebar.title("Customize Your Word Cloud")
	bg_color = sidebar.color_picker("Background Color", "#ffffff")
	max_words = sidebar.slider("Maximum Number of Words", 10, 200, 100)
	keyword = sidebar.text_input("Search Keyword (Optional)")

	# Default selection: the seven days leading up to now
	default_start = datetime.now() - timedelta(days=7)
	default_end = datetime.now()
	date_range = sidebar.date_input("Date Range", [default_start, default_end])

	# Helpers and entry point for fetching headlines from the Google News RSS feed
	def _as_date(value):
	    """Return *value* as a plain date, dropping the time part of a datetime."""
	    # datetime is a subclass of date, but the two cannot be compared directly.
	    return value.date() if isinstance(value, datetime) else value


	def _published_within(pub_date, start, end):
	    """Return True when an RSS ``pubDate`` string falls inside [start, end].

	    Items whose date is missing or unparseable are kept, since there is no
	    basis on which to exclude them.
	    """
	    if not pub_date:
	        return True
	    try:
	        published = parsedate_to_datetime(pub_date)
	    except (TypeError, ValueError):
	        return True
	    # Compare in local time so the bounds line up with the dates picked in the UI.
	    return start <= published.astimezone().date() <= end


	def fetch_news_headlines(keyword=None, date_range=None, timeout=10):
	    """Fetch headline titles from the Google News RSS feed.

	    Args:
	        keyword: Optional search term. When given, the search feed is
	            queried instead of the top-stories feed.
	        date_range: Optional ``(start, end)`` pair of dates (or datetimes).
	            When exactly two values are supplied, only items whose
	            ``pubDate`` lies within the inclusive range are returned.
	            Any other value disables date filtering.
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        A list of non-empty headline strings. The list is empty when the
	        request fails, the response is not valid XML, or nothing matches.
	    """
	    url = "https://news.google.com/rss"
	    params = None
	    if keyword:
	        url += "/search"
	        # Let requests URL-encode the term (spaces, '&', '#', non-ASCII).
	        params = {"q": keyword}

	    try:
	        response = requests.get(url, params=params, timeout=timeout)
	        response.raise_for_status()
	        root = ET.fromstring(response.content)
	    except (requests.RequestException, ET.ParseError):
	        # An empty result lets the caller show its "no headlines" message
	        # instead of crashing the page.
	        return []

	    bounds = None
	    if isinstance(date_range, (list, tuple)) and len(date_range) == 2:
	        bounds = tuple(_as_date(day) for day in date_range)

	    headlines = []
	    for item in root.findall('./channel/item'):
	        # findtext tolerates a missing <title>; empty titles are skipped so
	        # the caller can safely join the results.
	        title = (item.findtext('title') or '').strip()
	        if not title:
	            continue
	        if bounds and not _published_within(item.findtext('pubDate'), *bounds):
	            continue
	        headlines.append(title)
	    return headlines

	# Generate the word cloud from the fetched headlines and render the results
	headlines = fetch_news_headlines(keyword, date_range)
	if headlines:
	    wordcloud_text = ' '.join(headlines)
	    wordcloud = WordCloud(
	        width=800,
	        height=400,
	        background_color=bg_color,
	        max_words=max_words,
	    ).generate(wordcloud_text)
	    st.image(wordcloud.to_array(), use_column_width=True)

	    # Download option: encode the image as PNG and hand over the raw bytes
	    img = BytesIO()
	    wordcloud.to_image().save(img, format='PNG')
	    st.download_button(
	        label="Download Word Cloud",
	        data=img.getvalue(),
	        file_name="wordcloud.png",
	        mime="image/png",
	    )

	    # Display headlines
	    st.markdown("### Fetched Headlines")
	    with st.expander("Show Headlines"):
	        # Each st.markdown call renders its own element, so the container
	        # <div> must be opened and closed within a single call for the
	        # .headline-container style to wrap the list. Headlines come from
	        # an external feed, so they are escaped before being placed in raw
	        # HTML; whitespace is collapsed to keep the markup on one line.
	        list_items = "".join(
	            f"<li>{html.escape(' '.join(headline.split()))}</li>"
	            for headline in headlines
	        )
	        st.markdown(
	            f'<div class="headline-container"><ul>{list_items}</ul></div>',
	            unsafe_allow_html=True,
	        )
	else:
	    st.warning("No headlines fetched. Please try again later.")

	# Footer: a horizontal rule followed by a centered credit line
	footer_html = """
	<hr>
	<div style="text-align: center;">
	Created by randeom
	</div>
	"""
	st.markdown(footer_html, unsafe_allow_html=True)