# webcrawler / trend_crawl.py
# Add1E's picture
# Update trend_crawl.py
# f9e4b54 verified
# raw
# history blame
# 2.63 kB
import logging
from datetime import datetime, timezone, timedelta

import streamlit as st
from trendspy import Trends
# Maps the numeric topic IDs carried by a trend (``trend.topics``) to the
# category names used as result buckets. Entries are listed alphabetically by
# name, which is why ID 20 sits between 3 and 4; there is no entry for ID 12.
TREND_TOPICS = {
    1: "Autos and Vehicles",
    2: "Beauty and Fashion",
    3: "Business and Finance",
    20: "Climate",
    4: "Entertainment",
    5: "Food and Drink",
    6: "Games",
    7: "Health",
    8: "Hobbies and Leisure",
    9: "Jobs and Education",
    10: "Law and Government",
    11: "Other",
    13: "Pets and Animals",
    14: "Politics",
    15: "Science",
    16: "Shopping",
    17: "Sports",
    18: "Technology",
    19: "Travel and Transportation",
}
def get_trends_base():
    """Create and return a fresh ``Trends`` client instance."""
    client = Trends()
    return client
def process_trends_for_country(country_code, trends_list, tr):
    """Group a country's recent trends by category, with related news.

    Trends that started more than two days ago are skipped. Every remaining
    trend is stored twice under ``country_code``: once in the
    ``"All categories"`` bucket (which also records ``is_trend_finished``)
    and once in the bucket named after the first of its topics found in
    ``TREND_TOPICS`` (``TREND_TOPICS[11]``, "Other", when none matches).

    Args:
        country_code: Key under which the result is nested.
        trends_list: Iterable of trend objects exposing ``started_timestamp``,
            ``topics``, ``keyword``, ``news_tokens``, ``volume`` and
            ``is_trend_finished``.
        tr: Client providing ``trending_now_news_by_ids(tokens, max_news=...)``.

    Returns:
        ``{country_code: {category: {keyword: {...}}}}``. The
        ``"All categories"`` bucket is always present, even when empty.
    """
    all_categories = {}
    country_trends = {"All categories": all_categories}
    max_age = timedelta(days=2)
    # One reference time for the whole batch, so every trend is judged
    # against the same clock regardless of how long news lookups take.
    now = datetime.now(timezone.utc)

    for trend in trends_list:
        started = trend.started_timestamp
        started_at = convert_to_datetime(started[0] if started else None)
        # convert_to_datetime() returns None for a missing start time, and
        # subtracting None would raise TypeError. Such trends are kept
        # because they cannot be shown to be stale.
        if started_at is not None and now - started_at > max_age:
            continue

        # First recognised topic wins; fall back to "Other".
        category = next(
            (
                TREND_TOPICS[topic_id]
                for topic_id in (trend.topics or ())
                if topic_id in TREND_TOPICS
            ),
            TREND_TOPICS[11],
        )

        keyword = trend.keyword
        try:
            news = tr.trending_now_news_by_ids(trend.news_tokens, max_news=3)
            articles = [
                {"title": article.title, "href": article.url}
                for article in news
            ]
        except Exception as exc:
            # Best effort: a failed news lookup must not drop the trend
            # itself, but the failure is reported instead of vanishing.
            logging.getLogger(__name__).warning(
                "News lookup failed for trend %r: %s", keyword, exc
            )
            articles = []

        all_categories[keyword] = {
            "searchQueries": trend.volume,
            "articles": articles,
            "is_trend_finished": trend.is_trend_finished,
        }
        country_trends.setdefault(category, {})[keyword] = {
            "searchQueries": trend.volume,
            "articles": articles,
        }

    return {country_code: country_trends}
def convert_to_datetime(raw_time):
    """Return ``raw_time`` (a timestamp in seconds) as a UTC-aware datetime.

    Falsy input such as ``None`` or ``0`` is treated as "no timestamp" and
    yields ``None``.
    """
    if not raw_time:
        return None
    return datetime.fromtimestamp(raw_time, tz=timezone.utc)
def get_trends(country):
    """Fetch the currently trending searches for ``country`` and group them.

    Args:
        country: Code passed as ``geo`` to the client's ``trending_now`` call
            and used as the top-level key of the result.

    Returns:
        The nested mapping built by ``process_trends_for_country``.
    """
    tr = get_trends_base()
    trends = tr.trending_now(geo=country)
    # The raw trend list is logged at debug level rather than printed
    # unconditionally, so normal runs stay quiet.
    logging.getLogger(__name__).debug("Trends for %s: %s", country, trends)
    return process_trends_for_country(country, trends, tr)