broadfield-dev committed on
Commit
70bea74
·
verified ·
1 Parent(s): 1f5e987

Update rss_processor.py

Browse files
Files changed (1) hide show
  1. rss_processor.py +8 -5
rss_processor.py CHANGED
@@ -17,11 +17,9 @@ HF_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
17
  REPO_ID = "broadfield-dev/news-rag-db"
18
  LOCAL_DB_DIR = "chroma_db"
19
 
20
- # Explicitly login to Hugging Face Hub
21
  login(token=HF_API_TOKEN)
22
  client = InferenceClient(model=HF_MODEL, token=HF_API_TOKEN)
23
 
24
- # RSS feeds
25
  RSS_FEEDS = [
26
  "https://www.sciencedaily.com/rss/top/science.xml",
27
  "https://www.horoscope.com/us/horoscopes/general/rss/horoscope-rss.aspx",
@@ -36,6 +34,8 @@ RSS_FEEDS = [
36
  "https://www.scientificamerican.com/rss/",
37
  "https://www.newscientist.com/feed/home/",
38
  "https://www.livescience.com/feeds/all",
 
 
39
  "https://astrostyle.com/feed/",
40
  "https://www.vogue.com/feed/rss",
41
  "https://feeds.bbci.co.uk/news/politics/rss.xml",
@@ -47,6 +47,10 @@ RSS_FEEDS = [
47
  "https://www.sciencedaily.com/rss/matter_energy/physics.xml",
48
  "https://physicsworld.com/feed/",
49
  "https://www.swpc.noaa.gov/rss.xml",
 
 
 
 
50
  "https://feeds.bbci.co.uk/weather/feeds/rss/5day/world/",
51
  "https://www.weather.gov/rss",
52
  "https://www.foxweather.com/rss",
@@ -64,7 +68,6 @@ RSS_FEEDS = [
64
  "https://www.atlasobscura.com/feeds/latest"
65
  ]
66
 
67
- # Embedding model and vector DB
68
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
69
  vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
70
  hf_api = HfApi()
@@ -125,11 +128,11 @@ def categorize_article(text):
125
  logger.error(f"Error categorizing article: {e}")
126
  return "Neutral"
127
 
128
- def process_and_store_articles(articles):
129
  documents = []
130
  for article in articles:
131
  try:
132
- summary = summarize_article(article["description"])
133
  sentiment = categorize_article(article["description"])
134
  doc = Document(
135
  page_content=summary,
 
17
  REPO_ID = "broadfield-dev/news-rag-db"
18
  LOCAL_DB_DIR = "chroma_db"
19
 
 
20
  login(token=HF_API_TOKEN)
21
  client = InferenceClient(model=HF_MODEL, token=HF_API_TOKEN)
22
 
 
23
  RSS_FEEDS = [
24
  "https://www.sciencedaily.com/rss/top/science.xml",
25
  "https://www.horoscope.com/us/horoscopes/general/rss/horoscope-rss.aspx",
 
34
  "https://www.scientificamerican.com/rss/",
35
  "https://www.newscientist.com/feed/home/",
36
  "https://www.livescience.com/feeds/all",
37
+ "https://www.hindustantimes.com/feed/horoscope/rss",
38
+ "https://www.washingtonpost.com/wp-srv/style/horoscopes/rss.xml",
39
  "https://astrostyle.com/feed/",
40
  "https://www.vogue.com/feed/rss",
41
  "https://feeds.bbci.co.uk/news/politics/rss.xml",
 
47
  "https://www.sciencedaily.com/rss/matter_energy/physics.xml",
48
  "https://physicsworld.com/feed/",
49
  "https://www.swpc.noaa.gov/rss.xml",
50
+ "https://www.nasa.gov/rss/dyn/solar_system.rss",
51
+ "https://weather.com/science/space/rss",
52
+ "https://www.space.com/feeds/space-weather",
53
+ "https://www.accuweather.com/en/rss",
54
  "https://feeds.bbci.co.uk/weather/feeds/rss/5day/world/",
55
  "https://www.weather.gov/rss",
56
  "https://www.foxweather.com/rss",
 
68
  "https://www.atlasobscura.com/feeds/latest"
69
  ]
70
 
 
71
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
72
  vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
73
  hf_api = HfApi()
 
128
  logger.error(f"Error categorizing article: {e}")
129
  return "Neutral"
130
 
131
+ def process_and_store_articles(articles, summarize=False):
132
  documents = []
133
  for article in articles:
134
  try:
135
+ summary = summarize_article(article["description"]) if summarize else article["description"]
136
  sentiment = categorize_article(article["description"])
137
  doc = Document(
138
  page_content=summary,