Spaces:

broadfield-dev
/

grok_test

Runtime error

App Files Files Community

broadfield-dev committed on Feb 20

Commit

70bea74

verified ·

1 Parent(s): 1f5e987

Update rss_processor.py

Browse files

Files changed (1) hide show

rss_processor.py +8 -5

rss_processor.py CHANGED Viewed

@@ -17,11 +17,9 @@ HF_MODEL = "Qwen/Qwen2.5-Coder-32B-Instruct"
 REPO_ID = "broadfield-dev/news-rag-db"
 LOCAL_DB_DIR = "chroma_db"
-# Explicitly login to Hugging Face Hub
 login(token=HF_API_TOKEN)
 client = InferenceClient(model=HF_MODEL, token=HF_API_TOKEN)
-# RSS feeds
 RSS_FEEDS = [
     "https://www.sciencedaily.com/rss/top/science.xml",
     "https://www.horoscope.com/us/horoscopes/general/rss/horoscope-rss.aspx",
@@ -36,6 +34,8 @@ RSS_FEEDS = [
     "https://www.scientificamerican.com/rss/",
     "https://www.newscientist.com/feed/home/",
     "https://www.livescience.com/feeds/all",
     "https://astrostyle.com/feed/",
     "https://www.vogue.com/feed/rss",
     "https://feeds.bbci.co.uk/news/politics/rss.xml",
@@ -47,6 +47,10 @@ RSS_FEEDS = [
     "https://www.sciencedaily.com/rss/matter_energy/physics.xml",
     "https://physicsworld.com/feed/",
     "https://www.swpc.noaa.gov/rss.xml",
     "https://feeds.bbci.co.uk/weather/feeds/rss/5day/world/",
     "https://www.weather.gov/rss",
     "https://www.foxweather.com/rss",
@@ -64,7 +68,6 @@ RSS_FEEDS = [
     "https://www.atlasobscura.com/feeds/latest"
 ]
-# Embedding model and vector DB
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
 hf_api = HfApi()
@@ -125,11 +128,11 @@ def categorize_article(text):
         logger.error(f"Error categorizing article: {e}")
         return "Neutral"
-def process_and_store_articles(articles):
     documents = []
     for article in articles:
         try:
-            summary = summarize_article(article["description"])
             sentiment = categorize_article(article["description"])
             doc = Document(
                 page_content=summary,

 REPO_ID = "broadfield-dev/news-rag-db"
 LOCAL_DB_DIR = "chroma_db"
 login(token=HF_API_TOKEN)
 client = InferenceClient(model=HF_MODEL, token=HF_API_TOKEN)
 RSS_FEEDS = [
     "https://www.sciencedaily.com/rss/top/science.xml",
     "https://www.horoscope.com/us/horoscopes/general/rss/horoscope-rss.aspx",
     "https://www.scientificamerican.com/rss/",
     "https://www.newscientist.com/feed/home/",
     "https://www.livescience.com/feeds/all",
+    "https://www.hindustantimes.com/feed/horoscope/rss",
+    "https://www.washingtonpost.com/wp-srv/style/horoscopes/rss.xml",
     "https://astrostyle.com/feed/",
     "https://www.vogue.com/feed/rss",
     "https://feeds.bbci.co.uk/news/politics/rss.xml",
     "https://www.sciencedaily.com/rss/matter_energy/physics.xml",
     "https://physicsworld.com/feed/",
     "https://www.swpc.noaa.gov/rss.xml",
+    "https://www.nasa.gov/rss/dyn/solar_system.rss",
+    "https://weather.com/science/space/rss",
+    "https://www.space.com/feeds/space-weather",
+    "https://www.accuweather.com/en/rss",
     "https://feeds.bbci.co.uk/weather/feeds/rss/5day/world/",
     "https://www.weather.gov/rss",
     "https://www.foxweather.com/rss",
     "https://www.atlasobscura.com/feeds/latest"
 ]
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 vector_db = Chroma(persist_directory=LOCAL_DB_DIR, embedding_function=embedding_model)
 hf_api = HfApi()
         logger.error(f"Error categorizing article: {e}")
         return "Neutral"
+def process_and_store_articles(articles, summarize=False):
     documents = []
     for article in articles:
         try:
+            summary = summarize_article(article["description"]) if summarize else article["description"]
             sentiment = categorize_article(article["description"])
             doc = Document(
                 page_content=summary,