broadfield-dev committed on
Commit 183682b · verified · 1 Parent(s): 3910553

Update app.py

Files changed (1)
  1. app.py +11 -18
app.py CHANGED
@@ -20,12 +20,12 @@ last_update_time = time.time()
 def load_feeds_in_background():
     global loading_complete, last_update_time
     try:
-        logger.info("Starting background RSS feed fetch and database population")
+        logger.info("Starting background RSS feed fetch")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
         last_update_time = time.time()
-        logger.info("Background feed processing and database population complete")
+        logger.info("Background feed processing complete")
         upload_to_hf_hub()
         loading_complete = True
     except Exception as e:
@@ -40,29 +40,22 @@ def index():
     db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
     if not db_exists:
         loading_complete = False
-        logger.info("Downloading Chroma DB from Hugging Face Hub or initializing empty DB...")
+        logger.info("Downloading Chroma DB from Hugging Face Hub...")
         download_from_hf_hub()
-        # Immediately populate the database with RSS feeds (synchronously for first load)
-        articles = fetch_rss_feeds()
-        process_and_store_articles(articles)
-        upload_to_hf_hub()
-        loading_complete = True
+        threading.Thread(target=load_feeds_in_background, daemon=True).start()
+    elif not loading_complete:
+        pass # Let background loading continue
     else:
-        # Database exists, but check if loading is complete
-        if not loading_complete:
-            threading.Thread(target=load_feeds_in_background, daemon=True).start()
+        loading_complete = True
 
     try:
-        # Retrieve all articles from Chroma DB (always load from database)
+        # Retrieve all articles from Chroma DB
        all_docs = vector_db.get(include=['documents', 'metadatas'])
         if not all_docs.get('metadatas'):
-            logger.info("No articles in DB yet, initializing with RSS feeds...")
-            articles = fetch_rss_feeds()
-            process_and_store_articles(articles)
-            upload_to_hf_hub()
-            all_docs = vector_db.get(include=['documents', 'metadatas'])
+            logger.info("No articles in DB yet")
+            return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
 
-        # Process and categorize articles with strict deduplication from the database
+        # Process and categorize articles with strict deduplication
         enriched_articles = []
         seen_keys = set()
         for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
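Taken together, the change replaces the synchronous first-load fetch inside the request with a background refresh plus an early "no articles yet" render. Below is a minimal, self-contained sketch of that pattern, not the app's actual code: fetch_and_store() is an illustrative stand-in for the fetch_rss_feeds / process_and_store_articles / upload_to_hf_hub pipeline, and the template arguments simply mirror the ones visible in the diff.

import threading
from flask import Flask, render_template

app = Flask(__name__)
loading_complete = False
_refresh_started = threading.Event()  # guard so only one refresh thread is spawned

def fetch_and_store():
    # Stand-in for the real pipeline: fetch feeds, store them in the vector DB,
    # sync the DB to the Hub, then flag completion for the UI.
    global loading_complete
    try:
        pass  # e.g. fetch feeds, store articles, upload the DB
    finally:
        loading_complete = True

@app.route("/")
def index():
    # Kick off the refresh once, without blocking the request.
    if not loading_complete and not _refresh_started.is_set():
        _refresh_started.set()
        threading.Thread(target=fetch_and_store, daemon=True).start()
    # Render immediately; the page shows a loading state until the thread finishes.
    return render_template("index.html", categorized_articles={},
                           has_articles=loading_complete,
                           loading=not loading_complete)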