Spaces:

broadfield-dev
/

RSS_News

Runtime error

App Files Files Community

broadfield-dev committed on Feb 22

Commit

af08b40

verified ·

1 Parent(s): 183682b

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -14

app.py CHANGED Viewed

@@ -20,12 +20,12 @@ last_update_time = time.time()
 def load_feeds_in_background():
     global loading_complete, last_update_time
     try:
-        logger.info("Starting background RSS feed fetch")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
         last_update_time = time.time()
-        logger.info("Background feed processing complete")
         upload_to_hf_hub()
         loading_complete = True
     except Exception as e:
@@ -36,26 +36,37 @@ def load_feeds_in_background():
 def index():
     global loading_complete, last_update_time
     # Check if the database needs to be loaded (first time or empty)
     db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
     if not db_exists:
-        loading_complete = False
-        logger.info("Downloading Chroma DB from Hugging Face Hub...")
         download_from_hf_hub()
-        threading.Thread(target=load_feeds_in_background, daemon=True).start()
-    elif not loading_complete:
-        pass  # Let background loading continue
-    else:
         loading_complete = True
     try:
-        # Retrieve all articles from Chroma DB
         all_docs = vector_db.get(include=['documents', 'metadatas'])
         if not all_docs.get('metadatas'):
-            logger.info("No articles in DB yet")
-            return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
-        # Process and categorize articles with strict deduplication
         enriched_articles = []
         seen_keys = set()
         for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -115,10 +126,10 @@ def index():
         return render_template("index.html",
                               categorized_articles=categorized_articles,
                               has_articles=True,
-                              loading=not loading_complete)
     except Exception as e:
         logger.error(f"Error retrieving articles: {e}")
-        return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
 @app.route('/search', methods=['POST'])
 def search():

 def load_feeds_in_background():
     global loading_complete, last_update_time
     try:
+        logger.info("Starting background RSS feed fetch and database population")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
         last_update_time = time.time()
+        logger.info("Background feed processing and database population complete")
         upload_to_hf_hub()
         loading_complete = True
     except Exception as e:
 def index():
     global loading_complete, last_update_time
+    # Assume loading is in progress initially
+    loading = True
     # Check if the database needs to be loaded (first time or empty)
     db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
     if not db_exists:
+        logger.info("Downloading Chroma DB from Hugging Face Hub or initializing empty DB...")
         download_from_hf_hub()
+        # Immediately populate the database with RSS feeds (synchronously for first load)
+        articles = fetch_rss_feeds()
+        process_and_store_articles(articles)
+        upload_to_hf_hub()
         loading_complete = True
+    else:
+        # Database exists, but check if loading is complete
+        if not loading_complete:
+            threading.Thread(target=load_feeds_in_background, daemon=True).start()
+        else:
+            loading = False  # Only set loading=False if database is ready and complete
     try:
+        # Retrieve all articles from Chroma DB (always load from database)
         all_docs = vector_db.get(include=['documents', 'metadatas'])
         if not all_docs.get('metadatas'):
+            logger.info("No articles in DB yet, initializing with RSS feeds...")
+            articles = fetch_rss_feeds()
+            process_and_store_articles(articles)
+            upload_to_hf_hub()
+            all_docs = vector_db.get(include=['documents', 'metadatas'])
+        # Process and categorize articles with strict deduplication from the database
         enriched_articles = []
         seen_keys = set()
         for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
         return render_template("index.html",
                               categorized_articles=categorized_articles,
                               has_articles=True,
+                              loading=loading)
     except Exception as e:
         logger.error(f"Error retrieving articles: {e}")
+        return render_template("index.html", categorized_articles={}, has_articles=False, loading=loading)
 @app.route('/search', methods=['POST'])
 def search():