Spaces:

broadfield-dev
/

RSS_News

Runtime error

App Files Community

broadfield-dev committed on Feb 22

Commit

a14387f

verified ·

1 Parent(s): af08b40

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -25

app.py CHANGED Viewed

@@ -20,12 +20,12 @@ last_update_time = time.time()
 def load_feeds_in_background():
     global loading_complete, last_update_time
     try:
-        logger.info("Starting background RSS feed fetch and database population")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
         last_update_time = time.time()
-        logger.info("Background feed processing and database population complete")
         upload_to_hf_hub()
         loading_complete = True
     except Exception as e:
@@ -36,37 +36,26 @@ def load_feeds_in_background():
 def index():
     global loading_complete, last_update_time
-    # Assume loading is in progress initially
-    loading = True
     # Check if the database needs to be loaded (first time or empty)
     db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
     if not db_exists:
-        logger.info("Downloading Chroma DB from Hugging Face Hub or initializing empty DB...")
         download_from_hf_hub()
-        # Immediately populate the database with RSS feeds (synchronously for first load)
-        articles = fetch_rss_feeds()
-        process_and_store_articles(articles)
-        upload_to_hf_hub()
-        loading_complete = True
     else:
-        # Database exists, but check if loading is complete
-        if not loading_complete:
-            threading.Thread(target=load_feeds_in_background, daemon=True).start()
-        else:
-            loading = False  # Only set loading=False if database is ready and complete
     try:
-        # Retrieve all articles from Chroma DB (always load from database)
         all_docs = vector_db.get(include=['documents', 'metadatas'])
         if not all_docs.get('metadatas'):
-            logger.info("No articles in DB yet, initializing with RSS feeds...")
-            articles = fetch_rss_feeds()
-            process_and_store_articles(articles)
-            upload_to_hf_hub()
-            all_docs = vector_db.get(include=['documents', 'metadatas'])
-        # Process and categorize articles with strict deduplication from the database
         enriched_articles = []
         seen_keys = set()
         for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -126,10 +115,10 @@ def index():
         return render_template("index.html",
                               categorized_articles=categorized_articles,
                               has_articles=True,
-                              loading=loading)
     except Exception as e:
         logger.error(f"Error retrieving articles: {e}")
-        return render_template("index.html", categorized_articles={}, has_articles=False, loading=loading)
 @app.route('/search', methods=['POST'])
 def search():

 def load_feeds_in_background():
     global loading_complete, last_update_time
     try:
+        logger.info("Starting background RSS feed fetch")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
         last_update_time = time.time()
+        logger.info("Background feed processing complete")
         upload_to_hf_hub()
         loading_complete = True
     except Exception as e:
 def index():
     global loading_complete, last_update_time
     # Check if the database needs to be loaded (first time or empty)
     db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
     if not db_exists:
+        loading_complete = False
+        logger.info("Downloading Chroma DB from Hugging Face Hub...")
         download_from_hf_hub()
+        threading.Thread(target=load_feeds_in_background, daemon=True).start()
+    elif not loading_complete:
+        pass  # Let background loading continue
     else:
+        loading_complete = True
     try:
+        # Retrieve all articles from Chroma DB
         all_docs = vector_db.get(include=['documents', 'metadatas'])
         if not all_docs.get('metadatas'):
+            logger.info("No articles in DB yet")
+            return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
+        # Process and categorize articles with strict deduplication
         enriched_articles = []
         seen_keys = set()
         for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
         return render_template("index.html",
                               categorized_articles=categorized_articles,
                               has_articles=True,
+                              loading=not loading_complete)
     except Exception as e:
         logger.error(f"Error retrieving articles: {e}")
+        return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
 @app.route('/search', methods=['POST'])
 def search():