broadfield-dev committed on
Commit
935c631
·
verified ·
1 Parent(s): 6fa35e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -17
app.py CHANGED
@@ -34,23 +34,29 @@ def load_feeds_in_background():
34
 
35
  @app.route('/')
36
  def index():
37
- global loading_complete
38
- loading_complete = False # Reset on each load
39
 
40
- # Ensure Chroma DB is downloaded from Hugging Face Hub on first load
41
- if not os.path.exists("chroma_db"):
 
 
 
42
  logger.info("Downloading Chroma DB from Hugging Face Hub...")
43
  download_from_hf_hub()
44
-
45
- # Start background feed loading
46
- threading.Thread(target=load_feeds_in_background, daemon=True).start()
 
 
 
 
47
 
48
  try:
49
  # Retrieve all articles from Chroma DB
50
  all_docs = vector_db.get(include=['documents', 'metadatas'])
51
  if not all_docs.get('metadatas'):
52
  logger.info("No articles in DB yet")
53
- return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
54
 
55
  # Process and categorize articles, getting only 10 most recent per category with strict deduplication
56
  enriched_articles = []
@@ -61,15 +67,12 @@ def index():
61
  title = meta.get("title", "No Title").strip()
62
  link = meta.get("link", "").strip()
63
  published = meta.get("published", "Unknown Date").strip()
64
- # Use a more robust key including trimmed fields to prevent duplicates
65
  key = f"{title}|{link}|{published}"
66
  if key not in seen_keys:
67
  seen_keys.add(key)
68
- # Try to parse published date, fallback to string sorting
69
  try:
70
  published = datetime.strptime(published, "%Y-%m-%d %H:%M:%S").isoformat() if "Unknown" not in published else published
71
  except (ValueError, TypeError):
72
- # Fallback to a very old date for sorting if parsing fails
73
  published = "1970-01-01T00:00:00"
74
  enriched_articles.append({
75
  "title": title,
@@ -80,21 +83,17 @@ def index():
80
  "image": meta.get("image", "svg"),
81
  })
82
 
83
- # Sort by published date (handle both datetime and string)
84
  enriched_articles.sort(key=lambda x: x["published"], reverse=True)
85
 
86
- # Group by category and limit to 10 most recent per category with final deduplication
87
  categorized_articles = {}
88
  for article in enriched_articles:
89
  cat = article["category"]
90
  if cat not in categorized_articles:
91
  categorized_articles[cat] = []
92
- # Add only if not already in the category list (extra deduplication)
93
  key = f"{article['title']}|{article['link']}|{article['published']}"
94
  if key not in [f"{a['title']}|{a['link']}|{a['published']}" for a in categorized_articles[cat]]:
95
  categorized_articles[cat].append(article)
96
 
97
- # Limit to 10 most recent per category and sort again for safety
98
  for cat in categorized_articles:
99
  unique_articles = []
100
  seen_cat_keys = set()
@@ -109,10 +108,10 @@ def index():
109
  return render_template("index.html",
110
  categorized_articles=categorized_articles,
111
  has_articles=True,
112
- loading=True)
113
  except Exception as e:
114
  logger.error(f"Error retrieving articles: {e}")
115
- return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
116
 
117
  @app.route('/search', methods=['POST'])
118
  def search():
 
34
 
35
  @app.route('/')
36
  def index():
37
+ global loading_complete, last_update_time
 
38
 
39
+ # Check if the database needs to be loaded (first time or empty)
40
+ db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
41
+ if not db_exists:
42
+ # First load: DB doesn't exist or is empty
43
+ loading_complete = False
44
  logger.info("Downloading Chroma DB from Hugging Face Hub...")
45
  download_from_hf_hub()
46
+ threading.Thread(target=load_feeds_in_background, daemon=True).start()
47
+ elif not loading_complete:
48
+ # Background loading is still in progress from a previous request
49
+ pass # Let it continue, spinner will show
50
+ else:
51
+ # DB exists and loading is complete, no spinner needed
52
+ loading_complete = True
53
 
54
  try:
55
  # Retrieve all articles from Chroma DB
56
  all_docs = vector_db.get(include=['documents', 'metadatas'])
57
  if not all_docs.get('metadatas'):
58
  logger.info("No articles in DB yet")
59
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
60
 
61
  # Process and categorize articles, getting only 10 most recent per category with strict deduplication
62
  enriched_articles = []
 
67
  title = meta.get("title", "No Title").strip()
68
  link = meta.get("link", "").strip()
69
  published = meta.get("published", "Unknown Date").strip()
 
70
  key = f"{title}|{link}|{published}"
71
  if key not in seen_keys:
72
  seen_keys.add(key)
 
73
  try:
74
  published = datetime.strptime(published, "%Y-%m-%d %H:%M:%S").isoformat() if "Unknown" not in published else published
75
  except (ValueError, TypeError):
 
76
  published = "1970-01-01T00:00:00"
77
  enriched_articles.append({
78
  "title": title,
 
83
  "image": meta.get("image", "svg"),
84
  })
85
 
 
86
  enriched_articles.sort(key=lambda x: x["published"], reverse=True)
87
 
 
88
  categorized_articles = {}
89
  for article in enriched_articles:
90
  cat = article["category"]
91
  if cat not in categorized_articles:
92
  categorized_articles[cat] = []
 
93
  key = f"{article['title']}|{article['link']}|{article['published']}"
94
  if key not in [f"{a['title']}|{a['link']}|{a['published']}" for a in categorized_articles[cat]]:
95
  categorized_articles[cat].append(article)
96
 
 
97
  for cat in categorized_articles:
98
  unique_articles = []
99
  seen_cat_keys = set()
 
108
  return render_template("index.html",
109
  categorized_articles=categorized_articles,
110
  has_articles=True,
111
+ loading=not loading_complete)
112
  except Exception as e:
113
  logger.error(f"Error retrieving articles: {e}")
114
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
115
 
116
  @app.route('/search', methods=['POST'])
117
  def search():