broadfield-dev committed on
Commit 183682b · verified · 1 Parent(s): 3910553

Update app.py

Files changed (1)
  1. app.py +11 -18
app.py CHANGED
@@ -20,12 +20,12 @@ last_update_time = time.time()
 def load_feeds_in_background():
     global loading_complete, last_update_time
     try:
-        logger.info("Starting background RSS feed fetch and database population")
+        logger.info("Starting background RSS feed fetch")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
         last_update_time = time.time()
-        logger.info("Background feed processing and database population complete")
+        logger.info("Background feed processing complete")
         upload_to_hf_hub()
         loading_complete = True
     except Exception as e:
@@ -40,29 +40,22 @@ def index():
     db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
     if not db_exists:
         loading_complete = False
-        logger.info("Downloading Chroma DB from Hugging Face Hub or initializing empty DB...")
+        logger.info("Downloading Chroma DB from Hugging Face Hub...")
         download_from_hf_hub()
-        # Immediately populate the database with RSS feeds (synchronously for first load)
-        articles = fetch_rss_feeds()
-        process_and_store_articles(articles)
-        upload_to_hf_hub()
-        loading_complete = True
+        threading.Thread(target=load_feeds_in_background, daemon=True).start()
+    elif not loading_complete:
+        pass # Let background loading continue
     else:
-        # Database exists, but check if loading is complete
-        if not loading_complete:
-            threading.Thread(target=load_feeds_in_background, daemon=True).start()
+        loading_complete = True
 
     try:
-        # Retrieve all articles from Chroma DB (always load from database)
+        # Retrieve all articles from Chroma DB
        all_docs = vector_db.get(include=['documents', 'metadatas'])
         if not all_docs.get('metadatas'):
-            logger.info("No articles in DB yet, initializing with RSS feeds...")
-            articles = fetch_rss_feeds()
-            process_and_store_articles(articles)
-            upload_to_hf_hub()
-            all_docs = vector_db.get(include=['documents', 'metadatas'])
+            logger.info("No articles in DB yet")
+            return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
 
-        # Process and categorize articles with strict deduplication from the database
+        # Process and categorize articles with strict deduplication
         enriched_articles = []
         seen_keys = set()
         for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
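Taken together, the change replaces the synchronous first-load fetch inside the request with a background refresh plus an early "no articles yet" render. Below is a minimal, self-contained sketch of that pattern, not the app's actual code: fetch_and_store() is an illustrative stand-in for the fetch_rss_feeds / process_and_store_articles / upload_to_hf_hub pipeline, and the template arguments simply mirror the ones visible in the diff.

import threading
from flask import Flask, render_template

app = Flask(__name__)
loading_complete = False
_refresh_started = threading.Event()  # guard so only one refresh thread is spawned

def fetch_and_store():
    # Stand-in for the real pipeline: fetch feeds, store them in the vector DB,
    # sync the DB to the Hub, then flag completion for the UI.
    global loading_complete
    try:
        pass  # e.g. fetch feeds, store articles, upload the DB
    finally:
        loading_complete = True

@app.route("/")
def index():
    # Kick off the refresh once, without blocking the request.
    if not loading_complete and not _refresh_started.is_set():
        _refresh_started.set()
        threading.Thread(target=fetch_and_store, daemon=True).start()
    # Render immediately; the page shows a loading state until the thread finishes.
    return render_template("index.html", categorized_articles={},
                           has_articles=loading_complete,
                           loading=not loading_complete)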