broadfield-dev committed on
Commit
af08b40
·
verified ·
1 Parent(s): 183682b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -20,12 +20,12 @@ last_update_time = time.time()
20
  def load_feeds_in_background():
21
  global loading_complete, last_update_time
22
  try:
23
- logger.info("Starting background RSS feed fetch")
24
  articles = fetch_rss_feeds()
25
  logger.info(f"Fetched {len(articles)} articles")
26
  process_and_store_articles(articles)
27
  last_update_time = time.time()
28
- logger.info("Background feed processing complete")
29
  upload_to_hf_hub()
30
  loading_complete = True
31
  except Exception as e:
@@ -36,26 +36,37 @@ def load_feeds_in_background():
36
  def index():
37
  global loading_complete, last_update_time
38
 
 
 
 
39
  # Check if the database needs to be loaded (first time or empty)
40
  db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
41
  if not db_exists:
42
- loading_complete = False
43
- logger.info("Downloading Chroma DB from Hugging Face Hub...")
44
  download_from_hf_hub()
45
- threading.Thread(target=load_feeds_in_background, daemon=True).start()
46
- elif not loading_complete:
47
- pass # Let background loading continue
48
- else:
49
  loading_complete = True
 
 
 
 
 
 
50
 
51
  try:
52
- # Retrieve all articles from Chroma DB
53
  all_docs = vector_db.get(include=['documents', 'metadatas'])
54
  if not all_docs.get('metadatas'):
55
- logger.info("No articles in DB yet")
56
- return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
 
 
 
57
 
58
- # Process and categorize articles with strict deduplication
59
  enriched_articles = []
60
  seen_keys = set()
61
  for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -115,10 +126,10 @@ def index():
115
  return render_template("index.html",
116
  categorized_articles=categorized_articles,
117
  has_articles=True,
118
- loading=not loading_complete)
119
  except Exception as e:
120
  logger.error(f"Error retrieving articles: {e}")
121
- return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
122
 
123
  @app.route('/search', methods=['POST'])
124
  def search():
 
20
  def load_feeds_in_background():
21
  global loading_complete, last_update_time
22
  try:
23
+ logger.info("Starting background RSS feed fetch and database population")
24
  articles = fetch_rss_feeds()
25
  logger.info(f"Fetched {len(articles)} articles")
26
  process_and_store_articles(articles)
27
  last_update_time = time.time()
28
+ logger.info("Background feed processing and database population complete")
29
  upload_to_hf_hub()
30
  loading_complete = True
31
  except Exception as e:
 
36
  def index():
37
  global loading_complete, last_update_time
38
 
39
+ # Assume loading is in progress initially
40
+ loading = True
41
+
42
  # Check if the database needs to be loaded (first time or empty)
43
  db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
44
  if not db_exists:
45
+ logger.info("Downloading Chroma DB from Hugging Face Hub or initializing empty DB...")
 
46
  download_from_hf_hub()
47
+ # Immediately populate the database with RSS feeds (synchronously for first load)
48
+ articles = fetch_rss_feeds()
49
+ process_and_store_articles(articles)
50
+ upload_to_hf_hub()
51
  loading_complete = True
52
+ else:
53
+ # Database exists, but check if loading is complete
54
+ if not loading_complete:
55
+ threading.Thread(target=load_feeds_in_background, daemon=True).start()
56
+ else:
57
+ loading = False # Only set loading=False if database is ready and complete
58
 
59
  try:
60
+ # Retrieve all articles from Chroma DB (always load from database)
61
  all_docs = vector_db.get(include=['documents', 'metadatas'])
62
  if not all_docs.get('metadatas'):
63
+ logger.info("No articles in DB yet, initializing with RSS feeds...")
64
+ articles = fetch_rss_feeds()
65
+ process_and_store_articles(articles)
66
+ upload_to_hf_hub()
67
+ all_docs = vector_db.get(include=['documents', 'metadatas'])
68
 
69
+ # Process and categorize articles with strict deduplication from the database
70
  enriched_articles = []
71
  seen_keys = set()
72
  for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
 
126
  return render_template("index.html",
127
  categorized_articles=categorized_articles,
128
  has_articles=True,
129
+ loading=loading)
130
  except Exception as e:
131
  logger.error(f"Error retrieving articles: {e}")
132
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=loading)
133
 
134
  @app.route('/search', methods=['POST'])
135
  def search():