broadfield-dev committed on
Commit
a14387f
·
verified ·
1 Parent(s): af08b40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -25
app.py CHANGED
@@ -20,12 +20,12 @@ last_update_time = time.time()
20
  def load_feeds_in_background():
21
  global loading_complete, last_update_time
22
  try:
23
- logger.info("Starting background RSS feed fetch and database population")
24
  articles = fetch_rss_feeds()
25
  logger.info(f"Fetched {len(articles)} articles")
26
  process_and_store_articles(articles)
27
  last_update_time = time.time()
28
- logger.info("Background feed processing and database population complete")
29
  upload_to_hf_hub()
30
  loading_complete = True
31
  except Exception as e:
@@ -36,37 +36,26 @@ def load_feeds_in_background():
36
  def index():
37
  global loading_complete, last_update_time
38
 
39
- # Assume loading is in progress initially
40
- loading = True
41
-
42
  # Check if the database needs to be loaded (first time or empty)
43
  db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
44
  if not db_exists:
45
- logger.info("Downloading Chroma DB from Hugging Face Hub or initializing empty DB...")
 
46
  download_from_hf_hub()
47
- # Immediately populate the database with RSS feeds (synchronously for first load)
48
- articles = fetch_rss_feeds()
49
- process_and_store_articles(articles)
50
- upload_to_hf_hub()
51
- loading_complete = True
52
  else:
53
- # Database exists, but check if loading is complete
54
- if not loading_complete:
55
- threading.Thread(target=load_feeds_in_background, daemon=True).start()
56
- else:
57
- loading = False # Only set loading=False if database is ready and complete
58
 
59
  try:
60
- # Retrieve all articles from Chroma DB (always load from database)
61
  all_docs = vector_db.get(include=['documents', 'metadatas'])
62
  if not all_docs.get('metadatas'):
63
- logger.info("No articles in DB yet, initializing with RSS feeds...")
64
- articles = fetch_rss_feeds()
65
- process_and_store_articles(articles)
66
- upload_to_hf_hub()
67
- all_docs = vector_db.get(include=['documents', 'metadatas'])
68
 
69
- # Process and categorize articles with strict deduplication from the database
70
  enriched_articles = []
71
  seen_keys = set()
72
  for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -126,10 +115,10 @@ def index():
126
  return render_template("index.html",
127
  categorized_articles=categorized_articles,
128
  has_articles=True,
129
- loading=loading)
130
  except Exception as e:
131
  logger.error(f"Error retrieving articles: {e}")
132
- return render_template("index.html", categorized_articles={}, has_articles=False, loading=loading)
133
 
134
  @app.route('/search', methods=['POST'])
135
  def search():
 
20
  def load_feeds_in_background():
21
  global loading_complete, last_update_time
22
  try:
23
+ logger.info("Starting background RSS feed fetch")
24
  articles = fetch_rss_feeds()
25
  logger.info(f"Fetched {len(articles)} articles")
26
  process_and_store_articles(articles)
27
  last_update_time = time.time()
28
+ logger.info("Background feed processing complete")
29
  upload_to_hf_hub()
30
  loading_complete = True
31
  except Exception as e:
 
36
  def index():
37
  global loading_complete, last_update_time
38
 
 
 
 
39
  # Check if the database needs to be loaded (first time or empty)
40
  db_exists = os.path.exists("chroma_db") and vector_db.get().get('documents')
41
  if not db_exists:
42
+ loading_complete = False
43
+ logger.info("Downloading Chroma DB from Hugging Face Hub...")
44
  download_from_hf_hub()
45
+ threading.Thread(target=load_feeds_in_background, daemon=True).start()
46
+ elif not loading_complete:
47
+ pass # Let background loading continue
 
 
48
  else:
49
+ loading_complete = True
 
 
 
 
50
 
51
  try:
52
+ # Retrieve all articles from Chroma DB
53
  all_docs = vector_db.get(include=['documents', 'metadatas'])
54
  if not all_docs.get('metadatas'):
55
+ logger.info("No articles in DB yet")
56
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
 
 
 
57
 
58
+ # Process and categorize articles with strict deduplication
59
  enriched_articles = []
60
  seen_keys = set()
61
  for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
 
115
  return render_template("index.html",
116
  categorized_articles=categorized_articles,
117
  has_articles=True,
118
+ loading=not loading_complete)
119
  except Exception as e:
120
  logger.error(f"Error retrieving articles: {e}")
121
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=not loading_complete)
122
 
123
  @app.route('/search', methods=['POST'])
124
  def search():