broadfield-dev committed on
Commit
37fdec4
·
verified ·
1 Parent(s): 7e454f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -21
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from flask import Flask, render_template, request, jsonify
3
  from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
4
  import logging
@@ -9,22 +10,38 @@ app = Flask(__name__)
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
12
- @app.route('/')
13
- def index():
 
 
 
14
  try:
15
- # Fetch and store articles synchronously on first load
16
  articles = fetch_rss_feeds()
17
  logger.info(f"Fetched {len(articles)} articles")
18
  process_and_store_articles(articles)
19
- logger.info("Articles processed and stored")
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # Retrieve all articles from Chroma DB
 
22
  all_docs = vector_db.get(include=['documents', 'metadatas'])
23
  if not all_docs.get('metadatas'):
24
- logger.warning("No articles in DB yet")
25
- return render_template("index.html", categorized_articles={}, has_articles=False)
26
 
27
- # Process retrieved documents
28
  enriched_articles = []
29
  seen_keys = set()
30
  for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -32,36 +49,39 @@ def index():
32
  continue
33
  title = meta.get("title", "No Title")
34
  link = meta.get("link", "")
35
- description = meta.get("original_description", "No Description")
36
  key = f"{title}|{link}"
37
  if key not in seen_keys:
38
  seen_keys.add(key)
39
  enriched_articles.append({
40
  "title": title,
41
  "link": link,
42
- "description": description,
43
  "category": meta.get("category", "Uncategorized"),
44
  "published": meta.get("published", "Unknown Date"),
45
  "image": meta.get("image", "svg"),
46
  })
47
- logger.info(f"Displaying {len(enriched_articles)} unique articles")
48
 
49
- # Categorize articles
 
 
 
 
 
50
  categorized_articles = {}
51
- for article in enriched_articles:
52
  cat = article["category"]
53
  categorized_articles.setdefault(cat, []).append(article)
54
 
55
- return render_template("index.html", categorized_articles=categorized_articles, has_articles=True)
56
  except Exception as e:
57
- logger.error(f"Error in index: {e}")
58
- return render_template("index.html", categorized_articles={}, has_articles=False)
59
 
60
  @app.route('/search', methods=['POST'])
61
  def search():
62
  query = request.form.get('search')
63
  if not query:
64
- return render_template("index.html", categorized_articles={}, has_articles=False)
65
 
66
  try:
67
  logger.info(f"Searching for: {query}")
@@ -72,14 +92,13 @@ def search():
72
  meta = doc.metadata
73
  title = meta.get("title", "No Title")
74
  link = meta.get("link", "")
75
- description = meta.get("original_description", "No Description")
76
  key = f"{title}|{link}"
77
  if key not in seen_keys:
78
  seen_keys.add(key)
79
  enriched_articles.append({
80
  "title": title,
81
  "link": link,
82
- "description": description,
83
  "category": meta.get("category", "Uncategorized"),
84
  "published": meta.get("published", "Unknown Date"),
85
  "image": meta.get("image", "svg"),
@@ -90,10 +109,17 @@ def search():
90
  cat = article["category"]
91
  categorized_articles.setdefault(cat, []).append(article)
92
 
93
- return render_template("index.html", categorized_articles=categorized_articles, has_articles=bool(enriched_articles))
94
  except Exception as e:
95
  logger.error(f"Search error: {e}")
96
- return render_template("index.html", categorized_articles={}, has_articles=False)
 
 
 
 
 
 
 
97
 
98
  if __name__ == "__main__":
99
  app.run(host="0.0.0.0", port=7860)
 
1
  import os
2
+ import threading
3
  from flask import Flask, render_template, request, jsonify
4
  from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
5
  import logging
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
+ # Global flag to track background loading
14
+ loading_complete = False
15
+
16
+ def load_feeds_in_background():
17
+ global loading_complete
18
  try:
19
+ logger.info("Starting background RSS feed fetch")
20
  articles = fetch_rss_feeds()
21
  logger.info(f"Fetched {len(articles)} articles")
22
  process_and_store_articles(articles)
23
+ logger.info("Background feed processing complete")
24
+ loading_complete = True
25
+ except Exception as e:
26
+ logger.error(f"Error in background feed loading: {e}")
27
+ loading_complete = True # Mark as complete even on error to avoid infinite polling
28
+
29
+ @app.route('/')
30
+ def index():
31
+ global loading_complete
32
+ loading_complete = False # Reset on each load
33
+
34
+ # Start background feed loading
35
+ threading.Thread(target=load_feeds_in_background, daemon=True).start()
36
 
37
+ try:
38
+ # Retrieve all stored articles from Chroma DB (trimmed to the 10 most recent further down)
39
  all_docs = vector_db.get(include=['documents', 'metadatas'])
40
  if not all_docs.get('metadatas'):
41
+ logger.info("No articles in DB yet")
42
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
43
 
44
+ # Build a de-duplicated article list from the stored metadata (sorting and the top-10 cut happen after the loop)
45
  enriched_articles = []
46
  seen_keys = set()
47
  for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
 
49
  continue
50
  title = meta.get("title", "No Title")
51
  link = meta.get("link", "")
 
52
  key = f"{title}|{link}"
53
  if key not in seen_keys:
54
  seen_keys.add(key)
55
  enriched_articles.append({
56
  "title": title,
57
  "link": link,
58
+ "description": meta.get("original_description", "No Description"),
59
  "category": meta.get("category", "Uncategorized"),
60
  "published": meta.get("published", "Unknown Date"),
61
  "image": meta.get("image", "svg"),
62
  })
 
63
 
64
+ # Sort newest-first by the raw 'published' value (plain comparison, no date parsing; chronological only if the stored format happens to sort that way)
65
+ enriched_articles.sort(key=lambda x: x["published"], reverse=True)
66
+ recent_articles = enriched_articles[:10]
67
+ logger.info(f"Displaying {len(recent_articles)} recent articles")
68
+
69
+ # Categorize recent articles
70
  categorized_articles = {}
71
+ for article in recent_articles:
72
  cat = article["category"]
73
  categorized_articles.setdefault(cat, []).append(article)
74
 
75
+ return render_template("index.html", categorized_articles=categorized_articles, has_articles=True, loading=True)
76
  except Exception as e:
77
+ logger.error(f"Error retrieving recent articles: {e}")
78
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
79
 
80
  @app.route('/search', methods=['POST'])
81
  def search():
82
  query = request.form.get('search')
83
  if not query:
84
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=False)
85
 
86
  try:
87
  logger.info(f"Searching for: {query}")
 
92
  meta = doc.metadata
93
  title = meta.get("title", "No Title")
94
  link = meta.get("link", "")
 
95
  key = f"{title}|{link}"
96
  if key not in seen_keys:
97
  seen_keys.add(key)
98
  enriched_articles.append({
99
  "title": title,
100
  "link": link,
101
+ "description": meta.get("original_description", "No Description"),
102
  "category": meta.get("category", "Uncategorized"),
103
  "published": meta.get("published", "Unknown Date"),
104
  "image": meta.get("image", "svg"),
 
109
  cat = article["category"]
110
  categorized_articles.setdefault(cat, []).append(article)
111
 
112
+ return render_template("index.html", categorized_articles=categorized_articles, has_articles=bool(enriched_articles), loading=False)
113
  except Exception as e:
114
  logger.error(f"Search error: {e}")
115
+ return render_template("index.html", categorized_articles={}, has_articles=False, loading=False)
116
+
117
+ @app.route('/check_loading', methods=['GET'])
118
+ def check_loading():
119
+ global loading_complete
120
+ if loading_complete:
121
+ return jsonify({"status": "complete"})
122
+ return jsonify({"status": "loading"}), 202
123
 
124
  if __name__ == "__main__":
125
  app.run(host="0.0.0.0", port=7860)