broadfield-dev committed on
Commit
9383dc3
·
verified ·
1 Parent(s): 7b84ed4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -13
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import os
2
  import threading
3
- from flask import Flask, render_template, request, jsonify
4
  from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
5
  import logging
 
6
 
7
  app = Flask(__name__)
8
 
@@ -22,9 +23,14 @@ def load_feeds_in_background():
22
  process_and_store_articles(articles)
23
  logger.info("Background feed processing complete")
24
  loading_complete = True
 
 
 
 
 
25
  except Exception as e:
26
  logger.error(f"Error in background feed loading: {e}")
27
- loading_complete = True # Mark as complete even on error to avoid infinite polling
28
 
29
  @app.route('/')
30
  def index():
@@ -35,13 +41,13 @@ def index():
35
  threading.Thread(target=load_feeds_in_background, daemon=True).start()
36
 
37
  try:
38
- # Retrieve the 10 most recent articles from Chroma DB
39
  all_docs = vector_db.get(include=['documents', 'metadatas'])
40
  if not all_docs.get('metadatas'):
41
  logger.info("No articles in DB yet")
42
  return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
43
 
44
- # Sort by 'published' date (if available) and take top 10
45
  enriched_articles = []
46
  seen_keys = set()
47
  for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
@@ -61,20 +67,25 @@ def index():
61
  "image": meta.get("image", "svg"),
62
  })
63
 
64
- # Sort by published date (assuming it's in a parseable format; fallback to order if not)
65
  enriched_articles.sort(key=lambda x: x["published"], reverse=True)
66
- recent_articles = enriched_articles[:10]
67
- logger.info(f"Displaying {len(recent_articles)} recent articles")
68
 
69
- # Categorize recent articles
70
  categorized_articles = {}
71
- for article in recent_articles:
72
  cat = article["category"]
73
- categorized_articles.setdefault(cat, []).append(article)
74
-
 
 
 
 
 
 
 
75
  return render_template("index.html", categorized_articles=categorized_articles, has_articles=True, loading=True)
76
  except Exception as e:
77
- logger.error(f"Error retrieving recent articles: {e}")
78
  return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
79
 
80
  @app.route('/search', methods=['POST'])
@@ -114,7 +125,7 @@ def search():
114
  logger.error(f"Search error: {e}")
115
  return render_template("index.html", categorized_articles={}, has_articles=False, loading=False)
116
 
117
- @app.route('/check_loading', methods=['GET'])
118
  def check_loading():
119
  global loading_complete
120
  if loading_complete:
 
1
  import os
2
  import threading
3
+ from flask import Flask, render_template, request, jsonify, Response
4
  from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
5
  import logging
6
+ import json
7
 
8
  app = Flask(__name__)
9
 
 
23
  process_and_store_articles(articles)
24
  logger.info("Background feed processing complete")
25
  loading_complete = True
26
+ # Notify frontend of new data (simulated via SSE for simplicity)
27
+ def event_stream():
28
+ yield f"data: {json.dumps({'status': 'updated'})}\n\n"
29
+ app.response_class = Response
30
+ return Response(event_stream(), mimetype="text/event-stream")
31
  except Exception as e:
32
  logger.error(f"Error in background feed loading: {e}")
33
+ loading_complete = True
34
 
35
  @app.route('/')
36
  def index():
 
41
  threading.Thread(target=load_feeds_in_background, daemon=True).start()
42
 
43
  try:
44
+ # Retrieve all articles from Chroma DB
45
  all_docs = vector_db.get(include=['documents', 'metadatas'])
46
  if not all_docs.get('metadatas'):
47
  logger.info("No articles in DB yet")
48
  return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
49
 
50
+ # Process and categorize articles, getting 10 most recent per category
51
  enriched_articles = []
52
  seen_keys = set()
53
  for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
 
67
  "image": meta.get("image", "svg"),
68
  })
69
 
70
+ # Sort by published date (assuming ISO format or comparable string)
71
  enriched_articles.sort(key=lambda x: x["published"], reverse=True)
 
 
72
 
73
+ # Group by category and limit to 10 most recent per category
74
  categorized_articles = {}
75
+ for article in enriched_articles:
76
  cat = article["category"]
77
+ if cat not in categorized_articles:
78
+ categorized_articles[cat] = []
79
+ categorized_articles[cat].append(article)
80
+
81
+ # Limit to 10 most recent per category
82
+ for cat in categorized_articles:
83
+ categorized_articles[cat] = sorted(categorized_articles[cat], key=lambda x: x["published"], reverse=True)[:10]
84
+
85
+ logger.info(f"Displaying articles: {sum(len(articles) for articles in categorized_articles.values())} total")
86
  return render_template("index.html", categorized_articles=categorized_articles, has_articles=True, loading=True)
87
  except Exception as e:
88
+ logger.error(f"Error retrieving articles: {e}")
89
  return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
90
 
91
  @app.route('/search', methods=['POST'])
 
125
  logger.error(f"Search error: {e}")
126
  return render_template("index.html", categorized_articles={}, has_articles=False, loading=False)
127
 
128
+ @app.route('/check_loading')
129
  def check_loading():
130
  global loading_complete
131
  if loading_complete: