broadfield-dev committed
Commit ae30ab9 · verified · 1 Parent(s): dda464b

Update app.py

Files changed (1): app.py +66 -13
app.py CHANGED
@@ -1,9 +1,10 @@
 import os
 import threading
-from flask import Flask, render_template, request, jsonify, Response
+from flask import Flask, render_template, request, jsonify
 from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
 import logging
-import json
+import time
+from datetime import datetime
 
 app = Flask(__name__)
 
@@ -13,21 +14,18 @@ logger = logging.getLogger(__name__)
 
 # Global flag to track background loading
 loading_complete = False
+last_update_time = time.time()
 
 def load_feeds_in_background():
-    global loading_complete
+    global loading_complete, last_update_time
     try:
         logger.info("Starting background RSS feed fetch")
         articles = fetch_rss_feeds()
         logger.info(f"Fetched {len(articles)} articles")
         process_and_store_articles(articles)
+        last_update_time = time.time()  # Update timestamp when new articles are added
         logger.info("Background feed processing complete")
         loading_complete = True
-        # Notify frontend of new data (simulated via SSE for simplicity)
-        def event_stream():
-            yield f"data: {json.dumps({'status': 'updated'})}\n\n"
-        app.response_class = Response
-        return Response(event_stream(), mimetype="text/event-stream")
     except Exception as e:
         logger.error(f"Error in background feed loading: {e}")
         loading_complete = True
@@ -58,17 +56,23 @@ def index():
         key = f"{title}|{link}"
         if key not in seen_keys:
             seen_keys.add(key)
+            # Try to parse published date, fallback to string sorting
+            published = meta.get("published", "Unknown Date")
+            try:
+                published = datetime.strptime(published, "%Y-%m-%d %H:%M:%S").isoformat() if "Unknown" not in published else published
+            except (ValueError, TypeError):
+                pass  # Keep as is if parsing fails
             enriched_articles.append({
                 "title": title,
                 "link": link,
                 "description": meta.get("original_description", "No Description"),
                 "category": meta.get("category", "Uncategorized"),
-                "published": meta.get("published", "Unknown Date"),
+                "published": published,
                 "image": meta.get("image", "svg"),
             })
 
-    # Sort by published date (assuming ISO format or comparable string)
-    enriched_articles.sort(key=lambda x: x["published"], reverse=True)
+    # Sort by published date (handle both datetime and string)
+    enriched_articles.sort(key=lambda x: x["published"] if "Unknown" not in x["published"] else "1970-01-01", reverse=True)
 
     # Group by category and limit to 10 most recent per category
     categorized_articles = {}
@@ -127,10 +131,59 @@ def search():
 
 @app.route('/check_loading')
 def check_loading():
-    global loading_complete
+    global loading_complete, last_update_time
     if loading_complete:
-        return jsonify({"status": "complete"})
+        return jsonify({"status": "complete", "last_update": last_update_time})
     return jsonify({"status": "loading"}), 202
 
+@app.route('/get_updates')
+def get_updates():
+    global last_update_time
+    try:
+        all_docs = vector_db.get(include=['documents', 'metadatas'])
+        if not all_docs.get('metadatas'):
+            return jsonify({"articles": [], "last_update": last_update_time})
+
+        enriched_articles = []
+        seen_keys = set()
+        for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
+            if not meta:
+                continue
+            title = meta.get("title", "No Title")
+            link = meta.get("link", "")
+            key = f"{title}|{link}"
+            if key not in seen_keys:
+                seen_keys.add(key)
+                published = meta.get("published", "Unknown Date")
+                try:
+                    published = datetime.strptime(published, "%Y-%m-%d %H:%M:%S").isoformat() if "Unknown" not in published else published
+                except (ValueError, TypeError):
+                    pass
+                enriched_articles.append({
+                    "title": title,
+                    "link": link,
+                    "description": meta.get("original_description", "No Description"),
+                    "category": meta.get("category", "Uncategorized"),
+                    "published": published,
+                    "image": meta.get("image", "svg"),
+                })
+
+        enriched_articles.sort(key=lambda x: x["published"], reverse=True)
+        categorized_articles = {}
+        for article in enriched_articles:
+            cat = article["category"]
+            if cat not in categorized_articles:
+                categorized_articles[cat] = []
+            categorized_articles[cat].append(article)
+
+        # Limit to 10 most recent per category
+        for cat in categorized_articles:
+            categorized_articles[cat] = sorted(categorized_articles[cat], key=lambda x: x["published"], reverse=True)[:10]
+
+        return jsonify({"articles": categorized_articles, "last_update": last_update_time})
+    except Exception as e:
+        logger.error(f"Error fetching updates: {e}")
+        return jsonify({"articles": {}, "last_update": last_update_time}), 500
+
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)
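
For context, a minimal sketch of how a client might exercise the two polling endpoints this commit exposes, /check_loading and /get_updates: it waits for the background feed load to report complete, then pulls the categorized articles and the last_update timestamp. This sketch is not part of the commit; the base URL, timeouts, polling interval, and the requests dependency are assumptions for illustration only.

import time
import requests  # assumed client-side dependency; not used by app.py itself

BASE_URL = "http://localhost:7860"  # hypothetical address of the running Flask app

def poll_for_articles(interval=5, max_wait=120):
    """Wait for /check_loading to report completion, then fetch /get_updates.

    Sketch only: assumes /check_loading returns HTTP 202 while loading and
    /get_updates returns {"articles": {...}, "last_update": <timestamp>}.
    """
    deadline = time.time() + max_wait
    while time.time() < deadline:
        resp = requests.get(f"{BASE_URL}/check_loading", timeout=10)
        if resp.status_code == 200 and resp.json().get("status") == "complete":
            break
        time.sleep(interval)  # still loading (202), retry shortly

    updates = requests.get(f"{BASE_URL}/get_updates", timeout=10).json()
    return updates.get("articles", {}), updates.get("last_update")

if __name__ == "__main__":
    articles_by_category, last_update = poll_for_articles()
    for category, items in articles_by_category.items():
        print(f"{category}: {len(items)} articles (last update {last_update})")

Since both endpoints now return last_update, a client can presumably compare the value across polls to detect freshly ingested articles and refresh only when it changes.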