Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
import os
|
2 |
import threading
|
3 |
-
from flask import Flask, render_template, request, jsonify
|
4 |
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
|
5 |
import logging
|
6 |
-
import
|
|
|
7 |
|
8 |
app = Flask(__name__)
|
9 |
|
@@ -13,21 +14,18 @@ logger = logging.getLogger(__name__)
|
|
13 |
|
14 |
# Global flag to track background loading
|
15 |
loading_complete = False
|
|
|
16 |
|
17 |
def load_feeds_in_background():
|
18 |
-
global loading_complete
|
19 |
try:
|
20 |
logger.info("Starting background RSS feed fetch")
|
21 |
articles = fetch_rss_feeds()
|
22 |
logger.info(f"Fetched {len(articles)} articles")
|
23 |
process_and_store_articles(articles)
|
|
|
24 |
logger.info("Background feed processing complete")
|
25 |
loading_complete = True
|
26 |
-
# Notify frontend of new data (simulated via SSE for simplicity)
|
27 |
-
def event_stream():
|
28 |
-
yield f"data: {json.dumps({'status': 'updated'})}\n\n"
|
29 |
-
app.response_class = Response
|
30 |
-
return Response(event_stream(), mimetype="text/event-stream")
|
31 |
except Exception as e:
|
32 |
logger.error(f"Error in background feed loading: {e}")
|
33 |
loading_complete = True
|
@@ -58,17 +56,23 @@ def index():
|
|
58 |
key = f"{title}|{link}"
|
59 |
if key not in seen_keys:
|
60 |
seen_keys.add(key)
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
enriched_articles.append({
|
62 |
"title": title,
|
63 |
"link": link,
|
64 |
"description": meta.get("original_description", "No Description"),
|
65 |
"category": meta.get("category", "Uncategorized"),
|
66 |
-
"published":
|
67 |
"image": meta.get("image", "svg"),
|
68 |
})
|
69 |
|
70 |
-
# Sort by published date (
|
71 |
-
enriched_articles.sort(key=lambda x: x["published"], reverse=True)
|
72 |
|
73 |
# Group by category and limit to 10 most recent per category
|
74 |
categorized_articles = {}
|
@@ -127,10 +131,59 @@ def search():
|
|
127 |
|
128 |
@app.route('/check_loading')
|
129 |
def check_loading():
|
130 |
-
global loading_complete
|
131 |
if loading_complete:
|
132 |
-
return jsonify({"status": "complete"})
|
133 |
return jsonify({"status": "loading"}), 202
|
134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
if __name__ == "__main__":
|
136 |
app.run(host="0.0.0.0", port=7860)
|
|
|
1 |
import os
|
2 |
import threading
|
3 |
+
from flask import Flask, render_template, request, jsonify
|
4 |
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
|
5 |
import logging
|
6 |
+
import time
|
7 |
+
from datetime import datetime
|
8 |
|
9 |
app = Flask(__name__)
|
10 |
|
|
|
14 |
|
15 |
# Global flag to track background loading; flipped to True by
# load_feeds_in_background() once the initial fetch completes (or fails).
loading_complete = False
# Timestamp (epoch seconds) of the most recent successful article ingest;
# exposed to clients via /check_loading and /get_updates so the frontend
# can detect fresh data.
last_update_time = time.time()
|
18 |
|
19 |
def load_feeds_in_background():
    """Fetch and ingest all RSS feeds once, then mark loading as finished.

    Intended to run off the request thread (the module imports ``threading``;
    the spawning call is outside this view — confirm). Updates the module-level
    ``last_update_time`` stamp on success and always sets ``loading_complete``,
    even on failure, so pollers of /check_loading never spin forever.
    """
    global loading_complete, last_update_time
    try:
        logger.info("Starting background RSS feed fetch")
        articles = fetch_rss_feeds()
        logger.info(f"Fetched {len(articles)} articles")
        process_and_store_articles(articles)
        # Stamp only after a successful ingest so clients re-fetch new data.
        last_update_time = time.time()  # Update timestamp when new articles are added
        logger.info("Background feed processing complete")
    except Exception as exc:
        logger.error(f"Error in background feed loading: {exc}")
    finally:
        # Unblock the frontend whether or not the fetch succeeded.
        loading_complete = True
|
|
|
56 |
key = f"{title}|{link}"
|
57 |
if key not in seen_keys:
|
58 |
seen_keys.add(key)
|
59 |
+
# Try to parse published date, fallback to string sorting
|
60 |
+
published = meta.get("published", "Unknown Date")
|
61 |
+
try:
|
62 |
+
published = datetime.strptime(published, "%Y-%m-%d %H:%M:%S").isoformat() if "Unknown" not in published else published
|
63 |
+
except (ValueError, TypeError):
|
64 |
+
pass # Keep as is if parsing fails
|
65 |
enriched_articles.append({
|
66 |
"title": title,
|
67 |
"link": link,
|
68 |
"description": meta.get("original_description", "No Description"),
|
69 |
"category": meta.get("category", "Uncategorized"),
|
70 |
+
"published": published,
|
71 |
"image": meta.get("image", "svg"),
|
72 |
})
|
73 |
|
74 |
+
# Sort by published date (handle both datetime and string)
|
75 |
+
enriched_articles.sort(key=lambda x: x["published"] if "Unknown" not in x["published"] else "1970-01-01", reverse=True)
|
76 |
|
77 |
# Group by category and limit to 10 most recent per category
|
78 |
categorized_articles = {}
|
|
|
131 |
|
132 |
@app.route('/check_loading')
def check_loading():
    """Polling endpoint: report whether the initial background load is done.

    Returns 202 with {"status": "loading"} while the background thread runs,
    then 200 with {"status": "complete", "last_update": <epoch seconds>}.
    """
    # Guard clause: 202 Accepted tells the client to keep polling.
    if not loading_complete:
        return jsonify({"status": "loading"}), 202
    return jsonify({"status": "complete", "last_update": last_update_time})
|
138 |
|
139 |
+
def _published_sort_key(article):
    """Sort key for an enriched article dict: chronological by ISO string.

    Entries whose "published" value still contains "Unknown" are mapped to
    the epoch ("1970-01-01") so they sort LAST under reverse=True — the same
    fallback index() uses. Without this, "Unknown Date" ("U" > any digit)
    would incorrectly float to the top of a reverse lexicographic sort.
    """
    published = article["published"]
    return published if "Unknown" not in published else "1970-01-01"


@app.route('/get_updates')
def get_updates():
    """Return every stored article grouped by category, newest first.

    Response JSON: {"articles": {category: [article, ...]}, "last_update": ts}
    with each category capped at its 10 most recent articles. On any failure
    the error is logged and an empty mapping is returned with HTTP 500.
    """
    global last_update_time
    try:
        all_docs = vector_db.get(include=['documents', 'metadatas'])
        if not all_docs.get('metadatas'):
            # NOTE(review): this path returns a list for "articles" while the
            # success/error paths return a dict — confirm the frontend handles
            # both shapes before unifying.
            return jsonify({"articles": [], "last_update": last_update_time})

        enriched_articles = []
        seen_keys = set()
        for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
            if not meta:
                continue
            title = meta.get("title", "No Title")
            link = meta.get("link", "")
            # De-duplicate on title+link, mirroring index().
            key = f"{title}|{link}"
            if key in seen_keys:
                continue
            seen_keys.add(key)
            published = meta.get("published", "Unknown Date")
            try:
                # Normalize known-format dates to ISO-8601 so lexicographic
                # order matches chronological order; leave anything
                # unparseable (including "Unknown Date") untouched.
                published = datetime.strptime(published, "%Y-%m-%d %H:%M:%S").isoformat() if "Unknown" not in published else published
            except (ValueError, TypeError):
                pass
            enriched_articles.append({
                "title": title,
                "link": link,
                "description": meta.get("original_description", "No Description"),
                "category": meta.get("category", "Uncategorized"),
                "published": published,
                "image": meta.get("image", "svg"),
            })

        # Newest first; unknown dates sink to the end (consistent with index()).
        enriched_articles.sort(key=_published_sort_key, reverse=True)

        # Group by category, preserving the global newest-first order.
        categorized_articles = {}
        for article in enriched_articles:
            categorized_articles.setdefault(article["category"], []).append(article)

        # Limit to the 10 most recent per category. Each list is already
        # sorted newest-first, so a slice suffices — no per-category re-sort
        # (the old re-sort also mis-ordered "Unknown Date" entries).
        for cat in categorized_articles:
            categorized_articles[cat] = categorized_articles[cat][:10]

        return jsonify({"articles": categorized_articles, "last_update": last_update_time})
    except Exception as e:
        logger.error(f"Error fetching updates: {e}")
        return jsonify({"articles": {}, "last_update": last_update_time}), 500
|
187 |
+
|
188 |
if __name__ == "__main__":
    # Listen on all interfaces; 7860 is presumably the Hugging Face Spaces
    # convention (the page header says "Spaces") — confirm deployment target.
    app.run(host="0.0.0.0", port=7860)
|