Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
import os
|
2 |
import threading
|
3 |
-
from flask import Flask, render_template, request, jsonify
|
4 |
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
|
5 |
import logging
|
|
|
6 |
|
7 |
app = Flask(__name__)
|
8 |
|
@@ -22,9 +23,14 @@ def load_feeds_in_background():
|
|
22 |
process_and_store_articles(articles)
|
23 |
logger.info("Background feed processing complete")
|
24 |
loading_complete = True
|
|
|
|
|
|
|
|
|
|
|
25 |
except Exception as e:
|
26 |
logger.error(f"Error in background feed loading: {e}")
|
27 |
-
loading_complete = True
|
28 |
|
29 |
@app.route('/')
|
30 |
def index():
|
@@ -35,13 +41,13 @@ def index():
|
|
35 |
threading.Thread(target=load_feeds_in_background, daemon=True).start()
|
36 |
|
37 |
try:
|
38 |
-
# Retrieve
|
39 |
all_docs = vector_db.get(include=['documents', 'metadatas'])
|
40 |
if not all_docs.get('metadatas'):
|
41 |
logger.info("No articles in DB yet")
|
42 |
return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
|
43 |
|
44 |
-
#
|
45 |
enriched_articles = []
|
46 |
seen_keys = set()
|
47 |
for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
|
@@ -61,20 +67,25 @@ def index():
|
|
61 |
"image": meta.get("image", "svg"),
|
62 |
})
|
63 |
|
64 |
-
# Sort by published date (assuming
|
65 |
enriched_articles.sort(key=lambda x: x["published"], reverse=True)
|
66 |
-
recent_articles = enriched_articles[:10]
|
67 |
-
logger.info(f"Displaying {len(recent_articles)} recent articles")
|
68 |
|
69 |
-
#
|
70 |
categorized_articles = {}
|
71 |
-
for article in
|
72 |
cat = article["category"]
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
return render_template("index.html", categorized_articles=categorized_articles, has_articles=True, loading=True)
|
76 |
except Exception as e:
|
77 |
-
logger.error(f"Error retrieving
|
78 |
return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
|
79 |
|
80 |
@app.route('/search', methods=['POST'])
|
@@ -114,7 +125,7 @@ def search():
|
|
114 |
logger.error(f"Search error: {e}")
|
115 |
return render_template("index.html", categorized_articles={}, has_articles=False, loading=False)
|
116 |
|
117 |
-
@app.route('/check_loading'
|
118 |
def check_loading():
|
119 |
global loading_complete
|
120 |
if loading_complete:
|
|
|
1 |
import os
|
2 |
import threading
|
3 |
+
from flask import Flask, render_template, request, jsonify, Response
|
4 |
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
|
5 |
import logging
|
6 |
+
import json
|
7 |
|
8 |
app = Flask(__name__)
|
9 |
|
|
|
23 |
process_and_store_articles(articles)
|
24 |
logger.info("Background feed processing complete")
|
25 |
loading_complete = True
|
26 |
+
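# NOTE(review): meant to notify the frontend via SSE, but this function runs as a threading.Thread target (see index()), so the Response returned below is discarded and no event reaches the client; SSE needs its own route — confirm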
|
27 |
+
def event_stream():
|
28 |
+
yield f"data: {json.dumps({'status': 'updated'})}\n\n"
|
29 |
+
app.response_class = Response
|
30 |
+
return Response(event_stream(), mimetype="text/event-stream")
|
31 |
except Exception as e:
|
32 |
logger.error(f"Error in background feed loading: {e}")
|
33 |
+
loading_complete = True
|
34 |
|
35 |
@app.route('/')
|
36 |
def index():
|
|
|
41 |
threading.Thread(target=load_feeds_in_background, daemon=True).start()
|
42 |
|
43 |
try:
|
44 |
+
# Retrieve all articles from Chroma DB
|
45 |
all_docs = vector_db.get(include=['documents', 'metadatas'])
|
46 |
if not all_docs.get('metadatas'):
|
47 |
logger.info("No articles in DB yet")
|
48 |
return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
|
49 |
|
50 |
+
# Process and categorize articles, getting 10 most recent per category
|
51 |
enriched_articles = []
|
52 |
seen_keys = set()
|
53 |
for doc, meta in zip(all_docs['documents'], all_docs['metadatas']):
|
|
|
67 |
"image": meta.get("image", "svg"),
|
68 |
})
|
69 |
|
70 |
+
# Sort by published date (assuming ISO format or comparable string)
|
71 |
enriched_articles.sort(key=lambda x: x["published"], reverse=True)
|
|
|
|
|
72 |
|
73 |
+
# Group articles by category (the per-category limit of 10 is applied in the loop further below)
|
74 |
categorized_articles = {}
|
75 |
+
for article in enriched_articles:
|
76 |
cat = article["category"]
|
77 |
+
if cat not in categorized_articles:
|
78 |
+
categorized_articles[cat] = []
|
79 |
+
categorized_articles[cat].append(article)
|
80 |
+
|
81 |
+
# Limit to 10 most recent per category
|
82 |
+
for cat in categorized_articles:
|
83 |
+
categorized_articles[cat] = sorted(categorized_articles[cat], key=lambda x: x["published"], reverse=True)[:10]
|
84 |
+
|
85 |
+
logger.info(f"Displaying articles: {sum(len(articles) for articles in categorized_articles.values())} total")
|
86 |
return render_template("index.html", categorized_articles=categorized_articles, has_articles=True, loading=True)
|
87 |
except Exception as e:
|
88 |
+
logger.error(f"Error retrieving articles: {e}")
|
89 |
return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
|
90 |
|
91 |
@app.route('/search', methods=['POST'])
|
|
|
125 |
logger.error(f"Search error: {e}")
|
126 |
return render_template("index.html", categorized_articles={}, has_articles=False, loading=False)
|
127 |
|
128 |
+
@app.route('/check_loading')
|
129 |
def check_loading():
|
130 |
global loading_complete
|
131 |
if loading_complete:
|