Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -12,28 +12,19 @@ app = Flask(__name__)
|
|
12 |
logging.basicConfig(level=logging.INFO)
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
# Check if vector DB has documents
|
25 |
-
docs = vector_db.similarity_search("news", k=1)
|
26 |
-
if docs:
|
27 |
-
logger.info("Feeds loaded successfully in vector DB")
|
28 |
-
return jsonify({"status": "loaded"})
|
29 |
-
return jsonify({"status": "loading"}), 202
|
30 |
-
except Exception as e:
|
31 |
-
logger.error(f"Error checking feeds: {e}")
|
32 |
-
return jsonify({"status": "error", "message": str(e)}), 500
|
33 |
|
34 |
-
@app.route('/
|
35 |
def index():
|
36 |
-
# Show existing articles
|
37 |
stored_docs = vector_db.similarity_search("news", k=1000) # Show all available articles
|
38 |
# Use a dict to ensure unique articles by title, link, and description hash
|
39 |
unique_articles = {}
|
@@ -55,29 +46,8 @@ def index():
|
|
55 |
enriched_articles = list(unique_articles.values())
|
56 |
logger.info(f"Enriched {len(enriched_articles)} unique articles for display")
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
if query:
|
61 |
-
logger.info(f"Processing search query: {query}")
|
62 |
-
results = vector_db.similarity_search(query, k=10)
|
63 |
-
unique_search_articles = {}
|
64 |
-
for doc in results:
|
65 |
-
title = doc.metadata["title"]
|
66 |
-
link = doc.metadata["link"]
|
67 |
-
description = doc.metadata["original_description"]
|
68 |
-
desc_hash = hashlib.md5(description.encode()).hexdigest()[:10]
|
69 |
-
key = f"{title}|{link}|{desc_hash}"
|
70 |
-
if key not in unique_search_articles:
|
71 |
-
unique_search_articles[key] = {
|
72 |
-
"title": title,
|
73 |
-
"link": link,
|
74 |
-
"description": description,
|
75 |
-
"category": doc.metadata["category"],
|
76 |
-
"published": doc.metadata["published"],
|
77 |
-
"image": doc.metadata.get("image", "svg"),
|
78 |
-
}
|
79 |
-
enriched_articles = list(unique_search_articles.values())
|
80 |
-
logger.info(f"Search returned {len(enriched_articles)} unique results")
|
81 |
|
82 |
categorized_articles = {}
|
83 |
for article in enriched_articles:
|
@@ -88,5 +58,18 @@ def index():
|
|
88 |
|
89 |
return render_template("index.html", categorized_articles=categorized_articles, loading_new_feeds=True)
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
if __name__ == "__main__":
|
92 |
app.run(host="0.0.0.0", port=7860)
|
|
|
12 |
logging.basicConfig(level=logging.INFO)
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
15 |
+
def load_feeds_in_background():
    """Fetch the RSS feeds, store the articles, and log how long it took.

    Calls ``fetch_rss_feeds()`` and passes its result to
    ``process_and_store_articles()``, logging progress and the total
    elapsed time through the module logger. Exceptions raised by either
    helper are not caught here and propagate to the caller.

    NOTE(review): despite the name, this function itself runs
    synchronously; no caller that schedules it in the background is
    visible in this view -- confirm how it is meant to be invoked.
    """
    logger.info("Starting to fetch and process RSS feeds in background")
    # perf_counter() is monotonic, so the measured duration cannot be
    # skewed (or turn negative) if the system clock is adjusted while
    # the feeds are loading, unlike the wall-clock time.time().
    started = time.perf_counter()
    articles = fetch_rss_feeds()
    logger.info(f"Fetched {len(articles)} articles")
    process_and_store_articles(articles)
    logger.info("Articles processed and stored")
    elapsed = time.perf_counter() - started
    logger.info(f"RSS feed loading took {elapsed:.2f} seconds")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
@app.route('/')
|
26 |
def index():
|
27 |
+
# Show existing articles immediately
|
28 |
stored_docs = vector_db.similarity_search("news", k=1000) # Show all available articles
|
29 |
# Use a dict to ensure unique articles by title, link, and description hash
|
30 |
unique_articles = {}
|
|
|
46 |
enriched_articles = list(unique_articles.values())
|
47 |
logger.info(f"Enriched {len(enriched_articles)} unique articles for display")
|
48 |
|
49 |
+
# Start loading new feeds in the background
|
50 |
+
subprocess.Popen(["python", "rss_processor.py", "load_feeds"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
categorized_articles = {}
|
53 |
for article in enriched_articles:
|
|
|
58 |
|
59 |
return render_template("index.html", categorized_articles=categorized_articles, loading_new_feeds=True)
|
60 |
|
61 |
+
@app.route('/check_feeds', methods=['GET'])
def check_feeds():
    """Report whether the vector DB currently returns any document.

    Runs a one-result similarity search for "news" and responds with:

    * ``{"status": "loaded"}`` (no explicit status code) when at least
      one document comes back;
    * ``{"status": "loading"}`` with HTTP 202 when the search is empty;
    * ``{"status": "error", "message": <exception text>}`` with HTTP 500
      if anything inside the check raises.

    NOTE(review): this is a simplified check -- any document already in
    the store satisfies it, so it cannot tell freshly loaded feeds apart
    from older content.
    """
    try:
        docs = vector_db.similarity_search("news", k=1)
        if docs:
            logger.info("Feeds loaded successfully in vector DB")
            return jsonify({"status": "loaded"})
        return jsonify({"status": "loading"}), 202
    except Exception as e:
        # Route boundary: catch everything so the client gets a JSON 500,
        # and use logger.exception so the traceback is kept in the log
        # (logger.error with only the message text discarded it).
        # NOTE(review): str(e) is returned to the client as-is; confirm
        # that exposing internal error text is acceptable.
        logger.exception(f"Error checking feeds: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
|
73 |
+
|
74 |
if __name__ == "__main__":
|
75 |
app.run(host="0.0.0.0", port=7860)
|