Spaces:
Running
Running
File size: 2,935 Bytes
3a7387c 37df149 33e2dac 7bafad1 cb518f2 d695e20 0aab8d6 3a7387c cb518f2 1e338bc d695e20 1e338bc d695e20 e06bdde 33e2dac 3156b44 33e2dac 0aab8d6 e06bdde 33e2dac 3156b44 33e2dac 0aab8d6 3156b44 ce02056 1e338bc 3a7387c ce02056 37df149 ce02056 1e338bc e06bdde 1e338bc 3a7387c be9be7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import hashlib
import logging
import os
import subprocess
import sys
import time

from flask import Flask, render_template, request, Response, jsonify

from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)
# Configure the root logger to emit INFO-level records and above, then create
# the module-level logger used by the functions in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_feeds_in_background():
    """Fetch the RSS feeds, pass the articles on for storage, and log the timing.

    Calls ``fetch_rss_feeds()`` and hands its result to
    ``process_and_store_articles()``, logging progress at each step and the
    total wall-clock duration at the end. Returns nothing.
    """
    logger.info("Starting to fetch and process RSS feeds in background")
    started_at = time.time()

    fetched = fetch_rss_feeds()
    article_count = len(fetched)
    logger.info(f"Fetched {article_count} articles")

    process_and_store_articles(fetched)
    logger.info("Articles processed and stored")

    # Measured after the storage step so the figure covers fetch + processing.
    elapsed = time.time() - started_at
    logger.info(f"RSS feed loading took {elapsed:.2f} seconds")
# Handle of the most recently launched feed-loader subprocess; None until the
# first launch. Kept so a new loader is not spawned while one is still running.
_feed_loader_process = None


def _start_feed_loader():
    """Launch the RSS loader subprocess unless a previous one is still running.

    Returns:
        True if a loader process is running after the call (either already
        running or freshly started), False if it could not be started.
    """
    global _feed_loader_process
    # poll() returns None while the child is alive; once the child has exited
    # it returns the exit code and reaps the process, so no zombie is left.
    if _feed_loader_process is not None and _feed_loader_process.poll() is None:
        return True
    try:
        # Use the interpreter running this app so the loader sees the same
        # environment; fall back to "python" if the path is unavailable.
        _feed_loader_process = subprocess.Popen(
            [sys.executable or "python", "rss_processor.py", "load_feeds"]
        )
    except OSError as exc:
        # Failing to refresh feeds must not prevent showing stored articles.
        logger.error(f"Could not start feed loader: {exc}")
        return False
    return True


def _collect_unique_articles(docs):
    """Build display dicts from vector-DB documents, dropping duplicates.

    Two documents are duplicates when title, link and description all match.
    The first occurrence wins and the input order is preserved.
    """
    # A dict (not a set) so each dedupe key maps to its article payload.
    unique_articles = {}
    for doc in docs:
        meta = doc.metadata
        title = meta["title"]
        link = meta["link"]
        description = meta["original_description"]
        # Hash the description so the key stays short for long texts.
        desc_hash = hashlib.md5(description.encode()).hexdigest()
        key = f"{title}|{link}|{desc_hash}"
        if key not in unique_articles:
            unique_articles[key] = {
                "title": title,
                "link": link,
                "description": description,
                "category": meta["category"],
                "published": meta["published"],
                "image": meta.get("image", "svg"),
            }
    return list(unique_articles.values())


def _group_by_category(articles):
    """Group article dicts into ``{category: [article, ...]}``, keeping order."""
    grouped = {}
    for article in articles:
        grouped.setdefault(article["category"], []).append(article)
    return grouped


@app.route('/')
def index():
    """Render the home page from stored articles and trigger a feed refresh.

    Articles already in the vector DB are shown immediately (possibly none),
    de-duplicated and grouped by category. A background subprocess is started
    to load new feeds unless one launched by this process is still running.

    Returns:
        The rendered ``index.html`` template. ``loading_new_feeds`` is True
        when a loader subprocess is running, False if it could not be started.
    """
    # Show existing articles immediately, even if empty.
    stored_docs = vector_db.similarity_search("news", k=1000)  # Try to retrieve all available articles
    logger.info(f"Found {len(stored_docs)} documents in vector DB")

    enriched_articles = _collect_unique_articles(stored_docs)
    logger.info(f"Enriched {len(enriched_articles)} unique articles for display")

    # Start loading new feeds in the background (at most one loader at a time;
    # best-effort, simultaneous requests are not serialized with a lock).
    loading_new_feeds = _start_feed_loader()

    categorized_articles = _group_by_category(enriched_articles)
    return render_template(
        "index.html",
        categorized_articles=categorized_articles,
        loading_new_feeds=loading_new_feeds,
    )
@app.route('/check_feeds', methods=['GET'])
def check_feeds():
    """Report whether the vector DB currently holds any document.

    Returns:
        ``{"status": "loaded"}`` (HTTP 200) when a one-result search finds a
        document, ``{"status": "loading"}`` with HTTP 202 when it finds none,
        and ``{"status": "error", "message": ...}`` with HTTP 500 if the
        lookup raises.
    """
    # NOTE(review): a hit only shows the DB is non-empty; it does not prove
    # that a running feed load has finished - confirm this is the intent.
    try:
        hits = vector_db.similarity_search("news", k=1)
        if not hits:
            return jsonify({"status": "loading"}), 202
        logger.info("Feeds loaded successfully in vector DB")
        return jsonify({"status": "loaded"})
    except Exception as err:
        logger.error(f"Error checking feeds: {err}")
        payload = {"status": "error", "message": str(err)}
        return jsonify(payload), 500
if __name__ == "__main__":
    # Run the Flask server on all interfaces at port 7860 when this file is
    # executed as a script (not when it is imported).
    app.run(host="0.0.0.0", port=7860)