"""Flask front end that loads RSS articles and displays them by category."""

import logging
import os
import time

from flask import Flask, render_template, request, Response

from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db

app = Flask(__name__)

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Query and result limit used to list stored articles when no search is given.
# The limit is meant to be large enough to return every stored article; that
# is not guaranteed if the store holds more documents than this.
_DEFAULT_QUERY = "news"
_DEFAULT_K = 1000
# Result limit for a user-supplied search query.
_SEARCH_K = 10


def _unique_articles(docs):
    """Project documents to article dicts, dropping duplicates.

    Two documents are duplicates when they share both title and link; the
    first occurrence wins, so the order of ``docs`` is preserved.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping with the
            keys ``title``, ``link``, ``original_description``, ``category``
            and ``published`` (``image`` is optional).

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``description``,
        ``category``, ``published`` and ``image``.

    Raises:
        KeyError: If a document lacks one of the required metadata keys.
    """
    unique = {}
    for doc in docs:
        meta = doc.metadata
        # A tuple key cannot collide the way a "title|link" string can when
        # the title itself contains the separator.
        key = (meta["title"], meta["link"])
        if key in unique:
            continue
        unique[key] = {
            "title": meta["title"],
            "link": meta["link"],
            "description": meta["original_description"],
            "category": meta["category"],
            "published": meta["published"],
            "image": meta.get("image", "svg"),
        }
    return list(unique.values())


@app.route('/')
def loading():
    """Render the loading page."""
    return render_template("loading.html")


@app.route('/load_feeds', methods=['GET'])
def load_feeds():
    """Fetch the RSS feeds, hand them to the processor and report success.

    Returns:
        A 200 response with the body ``"Feeds loaded"``.
    """
    logger.info("Starting to fetch and process RSS feeds")
    start_time = time.time()
    articles = fetch_rss_feeds()
    logger.info("Fetched %d articles", len(articles))
    process_and_store_articles(articles)
    logger.info("Articles processed and stored")
    end_time = time.time()
    logger.info("RSS feed loading took %.2f seconds", end_time - start_time)
    return Response("Feeds loaded", status=200)


# POST must be accepted here: the search form is read from ``request.form``,
# and a GET-only route rejects POST with 405 before this view ever runs.
@app.route('/index', methods=['GET', 'POST'])
def index():
    """Render the article listing, optionally narrowed by a search query.

    A POST carrying a non-empty ``search`` form field shows the results of
    that query. Any other request shows the default listing.

    Returns:
        The rendered ``index.html`` template, given ``categorized_articles``:
        a dict mapping each category to its list of article dicts.
    """
    query = request.form.get('search') if request.method == 'POST' else None

    if query:
        logger.info("Processing search query: %s", query)
        results = vector_db.similarity_search(query, k=_SEARCH_K)
        enriched_articles = _unique_articles(results)
        logger.info(
            "Search returned %d unique results", len(enriched_articles)
        )
    else:
        stored_docs = vector_db.similarity_search(_DEFAULT_QUERY, k=_DEFAULT_K)
        enriched_articles = _unique_articles(stored_docs)
        logger.info(
            "Enriched %d unique articles for display", len(enriched_articles)
        )

    # Group by category. A plain dict (not defaultdict) is passed on, so a
    # missing category stays missing when the template looks it up.
    categorized_articles = {}
    for article in enriched_articles:
        categorized_articles.setdefault(article["category"], []).append(article)

    return render_template(
        "index.html", categorized_articles=categorized_articles
    )


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)