grok_test / app.py
broadfield-dev's picture
Update app.py
d695e20 verified
raw
history blame
3.19 kB
import os
from flask import Flask, render_template, request, Response
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
import logging
import time
# WSGI application object; the route decorators below register views on it.
app = Flask(__name__)
# Setup logging: configure the root logger at INFO and create this module's
# logger, which the view functions use for progress messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@app.route('/')
def loading():
    """Render the ``loading.html`` template for the site root."""
    page = render_template("loading.html")
    return page
@app.route('/load_feeds', methods=['GET'])
def load_feeds():
    """Fetch the RSS feeds, store the resulting articles, and report success.

    Calls ``fetch_rss_feeds()`` and passes its result to
    ``process_and_store_articles()``, logging progress and total duration.

    Returns:
        A ``Response`` with body ``"Feeds loaded"`` and status 200 once both
        calls have returned. Exceptions raised by either call are not caught
        here and propagate to Flask.
    """
    logger.info("Starting to fetch and process RSS feeds")
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted mid-request.
    start_time = time.perf_counter()

    articles = fetch_rss_feeds()
    logger.info("Fetched %d articles", len(articles))

    process_and_store_articles(articles)
    logger.info("Articles processed and stored")

    elapsed = time.perf_counter() - start_time
    logger.info("RSS feed loading took %.2f seconds", elapsed)
    return Response("Feeds loaded", status=200)
def _unique_articles(docs):
    """Project search hits to display dicts, keeping one per title/link pair.

    Args:
        docs: Iterable of objects with a ``metadata`` mapping that contains
            ``title``, ``link``, ``original_description``, ``category`` and
            ``published``; ``image`` is optional and defaults to ``"svg"``.

    Returns:
        A list of article dicts in first-seen order, with duplicates (same
        title and link) dropped.

    Raises:
        KeyError: If a required metadata key is missing from a document.
    """
    unique = {}
    for doc in docs:
        meta = doc.metadata
        key = f"{meta['title']}|{meta['link']}"
        if key in unique:
            continue
        unique[key] = {
            "title": meta["title"],
            "link": meta["link"],
            "description": meta["original_description"],
            "category": meta["category"],
            "published": meta["published"],
            "image": meta.get("image", "svg"),
        }
    return list(unique.values())


# POST must be accepted here: the search branch below only runs for POST
# requests, and a GET-only route would reject them with 405 before the view
# is ever called.
@app.route('/index', methods=['GET', 'POST'])
def index():
    """Render ``index.html`` with articles grouped by category.

    On a POST carrying a non-empty ``search`` form field, the articles are the
    top matches (k=10) for that query. Otherwise (GET, or an empty/missing
    query) the articles are whatever a broad ``"news"`` similarity search
    returns with k=1000.

    Returns:
        The rendered ``index.html`` template, given ``categorized_articles``:
        a dict mapping each category to its list of article dicts.
    """
    query = request.form.get('search') if request.method == 'POST' else None

    if query:
        logger.info("Processing search query: %s", query)
        results = vector_db.similarity_search(query, k=10)
        enriched_articles = _unique_articles(results)
        logger.info("Search returned %d unique results", len(enriched_articles))
    else:
        # Large k is intended to pull back every stored article; the same
        # article can appear more than once, hence the de-duplication.
        stored_docs = vector_db.similarity_search("news", k=1000)
        enriched_articles = _unique_articles(stored_docs)
        logger.info(
            "Enriched %d unique articles for display", len(enriched_articles)
        )

    categorized_articles = {}
    for article in enriched_articles:
        categorized_articles.setdefault(article["category"], []).append(article)
    return render_template("index.html", categorized_articles=categorized_articles)
if __name__ == "__main__":
    # Start Flask's built-in server on all interfaces, port 7860, when this
    # file is executed directly rather than imported.
    app.run(port=7860, host="0.0.0.0")