Spaces:
Running
Running
File size: 2,803 Bytes
3a7387c 7bafad1 cb518f2 3a7387c cb518f2 ce02056 3a7387c cb518f2 3a7387c cb518f2 7bafad1 cb518f2 3a7387c 3156b44 ce02056 1f5e987 ce02056 cb518f2 ce02056 3156b44 3a7387c ce02056 3a7387c 6680594 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import os
from flask import Flask, render_template, request
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
import logging
# Flask application object; the route decorator below registers its handler on it.
app = Flask(__name__)
# Configure root logging at INFO level so this module's info messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def _unique_articles(docs):
    """Collapse search hits into display dicts, one per (title, link) pair.

    Args:
        docs: Iterable of search results. Each item must expose a
            ``metadata`` mapping containing ``title``, ``link``,
            ``original_description``, ``category`` and ``published``;
            ``image`` is optional and falls back to ``"svg"``.

    Returns:
        A list of article dicts in first-seen order, duplicates removed.

    Raises:
        KeyError: If a required metadata key is missing from a result.
    """
    # A dict (not a set) is used so the first occurrence of each article is
    # kept and insertion order is preserved for display.
    unique = {}
    for doc in docs:
        meta = doc.metadata
        key = (meta["title"], meta["link"])
        if key in unique:
            continue
        unique[key] = {
            "title": meta["title"],
            "link": meta["link"],
            "description": meta["original_description"],
            "category": meta["category"],
            "published": meta["published"],
            "image": meta.get("image", "svg"),
        }
    return list(unique.values())


def _group_by_category(articles):
    """Group article dicts into ``{category: [article, ...]}``, keeping order."""
    grouped = {}
    for article in articles:
        grouped.setdefault(article["category"], []).append(article)
    return grouped


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the article listing, optionally filtered by a search query.

    Every request fetches the RSS feeds and stores the fetched articles.
    A POST carrying a non-blank ``search`` form field shows up to 10
    de-duplicated matches for that query; any other request shows the
    de-duplicated results of a generic ``"news"`` search sized to the
    number of fetched articles.

    Returns:
        The rendered ``index.html`` template, given ``categorized_articles``
        (a dict mapping each category to its list of article dicts).
    """
    # NOTE(review): feeds are fetched and re-processed on every request,
    # including searches — consider moving this to a background refresh.
    logger.info("Starting to fetch RSS feeds")
    articles = fetch_rss_feeds()
    logger.info("Fetched %d articles", len(articles))
    process_and_store_articles(articles)
    logger.info("Articles processed and stored")

    # A missing or whitespace-only query is treated as "no search".
    query = ""
    if request.method == 'POST':
        query = (request.form.get('search') or "").strip()

    if query:
        logger.info("Processing search query: %s", query)
        results = vector_db.similarity_search(query, k=10)
        enriched_articles = _unique_articles(results)
        logger.info(
            "Search returned %d unique results", len(enriched_articles)
        )
    elif articles:
        # Only run the default listing query when its result is displayed.
        stored_docs = vector_db.similarity_search("news", k=len(articles))
        enriched_articles = _unique_articles(stored_docs)
        logger.info(
            "Enriched %d unique articles for display", len(enriched_articles)
        )
    else:
        # Nothing was fetched: k would be 0, which is not a meaningful
        # request (and may be rejected by the vector store), so skip the
        # search and render an empty listing instead.
        enriched_articles = []
        logger.info("No articles fetched; rendering an empty listing")

    categorized_articles = _group_by_category(enriched_articles)
    return render_template(
        "index.html", categorized_articles=categorized_articles
    )
if __name__ == "__main__":
    # When executed as a script, serve on all interfaces on port 7860.
    app.run(host="0.0.0.0", port=7860)