Spaces:
Running
Running
File size: 3,941 Bytes
3a7387c 72c3c36 7bafad1 cb518f2 3a7387c cb518f2 1e338bc d695e20 9a3bd4a 5d47c6a 9a3bd4a 5d47c6a 72c3c36 5d47c6a 9a3bd4a 5d47c6a 3a7387c 5d47c6a ce02056 5d47c6a 9a3bd4a 5d47c6a 9a3bd4a 5d47c6a ce02056 5d47c6a 1e338bc 5d47c6a 1e338bc 3a7387c be9be7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import os
from flask import Flask, render_template, request, jsonify
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
import logging
# Flask application object; the @app.route decorators in this file register
# their view functions on it, and the __main__ guard calls app.run().
app = Flask(__name__)
# Setup logging: configure the root logger at INFO level and create a
# module-level logger that the view functions use for their messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@app.route('/')
def index():
    """Render the home page with all stored articles grouped by category.

    The handler first refreshes the store by calling ``fetch_rss_feeds`` and
    ``process_and_store_articles``, then reads every metadata record back from
    ``vector_db`` and builds the ``categorized_articles`` mapping passed to
    ``index.html`` (category name -> list of article dicts with the keys
    ``title``, ``link``, ``description``, ``category``, ``published`` and
    ``image``).

    Returns:
        The rendered ``index.html`` template. ``has_articles`` is true only
        when at least one article is going to be displayed. On any error
        while reading the store the page is rendered empty.
    """
    # NOTE: this refresh runs synchronously on every request to '/', not only
    # on the first load. It is isolated in its own try block so that a feed
    # or network failure does not hide articles that are already stored.
    try:
        articles = fetch_rss_feeds()
        logger.info("Fetched %s articles", len(articles))
        process_and_store_articles(articles)
        logger.info("Articles processed and stored")
    except Exception:
        logger.exception("Error refreshing feeds; falling back to stored articles")

    try:
        # Only metadata is used to build the page, so document bodies are
        # not requested from the store.
        all_docs = vector_db.get(include=['metadatas'])
        metadatas = all_docs.get('metadatas')
        if not metadatas:
            logger.warning("No articles in DB yet")
            return render_template("index.html", categorized_articles={}, has_articles=False)

        categorized_articles = {}
        # A (title, link) tuple identifies an article; unlike a joined string
        # it cannot collide when a title itself contains the separator.
        seen_keys = set()
        for meta in metadatas:
            if not meta:
                continue
            title = meta.get("title", "No Title")
            link = meta.get("link", "")
            key = (title, link)
            if key in seen_keys:
                continue
            seen_keys.add(key)
            article = {
                "title": title,
                "link": link,
                "description": meta.get("original_description", "No Description"),
                "category": meta.get("category", "Uncategorized"),
                "published": meta.get("published", "Unknown Date"),
                "image": meta.get("image", "svg"),
            }
            categorized_articles.setdefault(article["category"], []).append(article)

        logger.info("Displaying %s unique articles", len(seen_keys))
        return render_template(
            "index.html",
            categorized_articles=categorized_articles,
            # Every record may have been skipped above, so derive the flag
            # from what will actually be shown.
            has_articles=bool(categorized_articles),
        )
    except Exception as e:
        # Route-level boundary: log with traceback and show an empty page
        # instead of a 500 error.
        logger.exception("Error in index: %s", e)
        return render_template("index.html", categorized_articles={}, has_articles=False)
@app.route('/search', methods=['POST'])
def search():
    """Render ``index.html`` with the articles most similar to the query.

    Reads the ``search`` field from the submitted form, asks ``vector_db``
    for up to 10 similar documents, removes duplicates (same title and link)
    and groups the remaining articles by category.

    Returns:
        The rendered ``index.html`` template. An empty page is rendered when
        the query is missing or blank, when nothing matched, or when the
        search raised an error.
    """
    # Treat a missing field and a whitespace-only query the same way: there
    # is nothing meaningful to search for.
    query = (request.form.get('search') or '').strip()
    if not query:
        return render_template("index.html", categorized_articles={}, has_articles=False)

    try:
        logger.info("Searching for: %s", query)
        results = vector_db.similarity_search(query, k=10)

        categorized_articles = {}
        # A (title, link) tuple identifies an article; unlike a joined string
        # it cannot collide when a title itself contains the separator.
        seen_keys = set()
        for doc in results:
            meta = doc.metadata
            # Skip results without metadata, matching what index() does.
            if not meta:
                continue
            title = meta.get("title", "No Title")
            link = meta.get("link", "")
            key = (title, link)
            if key in seen_keys:
                continue
            seen_keys.add(key)
            article = {
                "title": title,
                "link": link,
                "description": meta.get("original_description", "No Description"),
                "category": meta.get("category", "Uncategorized"),
                "published": meta.get("published", "Unknown Date"),
                "image": meta.get("image", "svg"),
            }
            categorized_articles.setdefault(article["category"], []).append(article)

        return render_template(
            "index.html",
            categorized_articles=categorized_articles,
            has_articles=bool(categorized_articles),
        )
    except Exception as e:
        # Route-level boundary: log with traceback and show an empty page
        # instead of a 500 error.
        logger.exception("Search error: %s", e)
        return render_template("index.html", categorized_articles={}, has_articles=False)
if __name__ == "__main__":
    # Start Flask's built-in server only when the file is executed directly;
    # it listens on all network interfaces, port 7860.
    app.run(host="0.0.0.0", port=7860)