# Flask front-end for an RSS reader: serves articles stored in a Chroma
# vector DB and exposes a similarity-search endpoint.
import os | |
import subprocess | |
from flask import Flask, render_template, request, jsonify | |
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db | |
import logging | |
import time | |
import hashlib | |
# Flask application instance; all route handlers below register on it.
app = Flask(__name__)

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)  # module-level logger, per logging convention
def load_feeds_in_background():
    """Fetch every configured RSS feed, embed and persist the articles.

    Intended to run outside the request cycle; logs how long the whole
    fetch-and-store pass took.
    """
    logger.info("Starting to fetch and process RSS feeds in background")
    started = time.time()

    articles = fetch_rss_feeds()
    logger.info(f"Fetched {len(articles)} articles")

    process_and_store_articles(articles)
    logger.info("Articles processed and stored")

    elapsed = time.time() - started
    logger.info(f"RSS feed loading took {elapsed:.2f} seconds")
@app.route("/")
def index():
    """Render the home page with all stored articles grouped by category.

    Reads every document currently in the Chroma vector store, de-duplicates
    articles by (title, link, description-hash), kicks off a background feed
    refresh, and renders index.html.

    Returns:
        Rendered index.html with `categorized_articles` (dict of
        category -> list of article dicts), `loading_new_feeds`, and
        `has_articles` flags.
    """
    enriched_articles = []
    try:
        # Chroma's get() returns parallel lists under 'documents' and
        # 'metadatas'. NOTE: the original code wrapped each entry in a
        # `Document` (never imported -> NameError) and indexed the document
        # string with doc['documents'][0] (TypeError); we consume the
        # parallel lists directly instead.
        all_docs = vector_db.get(include=['documents', 'metadatas'])
        metadatas = all_docs.get('metadatas') or []
        if metadatas:
            logger.info(f"Found {len(metadatas)} documents in vector DB")
        else:
            logger.warning("No metadata or documents found in vector DB")

        # Use a dict keyed on title|link|description-hash to ensure unique
        # articles; the full-description hash keeps apart items that share a
        # title/link but differ in body.
        unique_articles = {}
        for meta in metadatas:
            if not meta:  # Handle potential None metadata
                continue
            title = meta.get("title", "No Title")
            link = meta.get("link", "")
            description = meta.get("original_description", "No Description")
            desc_hash = hashlib.md5(description.encode()).hexdigest()
            key = f"{title}|{link}|{desc_hash}"
            if key not in unique_articles:
                unique_articles[key] = {
                    "title": title,
                    "link": link,
                    "description": description,
                    "category": meta.get("category", "Uncategorized"),
                    "published": meta.get("published", "Unknown Date"),
                    "image": meta.get("image", "svg"),
                }
        enriched_articles = list(unique_articles.values())
        logger.info(f"Enriched {len(enriched_articles)} unique articles for display")
    except Exception as e:
        logger.error(f"Error retrieving documents from vector DB: {e}")
        enriched_articles = []  # Fallback if DB is empty or inaccessible

    # Start loading new feeds in a detached process so the page renders
    # immediately with whatever is already stored.
    subprocess.Popen(["python", "rss_processor.py", "load_feeds"])

    categorized_articles = {}
    for article in enriched_articles:
        categorized_articles.setdefault(article["category"], []).append(article)
    return render_template(
        "index.html",
        categorized_articles=categorized_articles,
        loading_new_feeds=True,
        has_articles=bool(enriched_articles),
    )
@app.route("/search", methods=["POST"])
def search():
    """Handle the search form: similarity-search the vector DB for the query.

    Reads the 'search' field from the POSTed form, fetches the top-10
    similar documents, de-duplicates them the same way as the index view,
    and renders index.html with the (categorized) results.

    Returns:
        Rendered index.html; empty categories when the query is blank or
        the search fails.
    """
    query = request.form.get('search')
    if not query:
        # Blank query: render the page with no results rather than erroring.
        return render_template("index.html", categorized_articles={}, loading_new_feeds=True, has_articles=False)

    logger.info(f"Processing search query: {query}")
    try:
        results = vector_db.similarity_search(query, k=10)

        # De-duplicate by title|link|description-hash, mirroring index().
        unique_search_articles = {}
        for doc in results:
            title = doc.metadata.get("title", "No Title")
            link = doc.metadata.get("link", "")
            description = doc.metadata.get("original_description", "No Description")
            desc_hash = hashlib.md5(description.encode()).hexdigest()
            key = f"{title}|{link}|{desc_hash}"
            if key not in unique_search_articles:
                unique_search_articles[key] = {
                    "title": title,
                    "link": link,
                    "description": description,
                    "category": doc.metadata.get("category", "Uncategorized"),
                    "published": doc.metadata.get("published", "Unknown Date"),
                    "image": doc.metadata.get("image", "svg"),
                }
        enriched_articles = list(unique_search_articles.values())
        logger.info(f"Search returned {len(enriched_articles)} unique results")
    except Exception as e:
        logger.error(f"Error performing search: {e}")
        enriched_articles = []

    categorized_articles = {}
    for article in enriched_articles:
        categorized_articles.setdefault(article["category"], []).append(article)
    return render_template(
        "index.html",
        categorized_articles=categorized_articles,
        loading_new_feeds=True,
        has_articles=bool(enriched_articles),
    )
@app.route("/check_feeds")
def check_feeds():
    """Polling endpoint: report whether the vector DB has been populated.

    Returns:
        JSON {"status": "loaded"} (200) once any metadata exists,
        {"status": "loading"} (202) while empty, or
        {"status": "error", ...} (500) if the DB cannot be read.
    """
    try:
        # Check if vector DB has any documents
        all_docs = vector_db.get(include=['documents', 'metadatas'])
        if all_docs.get('metadatas'):
            logger.info("Feeds loaded successfully in vector DB")
            return jsonify({"status": "loaded"})
        # 202 Accepted signals "still processing" to the polling client.
        return jsonify({"status": "loading"}), 202
    except Exception as e:
        logger.error(f"Error checking feeds: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
if __name__ == "__main__":
    # Listen on all interfaces; port 7860 — presumably the Hugging Face
    # Spaces convention, TODO confirm against the deployment config.
    app.run(host="0.0.0.0", port=7860)