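"""Flask front end for the RSS reader.

Serves articles already stored in the vector DB and triggers background
refreshes of the RSS feeds, which the page polls for via /check_feeds.
"""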
import subprocess
import sys
from flask import Flask, render_template, jsonify
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
import logging
import time
import hashlib

app = Flask(__name__)

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def load_feeds_in_background():
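    """Fetch all RSS feeds, then embed and store the articles in the
    vector DB, logging how long the full pass took."""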
    logger.info("Starting to fetch and process RSS feeds in background")
    start_time = time.time()
    articles = fetch_rss_feeds()
    logger.info(f"Fetched {len(articles)} articles")
    process_and_store_articles(articles)
    logger.info("Articles processed and stored")
    end_time = time.time()
    logger.info(f"RSS feed loading took {end_time - start_time:.2f} seconds")

@app.route('/')
def index():
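    """Render the home page from articles already stored in the vector DB,
    then kick off a background refresh of the feeds."""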
    # Show existing articles immediately
    stored_docs = vector_db.similarity_search("news", k=1000)  # retrieve up to 1000 stored articles
    # Deduplicate articles keyed on title, link, and a short hash of the description
    unique_articles = {}
    for doc in stored_docs:
        title = doc.metadata["title"]
        link = doc.metadata["link"]
        description = doc.metadata["original_description"]
        desc_hash = hashlib.md5(description.encode()).hexdigest()[:10]
        key = f"{title}|{link}|{desc_hash}"
        if key not in unique_articles:
            unique_articles[key] = {
                "title": title,
                "link": link,
                "description": description,
                "category": doc.metadata["category"],
                "published": doc.metadata["published"],
                "image": doc.metadata.get("image", "svg"),
            }
    enriched_articles = list(unique_articles.values())
    logger.info(f"Enriched {len(enriched_articles)} unique articles for display")

    # Start loading new feeds in a detached background process; the page
    # polls /check_feeds to learn when they are ready. sys.executable is
    # safer than a bare "python", which may not be on PATH.
    subprocess.Popen([sys.executable, "rss_processor.py", "load_feeds"])

    # Group the deduplicated articles by category for the template
    categorized_articles = {}
    for article in enriched_articles:
        categorized_articles.setdefault(article["category"], []).append(article)

    return render_template("index.html", categorized_articles=categorized_articles, loading_new_feeds=True)

@app.route('/check_feeds', methods=['GET'])
def check_feeds():
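    """Polling endpoint: report whether the vector DB has any documents
    yet, as a lightweight proxy for the background load having finished."""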
    try:
        # Check if vector DB has new documents (simplified check)
        docs = vector_db.similarity_search("news", k=1)
        if docs:
            logger.info("Feeds loaded successfully in vector DB")
            return jsonify({"status": "loaded"})
        return jsonify({"status": "loading"}), 202
    except Exception as e:
        logger.error(f"Error checking feeds: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)