import logging
import threading
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime

from flask import Flask, render_template, request, jsonify

from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
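# rss_processor (project-local module) is assumed to provide:
#   fetch_rss_feeds()                 -> list of parsed feed entries
#   process_and_store_articles(items) -> embeds and stores articles in Chroma
#   vector_db                         -> a Chroma vector store exposing .get() and
#                                        .similarity_search() (LangChain-style API)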

app = Flask(__name__)

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global flag to track background loading.
# NOTE: this flag is per-process; it works with the single-process server started
# below but would not be shared across multiple WSGI workers.
loading_complete = False

def parse_published(value):
    """Parse an RSS 'published' string into an aware datetime for sorting.

    Feed dates are typically RFC 2822 strings ("Mon, 01 Jan 2024 00:00:00 +0000"),
    which do not sort correctly as plain text. Unparseable values sort last.
    """
    try:
        dt = parsedate_to_datetime(value)
        # Normalize naive datetimes so aware/naive values stay comparable
        return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
    except (TypeError, ValueError):
        return datetime.min.replace(tzinfo=timezone.utc)

def article_from_meta(meta):
    """Map stored Chroma metadata onto the dict shape the templates expect."""
    return {
        "title": meta.get("title", "No Title"),
        "link": meta.get("link", ""),
        "description": meta.get("original_description", "No Description"),
        "category": meta.get("category", "Uncategorized"),
        "published": meta.get("published", "Unknown Date"),
        "image": meta.get("image", "svg"),
    }

def load_feeds_in_background():
    global loading_complete
    try:
        logger.info("Starting background RSS feed fetch")
        articles = fetch_rss_feeds()
        logger.info(f"Fetched {len(articles)} articles")
        process_and_store_articles(articles)
        logger.info("Background feed processing complete")
        loading_complete = True
    except Exception as e:
        logger.error(f"Error in background feed loading: {e}")
        loading_complete = True  # Mark as complete even on error to avoid infinite polling

@app.route('/')
def index():
    global loading_complete
    loading_complete = False  # Reset on each load

    # Refresh feeds in a daemon thread so rendering is never blocked; daemon=True
    # lets the process exit even if a fetch is still in flight
    threading.Thread(target=load_feeds_in_background, daemon=True).start()

    try:
        # Retrieve all stored article metadata from Chroma; the 10 most recent
        # are selected after sorting below
        all_docs = vector_db.get(include=['metadatas'])
        if not all_docs.get('metadatas'):
            logger.info("No articles in DB yet")
            return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)

        # Build a deduplicated article list keyed on (title, link)
        enriched_articles = []
        seen_keys = set()
        for meta in all_docs['metadatas']:
            if not meta:
                continue
            article = article_from_meta(meta)
            key = f"{article['title']}|{article['link']}"
            if key not in seen_keys:
                seen_keys.add(key)
                enriched_articles.append(article)

        # Sort newest first; RSS date strings must be parsed, not compared as text
        enriched_articles.sort(key=lambda x: parse_published(x["published"]), reverse=True)
        recent_articles = enriched_articles[:10]
        logger.info(f"Displaying {len(recent_articles)} recent articles")

        # Categorize recent articles
        categorized_articles = {}
        for article in recent_articles:
            cat = article["category"]
            categorized_articles.setdefault(cat, []).append(article)

        return render_template("index.html", categorized_articles=categorized_articles, has_articles=True, loading=True)
    except Exception as e:
        logger.error(f"Error retrieving recent articles: {e}")
        return render_template("index.html", categorized_articles={}, has_articles=False, loading=True)
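
# index.html is assumed to show a loading indicator and poll /check_loading while
# loading=True, refreshing the article list once the background fetch completes.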

@app.route('/search', methods=['POST'])
def search():
    query = request.form.get('search')
    if not query:
        return render_template("index.html", categorized_articles={}, has_articles=False, loading=False)

    try:
        logger.info(f"Searching for: {query}")
        results = vector_db.similarity_search(query, k=10)
        enriched_articles = []
        seen_keys = set()
        for doc in results:
            article = article_from_meta(doc.metadata)
            key = f"{article['title']}|{article['link']}"
            if key not in seen_keys:
                seen_keys.add(key)
                enriched_articles.append(article)

        categorized_articles = {}
        for article in enriched_articles:
            cat = article["category"]
            categorized_articles.setdefault(cat, []).append(article)

        return render_template("index.html", categorized_articles=categorized_articles, has_articles=bool(enriched_articles), loading=False)
    except Exception as e:
        logger.error(f"Search error: {e}")
        return render_template("index.html", categorized_articles={}, has_articles=False, loading=False)
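
# Polled by the front end until the background fetch finishes; HTTP 202 signals
# "still loading" without being an error. Hypothetical manual check:
#   curl -i http://localhost:7860/check_loading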

@app.route('/check_loading', methods=['GET'])
def check_loading():
    global loading_complete
    if loading_complete:
        return jsonify({"status": "complete"})
    return jsonify({"status": "loading"}), 202

if __name__ == "__main__":
    # 0.0.0.0:7860 matches the Hugging Face Spaces convention for web apps
    app.run(host="0.0.0.0", port=7860)