File size: 3,941 Bytes
3a7387c
72c3c36
7bafad1
cb518f2
3a7387c
 
 
cb518f2
 
 
 
1e338bc
d695e20
9a3bd4a
5d47c6a
 
 
 
 
 
 
9a3bd4a
5d47c6a
 
 
 
 
 
 
 
 
72c3c36
5d47c6a
 
 
 
 
 
 
9a3bd4a
 
 
5d47c6a
 
 
 
 
3a7387c
5d47c6a
 
 
 
 
ce02056
5d47c6a
 
 
 
9a3bd4a
 
 
 
5d47c6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a3bd4a
 
 
 
5d47c6a
ce02056
5d47c6a
1e338bc
5d47c6a
 
1e338bc
3a7387c
be9be7d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
from flask import Flask, render_template, request, jsonify
from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
import logging

# Flask application object; the @app.route decorators below register their
# view functions on it.
app = Flask(__name__)

# Setup logging: configure the root logger at INFO level and create the
# module-level logger used by the views in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@app.route('/')
def index():
    """Refresh the article store and render all stored articles by category.

    Each request fetches the RSS feeds, hands them to
    ``process_and_store_articles``, reads every metadata record back from
    ``vector_db`` and groups the de-duplicated articles by ``category``.

    Returns:
        The rendered ``index.html`` template. ``categorized_articles`` maps a
        category name to a list of article dicts, and ``has_articles`` is
        False when there is nothing to display or when any step raised.
    """
    try:
        # NOTE: this fetch/store step runs synchronously on every request to
        # '/', not only on the first load.
        articles = fetch_rss_feeds()
        logger.info(f"Fetched {len(articles)} articles")
        process_and_store_articles(articles)
        logger.info("Articles processed and stored")

        # Only metadata is rendered, so the document bodies are not requested.
        stored = vector_db.get(include=['metadatas'])
        metadatas = stored.get('metadatas')
        if not metadatas:
            logger.warning("No articles in DB yet")
            return render_template("index.html", categorized_articles={}, has_articles=False)

        # Build display records, dropping entries without metadata and
        # duplicates that share the same title and link.
        enriched_articles = []
        seen_keys = set()
        for meta in metadatas:
            if not meta:
                continue
            title = meta.get("title", "No Title")
            link = meta.get("link", "")
            key = f"{title}|{link}"
            if key in seen_keys:
                continue
            seen_keys.add(key)
            enriched_articles.append({
                "title": title,
                "link": link,
                "description": meta.get("original_description", "No Description"),
                "category": meta.get("category", "Uncategorized"),
                "published": meta.get("published", "Unknown Date"),
                "image": meta.get("image", "svg"),
            })
        logger.info(f"Displaying {len(enriched_articles)} unique articles")

        # Group articles by category, preserving the order they were read in.
        categorized_articles = {}
        for article in enriched_articles:
            categorized_articles.setdefault(article["category"], []).append(article)

        # Report "no articles" when every record was skipped above, matching
        # how the search view computes the flag.
        return render_template(
            "index.html",
            categorized_articles=categorized_articles,
            has_articles=bool(enriched_articles),
        )
    except Exception as e:
        # Top-level boundary for the view: keep the page rendering, but log
        # the full traceback so the failure can be diagnosed.
        logger.exception("Error in index: %s", e)
        return render_template("index.html", categorized_articles={}, has_articles=False)

@app.route('/search', methods=['POST'])
def search():
    """Render the articles most similar to the submitted search text.

    Reads the ``search`` form field, runs a similarity search for up to 10
    results against ``vector_db``, and groups the de-duplicated hits by
    ``category``.

    Returns:
        The rendered ``index.html`` template. ``has_articles`` is False when
        the query is missing or blank, when nothing matched, or when the
        search raised.
    """
    # A missing field and a whitespace-only query are both treated as empty.
    query = (request.form.get('search') or "").strip()
    if not query:
        return render_template("index.html", categorized_articles={}, has_articles=False)

    try:
        logger.info(f"Searching for: {query}")
        results = vector_db.similarity_search(query, k=10)

        # Build display records, dropping hits without metadata and
        # duplicates that share the same title and link.
        enriched_articles = []
        seen_keys = set()
        for doc in results:
            meta = doc.metadata
            if not meta:
                # Skip this hit (as the index view does) instead of letting
                # one bad record fail the whole search.
                continue
            title = meta.get("title", "No Title")
            link = meta.get("link", "")
            key = f"{title}|{link}"
            if key in seen_keys:
                continue
            seen_keys.add(key)
            enriched_articles.append({
                "title": title,
                "link": link,
                "description": meta.get("original_description", "No Description"),
                "category": meta.get("category", "Uncategorized"),
                "published": meta.get("published", "Unknown Date"),
                "image": meta.get("image", "svg"),
            })

        # Group hits by category, preserving result order within each group.
        categorized_articles = {}
        for article in enriched_articles:
            categorized_articles.setdefault(article["category"], []).append(article)

        return render_template(
            "index.html",
            categorized_articles=categorized_articles,
            has_articles=bool(enriched_articles),
        )
    except Exception as e:
        # Top-level boundary for the view: keep the page rendering, but log
        # the full traceback so the failure can be diagnosed.
        logger.exception("Search error: %s", e)
        return render_template("index.html", categorized_articles={}, has_articles=False)

# Script entry point: when the file is executed directly, start the Flask
# server listening on all network interfaces ("0.0.0.0") on port 7860.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)