broadfield-dev committed on
Commit
0aab8d6
·
verified ·
1 Parent(s): 24922e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -16
app.py CHANGED
@@ -4,6 +4,7 @@ from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
4
  import logging
5
  import time
6
  from threading import Thread
 
7
 
8
  app = Flask(__name__)
9
 
@@ -32,9 +33,10 @@ def loading():
32
  @app.route('/check_feeds', methods=['GET'])
33
  def check_feeds():
34
  try:
35
- # Check if vector DB has documents (simplified check)
36
  docs = vector_db.similarity_search("news", k=1)
37
  if docs:
 
38
  return jsonify({"status": "loaded"})
39
  return jsonify({"status": "loading"}), 202
40
  except Exception as e:
@@ -43,28 +45,20 @@ def check_feeds():
43
 
44
  @app.route('/index', methods=['GET'])
45
  def index():
46
- # Poll until feeds are loaded
47
- while True:
48
- response = check_feeds()
49
- if response.status_code == 200 and response.get_json()["status"] == "loaded":
50
- break
51
- time.sleep(1) # Check every second
52
-
53
- stored_docs = vector_db.similarity_search("news", k=1000) # Increased k for all unique articles
54
  # Use a set to ensure unique articles by title, link, and description hash
55
  unique_articles = {}
56
  for doc in stored_docs:
57
- import hashlib
58
  title = doc.metadata["title"]
59
  link = doc.metadata["link"]
60
- desc = doc.metadata["original_description"]
61
- desc_hash = hashlib.md5(desc.encode()).hexdigest()[:10] # Short hash for uniqueness
62
  key = f"{title}|{link}|{desc_hash}"
63
  if key not in unique_articles:
64
  unique_articles[key] = {
65
  "title": title,
66
  "link": link,
67
- "description": doc.metadata["original_description"],
68
  "category": doc.metadata["category"],
69
  "published": doc.metadata["published"],
70
  "image": doc.metadata.get("image", "svg"),
@@ -81,14 +75,14 @@ def index():
81
  for doc in results:
82
  title = doc.metadata["title"]
83
  link = doc.metadata["link"]
84
- desc = doc.metadata["original_description"]
85
- desc_hash = hashlib.md5(desc.encode()).hexdigest()[:10]
86
  key = f"{title}|{link}|{desc_hash}"
87
  if key not in unique_search_articles:
88
  unique_search_articles[key] = {
89
  "title": title,
90
  "link": link,
91
- "description": doc.metadata["original_description"],
92
  "category": doc.metadata["category"],
93
  "published": doc.metadata["published"],
94
  "image": doc.metadata.get("image", "svg"),
 
4
  import logging
5
  import time
6
  from threading import Thread
7
+ import hashlib
8
 
9
  app = Flask(__name__)
10
 
 
33
  @app.route('/check_feeds', methods=['GET'])
34
  def check_feeds():
35
  try:
36
+ # Check if vector DB has documents
37
  docs = vector_db.similarity_search("news", k=1)
38
  if docs:
39
+ logger.info("Feeds loaded successfully in vector DB")
40
  return jsonify({"status": "loaded"})
41
  return jsonify({"status": "loading"}), 202
42
  except Exception as e:
 
45
 
46
  @app.route('/index', methods=['GET'])
47
  def index():
48
+ stored_docs = vector_db.similarity_search("news", k=1000) # Ensure all unique articles
 
 
 
 
 
 
 
49
  # Use a set to ensure unique articles by title, link, and description hash
50
  unique_articles = {}
51
  for doc in stored_docs:
 
52
  title = doc.metadata["title"]
53
  link = doc.metadata["link"]
54
+ description = doc.metadata["original_description"]
55
+ desc_hash = hashlib.md5(description.encode()).hexdigest()[:10]
56
  key = f"{title}|{link}|{desc_hash}"
57
  if key not in unique_articles:
58
  unique_articles[key] = {
59
  "title": title,
60
  "link": link,
61
+ "description": description,
62
  "category": doc.metadata["category"],
63
  "published": doc.metadata["published"],
64
  "image": doc.metadata.get("image", "svg"),
 
75
  for doc in results:
76
  title = doc.metadata["title"]
77
  link = doc.metadata["link"]
78
+ description = doc.metadata["original_description"]
79
+ desc_hash = hashlib.md5(description.encode()).hexdigest()[:10]
80
  key = f"{title}|{link}|{desc_hash}"
81
  if key not in unique_search_articles:
82
  unique_search_articles[key] = {
83
  "title": title,
84
  "link": link,
85
+ "description": description,
86
  "category": doc.metadata["category"],
87
  "published": doc.metadata["published"],
88
  "image": doc.metadata.get("image", "svg"),