broadfield-dev committed on
Commit
3156b44
·
verified ·
1 Parent(s): 22b94f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -24
app.py CHANGED
@@ -17,36 +17,41 @@ def index():
17
  process_and_store_articles(articles)
18
  logger.info("Articles processed and stored")
19
  stored_docs = vector_db.similarity_search("news", k=len(articles))
20
- enriched_articles = [
21
- {
22
- "title": doc.metadata["title"],
23
- "link": doc.metadata["link"],
24
- "description": doc.metadata["original_description"],
25
- "category": doc.metadata["category"],
26
- "published": doc.metadata["published"],
27
- "image": doc.metadata.get("image", "svg"),
28
- }
29
- for doc in stored_docs
30
- ]
31
- logger.info(f"Enriched {len(enriched_articles)} articles for display")
 
 
 
32
 
33
  if request.method == 'POST' and 'search' in request.form:
34
  query = request.form.get('search')
35
  if query:
36
  logger.info(f"Processing search query: {query}")
37
  results = vector_db.similarity_search(query, k=10)
38
- enriched_articles = [
39
- {
40
- "title": doc.metadata["title"],
41
- "link": doc.metadata["link"],
42
- "description": doc.metadata["original_description"],
43
- "category": doc.metadata["category"],
44
- "published": doc.metadata["published"],
45
- "image": doc.metadata.get("image", "svg"),
46
- }
47
- for doc in results
48
- ]
49
- logger.info(f"Search returned {len(enriched_articles)} results")
 
 
50
 
51
  categorized_articles = {}
52
  for article in enriched_articles:
 
17
  process_and_store_articles(articles)
18
  logger.info("Articles processed and stored")
19
  stored_docs = vector_db.similarity_search("news", k=len(articles))
20
+ # Use a set to ensure unique articles by title and link
21
+ unique_articles = {}
22
+ for doc in stored_docs:
23
+ key = f"{doc.metadata['title']}|{doc.metadata['link']}"
24
+ if key not in unique_articles:
25
+ unique_articles[key] = {
26
+ "title": doc.metadata["title"],
27
+ "link": doc.metadata["link"],
28
+ "description": doc.metadata["original_description"],
29
+ "category": doc.metadata["category"],
30
+ "published": doc.metadata["published"],
31
+ "image": doc.metadata.get("image", "svg"),
32
+ }
33
+ enriched_articles = list(unique_articles.values())
34
+ logger.info(f"Enriched {len(enriched_articles)} unique articles for display")
35
 
36
  if request.method == 'POST' and 'search' in request.form:
37
  query = request.form.get('search')
38
  if query:
39
  logger.info(f"Processing search query: {query}")
40
  results = vector_db.similarity_search(query, k=10)
41
+ unique_search_articles = {}
42
+ for doc in results:
43
+ key = f"{doc.metadata['title']}|{doc.metadata['link']}"
44
+ if key not in unique_search_articles:
45
+ unique_search_articles[key] = {
46
+ "title": doc.metadata["title"],
47
+ "link": doc.metadata["link"],
48
+ "description": doc.metadata["original_description"],
49
+ "category": doc.metadata["category"],
50
+ "published": doc.metadata["published"],
51
+ "image": doc.metadata.get("image", "svg"),
52
+ }
53
+ enriched_articles = list(unique_search_articles.values())
54
+ logger.info(f"Search returned {len(enriched_articles)} unique results")
55
 
56
  categorized_articles = {}
57
  for article in enriched_articles: