ragV98 committed on
Commit faed34c · 1 Parent(s): 2bdf31a
Files changed (1)
  1. components/generators/daily_feed.py +21 -17
components/generators/daily_feed.py CHANGED
@@ -1,10 +1,12 @@
  import os
  import json
  import redis
+ import numpy as np
  from typing import List, Dict
  from openai import OpenAI
  from components.indexers.news_indexer import get_upstash_vector_store
- from llama_index.core import VectorStoreIndex, StorageContext
+ from llama_index.core import StorageContext
+ from llama_index.vector_stores.types import VectorStoreQuery

  # 🔐 Environment variables
  REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
@@ -28,26 +30,29 @@ BASE_PROMPT = (
      "Return up to 3 punchy headlines, each under 20 words, written like a premium editorial bulletin."
  )

- # 📥 Load topic-wise documents from Upstash vector store
+ # 📥 Load documents grouped by topic from Upstash
  def load_all_documents_grouped_by_topic() -> Dict[str, List[str]]:
-     topic_docs = {}
+     topic_docs = {key: [] for key in TOPIC_KEYS}

      try:
          vector_store = get_upstash_vector_store()
-         storage_context = StorageContext.from_defaults(vector_store=vector_store)

-         # 🔍 Load all documents from vector store
-         all_nodes = vector_store._data.values()  # Upstash uses `_data` internally to store nodes
-
-         for node in all_nodes:
+         for topic, key in zip(TOPICS, TOPIC_KEYS):
              try:
-                 content = node.get_content().strip()
-                 topic = node.metadata.get("topic", "").lower().replace(" news", "")
-                 if not content or not topic:
-                     continue
-                 topic_docs.setdefault(topic, []).append(content)
+                 dummy_vector = np.random.rand(384).tolist()  # Assuming MiniLM embeddings
+                 query = VectorStoreQuery(
+                     query_embedding=dummy_vector,
+                     similarity_top_k=50,
+                     filters={"topic": topic}
+                 )
+                 result = vector_store.query(query)
+
+                 for node in result.nodes:
+                     content = node.get_content().strip()
+                     if content:
+                         topic_docs[key].append(content)
              except Exception as e:
-                 print(f"❌ [Node processing error]", e)
+                 print(f"❌ [Topic Metadata Filter error: {key}]", e)

      except Exception as e:
          print("❌ [load_all_documents_grouped_by_topic Error]", e)
@@ -62,9 +67,9 @@ def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:

      try:
          client = OpenAI(api_key=OPENAI_API_KEY)
-         content = "\n\n---\n\n".join(docs)[:12000]  # trim to avoid token overflow
+         content = "\n\n---\n\n".join(docs)[:12000]

-         print(f"🧠 Summarizing topic via the OpenAI: {topic_key}")
+         print(f"🧠 Summarizing topic via OpenAI: {topic_key}")
          completion = client.chat.completions.create(
              model="gpt-4",
              messages=[
@@ -96,7 +101,6 @@ def generate_and_cache_daily_feed():
      try:
          print("🆕 Running OpenAI-powered daily feed generator....")
          topic_docs = load_all_documents_grouped_by_topic()
-         print('Loaded topics', topic_docs)
          feed_map = {}

          for topic_key in TOPIC_KEYS:
 
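Note on the new imports: "from llama_index.vector_stores.types import VectorStoreQuery" matches the pre-0.10 llama_index package layout. In llama_index 0.10 and later, the vector store query types live under llama_index.core instead. A minimal sketch of the equivalent imports, assuming that newer layout:

# Assumes llama_index >= 0.10, where the vector store query types
# moved into the core package.
from llama_index.core.vector_stores.types import (
    VectorStoreQuery,
    MetadataFilters,
    ExactMatchFilter,
)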
 
 
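The [:12000] slice in summarize_topic bounds characters, not tokens, so the actual token count sent to gpt-4 still varies with the text. If a hard token budget matters, trimming with tiktoken is one option. A sketch (trim_to_tokens and the 6000-token budget are illustrative, not from this commit):

import tiktoken

def trim_to_tokens(text: str, model: str = "gpt-4", max_tokens: int = 6000) -> str:
    # Encode with the model's tokenizer and keep only the first max_tokens tokens.
    enc = tiktoken.encoding_for_model(model)
    return enc.decode(enc.encode(text)[:max_tokens])

# Inside summarize_topic this would replace the character slice:
# content = trim_to_tokens("\n\n---\n\n".join(docs))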
 
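generate_and_cache_daily_feed builds feed_map per topic, and the module's redis and json imports plus the REDIS_URL default suggest the result is serialized into Redis. A hypothetical sketch of that caching step (the cache_feed helper and the daily_feed key name are illustrative, not taken from this commit):

import json
import redis

def cache_feed(feed_map: dict, redis_url: str) -> None:
    # Store the whole feed as one JSON blob under a single key.
    client = redis.Redis.from_url(redis_url)
    client.set("daily_feed", json.dumps(feed_map))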