ragV98 committed on
Commit
9d73da0
Β·
1 Parent(s): 5bf1a1a
Files changed (1) hide show
  1. components/generators/daily_feed.py +21 -14
components/generators/daily_feed.py CHANGED
@@ -12,7 +12,7 @@ from llama_index.core.query_engine import RetrieverQueryEngine
12
  from llama_index.core.schema import Document
13
  from llama_index.core.settings import Settings
14
 
15
- # βœ… Disable implicit LLM usage (prevents OpenAI fallback)
16
  Settings.llm = None
17
 
18
  # πŸ” Environment variables
@@ -24,19 +24,19 @@ HF_TOKEN = os.environ.get("HF_TOKEN") # Hugging Face token
24
  # βœ… Redis client
25
  redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
26
 
27
- # πŸ“° Topics
28
  TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
29
 
30
- # ✍️ Build summarization prompt
31
  def build_prompt(content: str, topic: str) -> str:
32
  return (
33
  f"You are a news summarizer. Summarize the following content in 25-30 words. "
34
  f"Make it engaging and informative. Include appropriate emojis. Topic: {topic}\n\n{content}"
35
  )
36
 
37
- # 🧠 Call Mistral via Hugging Face endpoint
38
  def call_mistral(prompt: str) -> str:
39
- if not prompt or len(prompt.strip()) < 10:
40
  print(f"⚠️ Skipping empty or invalid prompt:\n{prompt}\n")
41
  return None
42
 
@@ -45,11 +45,11 @@ def call_mistral(prompt: str) -> str:
45
  "Content-Type": "application/json"
46
  }
47
  payload = {
48
- "inputs": [
49
- {"role": "user", "content": prompt}
50
- ]
51
  }
52
 
 
 
53
  try:
54
  response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
55
  response.raise_for_status()
@@ -58,22 +58,29 @@ def call_mistral(prompt: str) -> str:
58
  print(f"⚠️ Mistral error: {e}")
59
  return None
60
 
61
- # βœ‚οΈ Summarize documents for a given topic
62
  def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
63
  feed = []
64
- for doc in docs[:5]:
 
 
 
 
 
 
65
  prompt = build_prompt(doc, topic)
66
- print("The built prompt", prompt)
67
  summary = call_mistral(prompt)
 
68
  if summary:
69
  feed.append({
70
  "summary": summary,
71
  "image_url": "https://source.unsplash.com/800x600/?news",
72
  "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
73
  })
 
74
  return feed
75
 
76
- # πŸ” Main pipeline: generate and cache feed
77
  def generate_and_cache_daily_feed(documents: List[Document]):
78
  index = VectorStoreIndex.from_documents(documents)
79
  retriever = index.as_retriever()
@@ -92,12 +99,12 @@ def generate_and_cache_daily_feed(documents: List[Document]):
92
  "feed": topic_feed
93
  })
94
 
95
- # πŸ’Ύ Cache in Redis
96
  redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
97
  print(f"βœ… Cached daily feed under key '{REDIS_KEY}'")
98
  return final_feed
99
 
100
- # πŸ“¦ For API or debugging
101
  def get_cached_daily_feed():
102
  cached = redis_client.get(REDIS_KEY)
103
  return json.loads(cached) if cached else []
 
12
  from llama_index.core.schema import Document
13
  from llama_index.core.settings import Settings
14
 
15
+ # βœ… Disable OpenAI fallback
16
  Settings.llm = None
17
 
18
  # πŸ” Environment variables
 
24
  # βœ… Redis client
25
  redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
26
 
27
+ # πŸ“° Topics to summarize
28
  TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
29
 
30
+ # ✍️ Build Mistral prompt
31
def build_prompt(content: str, topic: str) -> str:
    """Compose the summarization prompt sent to Mistral for one article."""
    instructions = (
        "You are a news summarizer. Summarize the following content in 25-30 words. "
        f"Make it engaging and informative. Include appropriate emojis. Topic: {topic}"
    )
    # Instructions first, then a blank line, then the raw article text.
    return f"{instructions}\n\n{content}"
36
 
37
+ # 🧠 Send prompt to Mistral
38
  def call_mistral(prompt: str) -> str:
39
+ if not prompt or len(prompt.strip()) < 50:
40
  print(f"⚠️ Skipping empty or invalid prompt:\n{prompt}\n")
41
  return None
42
 
 
45
  "Content-Type": "application/json"
46
  }
47
  payload = {
48
+ "inputs": [{"role": "user", "content": prompt}]
 
 
49
  }
50
 
51
+ print(f"\nπŸ“€ Prompt sent to Mistral:\n{prompt[:300]}...\n") # show a snippet for debugging
52
+
53
  try:
54
  response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
55
  response.raise_for_status()
 
58
  print(f"⚠️ Mistral error: {e}")
59
  return None
60
 
61
+ # βœ‚οΈ Generate summaries per topic
62
def summarize_topic(
    docs: List[str],
    topic: str,
    max_docs: int = 5,
    min_chars: int = 200,
) -> List[Dict]:
    """Summarize up to ``max_docs`` documents for one topic via Mistral.

    Args:
        docs: Raw article texts for the topic.
        topic: Topic label injected into the prompt and the fallback link.
        max_docs: Cap on how many documents to attempt (default 5, as before).
        min_chars: Minimum stripped length for a doc to be worth summarizing
            (default 200, as before).

    Returns:
        A list of feed entries, each ``{"summary", "image_url", "article_link"}``.
        Docs that are too short or whose summarization fails are skipped.
    """
    feed: List[Dict] = []

    for i, doc in enumerate(docs[:max_docs]):
        # Too little text cannot yield a meaningful 25-30 word summary.
        if not doc or len(doc.strip()) < min_chars:
            print(f"⚠️ Skipped short/empty doc {i+1} for '{topic}'\n")
            continue

        print(f"📄 Doc {i+1} preview:\n{doc[:300]}...\n")
        prompt = build_prompt(doc, topic)
        summary = call_mistral(prompt)

        # call_mistral returns None on any error; drop those entries.
        if summary:
            feed.append({
                "summary": summary,
                # NOTE(review): placeholder image and search link — real
                # article URLs are apparently unavailable here; confirm.
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": "https://google.com/search?q=" + topic.replace(" ", "+"),
            })

    return feed
82
 
83
+ # πŸ” Full pipeline
84
  def generate_and_cache_daily_feed(documents: List[Document]):
85
  index = VectorStoreIndex.from_documents(documents)
86
  retriever = index.as_retriever()
 
99
  "feed": topic_feed
100
  })
101
 
102
+ # πŸ’Ύ Cache feed to Redis
103
  redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
104
  print(f"βœ… Cached daily feed under key '{REDIS_KEY}'")
105
  return final_feed
106
 
107
+ # πŸ“¦ For API access
108
def get_cached_daily_feed():
    """Fetch the cached daily feed from Redis; return [] when nothing is cached."""
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        return []
    return json.loads(raw)