ragV98 committed on
Commit
6716a7e
·
1 Parent(s): 236d6c7

prompt revision 4

Browse files
Files changed (1) hide show
  1. components/generators/daily_feed.py +32 -69
components/generators/daily_feed.py CHANGED
@@ -2,10 +2,6 @@ import os
2
  import sys
3
  import json
4
  import requests
5
- from typing import Optional
6
-
7
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
8
-
9
  import redis
10
  from typing import List, Dict
11
  from llama_index.core import VectorStoreIndex
@@ -13,113 +9,81 @@ from llama_index.core.query_engine import RetrieverQueryEngine
13
  from llama_index.core.schema import Document
14
  from llama_index.core.settings import Settings
15
 
16
- # βœ… Disable OpenAI fallback
17
  Settings.llm = None
18
 
19
  # πŸ” Environment variables
20
  REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
21
  REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
22
- MISTRAL_URL = os.environ.get("MISTRAL_URL") # Hugging Face endpoint
23
- HF_TOKEN = os.environ.get("HF_TOKEN") # Hugging Face token
24
 
25
  # βœ… Redis client
26
  redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
27
 
28
- # πŸ“° Topics to summarize
29
  TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
30
 
31
- BASE_PROMPT = """
32
- You are the official news summarizer for Nuse β€” a global, AI-powered news delivery platform. Your tone is factual, insightful, and engaging. Summarize news articles in a crisp, modern format.
33
-
34
- Instructions:
35
- - Write in 25–30 words only.
36
- - Generate response organically and avoid trimming words to fit within the limit.
37
- - Highlight key facts: who, what, when, where, and why.
38
- - Use a professional tone, but include 1–2 well-placed emojis for engagement.
39
- - Avoid any mention of Nuse, yourself, or the prompt.
40
- - Never repeat headlines or include generic filler.
41
- - Return only the summary β€” do not include this prompt or any extra info.
42
- """
43
-
44
- # ✍️ Build Mistral prompt
45
- def build_prompt(content: str, topic: str) -> str:
46
- return f"{BASE_PROMPT.strip()}\n\nTopic: {topic}\n\n{content.strip()}"
47
-
48
- # 🧠 Send prompt to Mistral
49
  HEADERS = {
50
  "Authorization": f"Bearer {HF_TOKEN}",
51
  "Content-Type": "application/json"
52
  }
53
 
54
- def call_mistral(prompt: str) -> Optional[str]:
55
- if not prompt or len(prompt.strip()) < 10:
56
- print(f"⚠️ Skipping empty or invalid prompt:\n{prompt}\n")
57
- return None
58
-
59
- # Wrap the prompt properly
60
- formatted_prompt = f"[INST] {prompt.strip()} [/INST]"
61
-
 
 
 
 
62
  payload = {
63
- "inputs": formatted_prompt,
64
  "parameters": {
65
- "max_new_tokens": 120,
66
  "temperature": 0.7,
67
- }
68
  }
69
-
70
- headers = {
71
- "Authorization": f"Bearer {HF_TOKEN}",
72
- "Content-Type": "application/json"
73
- }
74
-
75
  try:
76
- response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=30)
77
  response.raise_for_status()
78
  data = response.json()
79
 
80
- # Handle both dict and list responses
81
- if isinstance(data, list) and "generated_text" in data[0]:
82
- return data[0]["generated_text"].strip()
83
- elif isinstance(data, dict) and "generated_text" in data:
84
  return data["generated_text"].strip()
85
- else:
86
- print("⚠️ Unexpected response format:", data)
87
- return None
88
 
89
- except requests.RequestException as e:
90
  print("❌ HF Endpoint error:", str(e))
91
  if e.response is not None:
92
- print("❌ Response body:", e.response.text[:300])
93
  except Exception as e:
94
  print("❌ Unknown error:", str(e))
95
 
96
- return None
97
-
98
-
99
 
100
- # βœ‚οΈ Generate summaries per topic
101
  def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
102
  feed = []
103
-
104
- for i, doc in enumerate(docs[:5]):
105
- if not doc or len(doc.strip()) < 200:
106
- print(f"⚠️ Skipped short/empty doc {i+1} for '{topic}'\n")
107
- continue
108
-
109
- print(f"πŸ“„ Doc {i+1} preview:\n{doc[:300]}...\n")
110
  prompt = build_prompt(doc, topic)
 
111
  summary = call_mistral(prompt)
112
-
113
  if summary:
114
  feed.append({
115
  "summary": summary,
116
  "image_url": "https://source.unsplash.com/800x600/?news",
117
  "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
118
  })
119
-
120
  return feed
121
 
122
- # πŸ” Full pipeline
123
  def generate_and_cache_daily_feed(documents: List[Document]):
124
  index = VectorStoreIndex.from_documents(documents)
125
  retriever = index.as_retriever()
@@ -138,12 +102,11 @@ def generate_and_cache_daily_feed(documents: List[Document]):
138
  "feed": topic_feed
139
  })
140
 
141
- # πŸ’Ύ Cache feed to Redis
142
  redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
143
  print(f"βœ… Cached daily feed under key '{REDIS_KEY}'")
144
  return final_feed
145
 
146
- # πŸ“¦ For API access
147
  def get_cached_daily_feed():
148
  cached = redis_client.get(REDIS_KEY)
149
  return json.loads(cached) if cached else []
 
2
  import sys
3
  import json
4
  import requests
 
 
 
 
5
  import redis
6
  from typing import List, Dict
7
  from llama_index.core import VectorStoreIndex
 
9
  from llama_index.core.schema import Document
10
  from llama_index.core.settings import Settings
11
 
12
# Prevent llama_index from lazily constructing a default (OpenAI) LLM.
Settings.llm = None

# Runtime configuration pulled from the environment.
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")  # NOTE(review): key name comes from a *token* env var — confirm intent
HF_ENDPOINT_URL = os.environ.get("MISTRAL_URL")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Shared Redis connection; decode_responses gives us str instead of bytes.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# Topics the daily feed is built from.
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]

# Auth headers for every Hugging Face endpoint call.
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}
32
 
33
def build_prompt(content: str, topic: str) -> str:
    """Wrap an article and its topic in a Mistral-style [INST] prompt.

    Returns the complete prompt string, including the ``<s>``/``</s>``
    sentinel tokens expected by Mistral chat formatting.
    """
    instruction = "\n".join([
        "You are Nuse’s official news summarizer — factual, concise, and engaging.",
        "Summarize the following article in 25–30 words with 1–2 emojis.",
        "Return only the summary.",
    ])
    body = f"{instruction}\n\nTopic: {topic}\n\n{content.strip()}"
    return f"<s>[INST]{body}[/INST]</s>"
42
+
43
+ # πŸ” Call Mistral using HF Inference Endpoint
44
+ def call_mistral(prompt: str) -> str:
45
  payload = {
46
+ "inputs": prompt,
47
  "parameters": {
48
+ "max_new_tokens": 128,
49
  "temperature": 0.7,
50
+ },
51
  }
 
 
 
 
 
 
52
  try:
53
+ response = requests.post(HF_ENDPOINT_URL, headers=HEADERS, json=payload, timeout=90)
54
  response.raise_for_status()
55
  data = response.json()
56
 
57
+ if isinstance(data, list) and data:
58
+ return data[0].get("generated_text", "").strip()
59
+ if isinstance(data, dict) and "generated_text" in data:
 
60
  return data["generated_text"].strip()
 
 
 
61
 
62
+ except requests.exceptions.RequestException as e:
63
  print("❌ HF Endpoint error:", str(e))
64
  if e.response is not None:
65
+ print("Endpoint said:", e.response.text[:300])
66
  except Exception as e:
67
  print("❌ Unknown error:", str(e))
68
 
69
+ return ""
 
 
70
 
71
+ # βœ‚οΈ Summarize top N documents
72
  def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
73
  feed = []
74
+ for doc in docs[:5]:
 
 
 
 
 
 
75
  prompt = build_prompt(doc, topic)
76
+ print("\nπŸ“€ Prompt sent to Mistral:\n", prompt[:300], "...\n")
77
  summary = call_mistral(prompt)
 
78
  if summary:
79
  feed.append({
80
  "summary": summary,
81
  "image_url": "https://source.unsplash.com/800x600/?news",
82
  "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
83
  })
 
84
  return feed
85
 
86
+ # ⚑ Generate and cache daily feed
87
  def generate_and_cache_daily_feed(documents: List[Document]):
88
  index = VectorStoreIndex.from_documents(documents)
89
  retriever = index.as_retriever()
 
102
  "feed": topic_feed
103
  })
104
 
 
105
  redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
106
  print(f"βœ… Cached daily feed under key '{REDIS_KEY}'")
107
  return final_feed
108
 
109
def get_cached_daily_feed():
    """Return the cached daily feed from Redis, or [] when nothing is cached."""
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        return []
    return json.loads(raw)