prompt revision 4
Browse files
components/generators/daily_feed.py
CHANGED
@@ -2,10 +2,6 @@ import os
|
|
2 |
import sys
|
3 |
import json
|
4 |
import requests
|
5 |
-
from typing import Optional
|
6 |
-
|
7 |
-
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
8 |
-
|
9 |
import redis
|
10 |
from typing import List, Dict
|
11 |
from llama_index.core import VectorStoreIndex
|
@@ -13,113 +9,81 @@ from llama_index.core.query_engine import RetrieverQueryEngine
|
|
13 |
from llama_index.core.schema import Document
|
14 |
from llama_index.core.settings import Settings
|
15 |
|
16 |
-
# β
Disable
|
17 |
Settings.llm = None
|
18 |
|
19 |
# π Environment variables
|
20 |
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
|
21 |
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
|
22 |
-
|
23 |
-
HF_TOKEN = os.environ.get("HF_TOKEN")
|
24 |
|
25 |
# β
Redis client
|
26 |
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
|
27 |
|
28 |
-
# π° Topics
|
29 |
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
|
30 |
|
31 |
-
|
32 |
-
You are the official news summarizer for Nuse β a global, AI-powered news delivery platform. Your tone is factual, insightful, and engaging. Summarize news articles in a crisp, modern format.
|
33 |
-
|
34 |
-
Instructions:
|
35 |
-
- Write in 25β30 words only.
|
36 |
-
- Generate response organically and avoid trimming words to fit within the limit.
|
37 |
-
- Highlight key facts: who, what, when, where, and why.
|
38 |
-
- Use a professional tone, but include 1β2 well-placed emojis for engagement.
|
39 |
-
- Avoid any mention of Nuse, yourself, or the prompt.
|
40 |
-
- Never repeat headlines or include generic filler.
|
41 |
-
- Return only the summary β do not include this prompt or any extra info.
|
42 |
-
"""
|
43 |
-
|
44 |
-
# βοΈ Build Mistral prompt
|
45 |
-
def build_prompt(content: str, topic: str) -> str:
|
46 |
-
return f"{BASE_PROMPT.strip()}\n\nTopic: {topic}\n\n{content.strip()}"
|
47 |
-
|
48 |
-
# π§ Send prompt to Mistral
|
49 |
HEADERS = {
|
50 |
"Authorization": f"Bearer {HF_TOKEN}",
|
51 |
"Content-Type": "application/json"
|
52 |
}
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
62 |
payload = {
|
63 |
-
"inputs":
|
64 |
"parameters": {
|
65 |
-
"max_new_tokens":
|
66 |
"temperature": 0.7,
|
67 |
-
}
|
68 |
}
|
69 |
-
|
70 |
-
headers = {
|
71 |
-
"Authorization": f"Bearer {HF_TOKEN}",
|
72 |
-
"Content-Type": "application/json"
|
73 |
-
}
|
74 |
-
|
75 |
try:
|
76 |
-
response = requests.post(
|
77 |
response.raise_for_status()
|
78 |
data = response.json()
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
elif isinstance(data, dict) and "generated_text" in data:
|
84 |
return data["generated_text"].strip()
|
85 |
-
else:
|
86 |
-
print("β οΈ Unexpected response format:", data)
|
87 |
-
return None
|
88 |
|
89 |
-
except requests.RequestException as e:
|
90 |
print("β HF Endpoint error:", str(e))
|
91 |
if e.response is not None:
|
92 |
-
print("
|
93 |
except Exception as e:
|
94 |
print("β Unknown error:", str(e))
|
95 |
|
96 |
-
return
|
97 |
-
|
98 |
-
|
99 |
|
100 |
-
# βοΈ
|
101 |
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
|
102 |
feed = []
|
103 |
-
|
104 |
-
for i, doc in enumerate(docs[:5]):
|
105 |
-
if not doc or len(doc.strip()) < 200:
|
106 |
-
print(f"β οΈ Skipped short/empty doc {i+1} for '{topic}'\n")
|
107 |
-
continue
|
108 |
-
|
109 |
-
print(f"π Doc {i+1} preview:\n{doc[:300]}...\n")
|
110 |
prompt = build_prompt(doc, topic)
|
|
|
111 |
summary = call_mistral(prompt)
|
112 |
-
|
113 |
if summary:
|
114 |
feed.append({
|
115 |
"summary": summary,
|
116 |
"image_url": "https://source.unsplash.com/800x600/?news",
|
117 |
"article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
|
118 |
})
|
119 |
-
|
120 |
return feed
|
121 |
|
122 |
-
#
|
123 |
def generate_and_cache_daily_feed(documents: List[Document]):
|
124 |
index = VectorStoreIndex.from_documents(documents)
|
125 |
retriever = index.as_retriever()
|
@@ -138,12 +102,11 @@ def generate_and_cache_daily_feed(documents: List[Document]):
|
|
138 |
"feed": topic_feed
|
139 |
})
|
140 |
|
141 |
-
# πΎ Cache feed to Redis
|
142 |
redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
|
143 |
print(f"β
Cached daily feed under key '{REDIS_KEY}'")
|
144 |
return final_feed
|
145 |
|
146 |
-
# π¦ For API access
|
147 |
def get_cached_daily_feed():
|
148 |
cached = redis_client.get(REDIS_KEY)
|
149 |
return json.loads(cached) if cached else []
|
|
|
2 |
import sys
|
3 |
import json
|
4 |
import requests
|
|
|
|
|
|
|
|
|
5 |
import redis
|
6 |
from typing import List, Dict
|
7 |
from llama_index.core import VectorStoreIndex
|
|
|
9 |
from llama_index.core.schema import Document
|
10 |
from llama_index.core.settings import Settings
|
11 |
|
12 |
+
# ✅ Disable implicit LLM usage
Settings.llm = None

# Environment variables
REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): REDIS_KEY is the key the daily feed is stored under, yet it is
# read from UPSTASH_REDIS_TOKEN and is None when that variable is unset.
# Confirm this is the intended variable.
REDIS_KEY = os.getenv("UPSTASH_REDIS_TOKEN")
HF_ENDPOINT_URL = os.getenv("MISTRAL_URL")
HF_TOKEN = os.getenv("HF_TOKEN")

# ✅ Redis client
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# 📰 Topics
TOPICS = [
    "India news",
    "World news",
    "Tech news",
    "Finance news",
    "Sports news",
]

# Headers for HF endpoint
# NOTE(review): when HF_TOKEN is unset this sends the literal "Bearer None".
HEADERS = {
    "Authorization": "Bearer {}".format(HF_TOKEN),
    "Content-Type": "application/json",
}
|
32 |
|
33 |
+
# 🧠 Build Mistral-style instruction prompt
def build_prompt(content: str, topic: str) -> str:
    """Wrap an article in a Mistral ``[INST]`` instruction prompt.

    The prompt ends at ``[/INST]`` so the model writes its answer after it.
    The end-of-sequence marker ``</s>`` belongs after the model's answer,
    so it is not appended here.

    Args:
        content: Raw article text. Surrounding whitespace is stripped and
            ``None`` is treated as empty text.
        topic: Topic label placed on a ``Topic:`` line before the article.

    Returns:
        The instruction text, a blank line, the ``Topic:`` line, a blank
        line and the article, enclosed in ``<s>[INST]`` ... ``[/INST]``.
    """
    base_instruction = (
        "You are Nuse’s official news summarizer — factual, concise, and engaging.\n"
        "Summarize the following article in 25–30 words with 1–2 emojis.\n"
        "Return only the summary."
    )
    article = (content or "").strip()
    tail = f"Topic: {topic}\n\n{article}"
    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]"
|
42 |
+
|
43 |
+
def _extract_generated_text(data) -> str:
    """Pull ``generated_text`` out of a list- or dict-shaped endpoint response."""
    record = data[0] if isinstance(data, list) and data else data
    if isinstance(record, dict):
        text = record.get("generated_text")
        if isinstance(text, str):
            return text.strip()
    print("⚠️ Unexpected response format:", data)
    return ""


# Call Mistral using HF Inference Endpoint
def call_mistral(prompt: str) -> str:
    """Send ``prompt`` to the configured endpoint and return the generated text.

    Args:
        prompt: Fully formatted prompt string, sent as the ``inputs`` field.

    Returns:
        The stripped ``generated_text`` from the response, or ``""`` when the
        endpoint URL is not configured, the request fails, or the response
        has an unexpected shape. Errors are printed, never raised.
    """
    if not HF_ENDPOINT_URL:
        print("❌ HF Endpoint error: MISTRAL_URL is not set")
        return ""

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 128,
            "temperature": 0.7,
            # Ask for the completion only; otherwise pipeline-backed endpoints
            # echo the prompt at the start of generated_text.
            "return_full_text": False,
        },
    }

    try:
        response = requests.post(HF_ENDPOINT_URL, headers=HEADERS, json=payload, timeout=90)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print("❌ HF Endpoint error:", str(e))
        if e.response is not None:
            print("Endpoint said:", e.response.text[:300])
        return ""
    except Exception as e:
        # Boundary handler: one failed summary must not abort the whole feed.
        print("❌ Unknown error:", str(e))
        return ""

    return _extract_generated_text(data)
|
|
|
|
|
70 |
|
71 |
+
# Summarize top N documents
def summarize_topic(docs: List[str], topic: str, max_docs: int = 5) -> List[Dict]:
    """Summarize the leading documents of a topic into feed entries.

    Args:
        docs: Article texts for the topic. ``None`` is treated as empty.
        topic: Topic label, used in the prompt and in the search link.
        max_docs: How many leading documents to consider (default 5).

    Returns:
        One dict per non-empty summary, with the keys ``summary``,
        ``image_url`` and ``article_link``.
    """
    from urllib.parse import quote_plus

    # quote_plus turns spaces into "+" and also escapes characters such as
    # "&" or "#" that would otherwise break the query string.
    article_link = "https://google.com/search?q=" + quote_plus(topic)

    feed = []
    for doc in (docs or [])[:max_docs]:
        # Blank documents have nothing to summarize; skip the endpoint call.
        if not doc or not doc.strip():
            continue
        prompt = build_prompt(doc, topic)
        print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
        summary = call_mistral(prompt)
        if summary:
            feed.append({
                "summary": summary,
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": article_link,
            })
    return feed
|
85 |
|
86 |
+
# ⚡ Generate and cache daily feed
|
87 |
def generate_and_cache_daily_feed(documents: List[Document]):
|
88 |
index = VectorStoreIndex.from_documents(documents)
|
89 |
retriever = index.as_retriever()
|
|
|
102 |
"feed": topic_feed
|
103 |
})
|
104 |
|
|
|
105 |
redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
|
106 |
print(f"β
Cached daily feed under key '{REDIS_KEY}'")
|
107 |
return final_feed
|
108 |
|
109 |
+
# 📦 For testing or API access
def get_cached_daily_feed():
    """Return the feed stored under ``REDIS_KEY``, or ``[]`` when nothing is cached."""
    raw_feed = redis_client.get(REDIS_KEY)
    if not raw_feed:
        return []
    return json.loads(raw_feed)
|