fix 1
components/generators/daily_feed.py
CHANGED
@@ -12,7 +12,7 @@ from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.schema import Document
 from llama_index.core.settings import Settings
 
-# ✅ Disable
+# ✅ Disable OpenAI fallback
 Settings.llm = None
 
 # 🔐 Environment variables
@@ -24,19 +24,19 @@ HF_TOKEN = os.environ.get("HF_TOKEN") # Hugging Face token
 # ✅ Redis client
 redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
 
-# 📰 Topics
+# 📰 Topics to summarize
 TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
 
-# ✍️ Build
+# ✍️ Build Mistral prompt
 def build_prompt(content: str, topic: str) -> str:
     return (
         f"You are a news summarizer. Summarize the following content in 25-30 words. "
         f"Make it engaging and informative. Include appropriate emojis. Topic: {topic}\n\n{content}"
     )
 
-# 🧠
+# 🧠 Send prompt to Mistral
 def call_mistral(prompt: str) -> str:
-    if not prompt or len(prompt.strip()) <
+    if not prompt or len(prompt.strip()) < 50:
         print(f"⚠️ Skipping empty or invalid prompt:\n{prompt}\n")
         return None
 
@@ -45,11 +45,11 @@ def call_mistral(prompt: str) -> str:
         "Content-Type": "application/json"
     }
     payload = {
-        "inputs": [
-            {"role": "user", "content": prompt}
-        ]
+        "inputs": [{"role": "user", "content": prompt}]
     }
 
+    print(f"\n📤 Prompt sent to Mistral:\n{prompt[:300]}...\n")  # show a snippet for debugging
+
     try:
         response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
         response.raise_for_status()
@@ -58,22 +58,29 @@ def call_mistral(prompt: str) -> str:
         print(f"⚠️ Mistral error: {e}")
         return None
 
-# ✍️
+# ✍️ Generate summaries per topic
 def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
     feed = []
+
+    for i, doc in enumerate(docs[:5]):
+        if not doc or len(doc.strip()) < 200:
+            print(f"⚠️ Skipped short/empty doc {i+1} for '{topic}'\n")
+            continue
+
+        print(f"📄 Doc {i+1} preview:\n{doc[:300]}...\n")
         prompt = build_prompt(doc, topic)
-        print("The built prompt", prompt)
         summary = call_mistral(prompt)
+
         if summary:
             feed.append({
                 "summary": summary,
                 "image_url": "https://source.unsplash.com/800x600/?news",
                 "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
             })
+
     return feed
 
-# 🚀
+# 🚀 Full pipeline
 def generate_and_cache_daily_feed(documents: List[Document]):
    index = VectorStoreIndex.from_documents(documents)
    retriever = index.as_retriever()
@@ -92,12 +99,12 @@ def generate_and_cache_daily_feed(documents: List[Document]):
         "feed": topic_feed
     })
 
-    # 💾 Cache
+    # 💾 Cache feed to Redis
     redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
     print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
     return final_feed
 
-# 📦 For API
+# 📦 For API access
 def get_cached_daily_feed():
     cached = redis_client.get(REDIS_KEY)
     return json.loads(cached) if cached else []
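For orientation, a minimal usage sketch of the two entry points this commit touches is shown below. It is hypothetical and not part of the change: the import path simply mirrors the file path above, the stand-in document is illustrative, and REDIS_URL, HF_TOKEN, and MISTRAL_URL are assumed to already be set in the environment.

# Hypothetical usage sketch (not part of this commit).
from llama_index.core.schema import Document

from components.generators.daily_feed import (
    generate_and_cache_daily_feed,
    get_cached_daily_feed,
)

# Stand-in document; in the real app these come from whatever feeds the pipeline.
docs = [Document(text="Full article text about India news ... " * 30)]

# Build the vector index, summarize each topic via Mistral, and cache the feed to Redis.
generate_and_cache_daily_feed(docs)

# Read path, e.g. what an API route would serve; returns [] until the cache is populated.
for entry in get_cached_daily_feed():
    print(entry)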