Spaces:

nuseAI
/

fastAPIv2

Sleeping

App Files Files Community

ragV98 committed on Jul 19

Commit

6716a7e

1 Parent(s): 236d6c7

prompt revision 4

Browse files

Files changed (1) hide show

components/generators/daily_feed.py +32 -69

components/generators/daily_feed.py CHANGED Viewed

@@ -2,10 +2,6 @@ import os
 import sys
 import json
 import requests
-from typing import Optional
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 import redis
 from typing import List, Dict
 from llama_index.core import VectorStoreIndex
@@ -13,113 +9,81 @@ from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.schema import Document
 from llama_index.core.settings import Settings
-# ✅ Disable OpenAI fallback
 Settings.llm = None
 # 🔐 Environment variables
 REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
 REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
-MISTRAL_URL = os.environ.get("MISTRAL_URL")       # Hugging Face endpoint
-HF_TOKEN = os.environ.get("HF_TOKEN")             # Hugging Face token
 # ✅ Redis client
 redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
-# 📰 Topics to summarize
 TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
-BASE_PROMPT = """
-You are the official news summarizer for Nuse — a global, AI-powered news delivery platform. Your tone is factual, insightful, and engaging. Summarize news articles in a crisp, modern format.
-Instructions:
-- Write in 25–30 words only.
-- Generate response organically and avoid trimming words to fit within the limit.
-- Highlight key facts: who, what, when, where, and why.
-- Use a professional tone, but include 1–2 well-placed emojis for engagement.
-- Avoid any mention of Nuse, yourself, or the prompt.
-- Never repeat headlines or include generic filler.
-- Return only the summary — do not include this prompt or any extra info.
-"""
-# ✍️ Build Mistral prompt
-def build_prompt(content: str, topic: str) -> str:
-    return f"{BASE_PROMPT.strip()}\n\nTopic: {topic}\n\n{content.strip()}"
-# 🧠 Send prompt to Mistral
 HEADERS = {
     "Authorization": f"Bearer {HF_TOKEN}",
     "Content-Type": "application/json"
 }
-def call_mistral(prompt: str) -> Optional[str]:
-    if not prompt or len(prompt.strip()) < 10:
-        print(f"⚠️ Skipping empty or invalid prompt:\n{prompt}\n")
-        return None
-    # Wrap the prompt properly
-    formatted_prompt = f"[INST] {prompt.strip()} [/INST]"
     payload = {
-        "inputs": formatted_prompt,
         "parameters": {
-            "max_new_tokens": 120,
             "temperature": 0.7,
-        }
     }
-    headers = {
-        "Authorization": f"Bearer {HF_TOKEN}",
-        "Content-Type": "application/json"
-    }
     try:
-        response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=30)
         response.raise_for_status()
         data = response.json()
-        # Handle both dict and list responses
-        if isinstance(data, list) and "generated_text" in data[0]:
-            return data[0]["generated_text"].strip()
-        elif isinstance(data, dict) and "generated_text" in data:
             return data["generated_text"].strip()
-        else:
-            print("⚠️ Unexpected response format:", data)
-            return None
-    except requests.RequestException as e:
         print("❌ HF Endpoint error:", str(e))
         if e.response is not None:
-            print("❌ Response body:", e.response.text[:300])
     except Exception as e:
         print("❌ Unknown error:", str(e))
-    return None
-# ✂️ Generate summaries per topic
 def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
     feed = []
-    for i, doc in enumerate(docs[:5]):
-        if not doc or len(doc.strip()) < 200:
-            print(f"⚠️ Skipped short/empty doc {i+1} for '{topic}'\n")
-            continue
-        print(f"📄 Doc {i+1} preview:\n{doc[:300]}...\n")
         prompt = build_prompt(doc, topic)
         summary = call_mistral(prompt)
         if summary:
             feed.append({
                 "summary": summary,
                 "image_url": "https://source.unsplash.com/800x600/?news",
                 "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
             })
     return feed
-# 🔁 Full pipeline
 def generate_and_cache_daily_feed(documents: List[Document]):
     index = VectorStoreIndex.from_documents(documents)
     retriever = index.as_retriever()
@@ -138,12 +102,11 @@ def generate_and_cache_daily_feed(documents: List[Document]):
             "feed": topic_feed
         })
-    # 💾 Cache feed to Redis
     redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
     print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
     return final_feed
-# 📦 For API access
 def get_cached_daily_feed():
     cached = redis_client.get(REDIS_KEY)
     return json.loads(cached) if cached else []

 import sys
 import json
 import requests
 import redis
 from typing import List, Dict
 from llama_index.core import VectorStoreIndex
 from llama_index.core.schema import Document
 from llama_index.core.settings import Settings
+# ✅ Disable implicit LLM usage
 Settings.llm = None
 # 🔐 Environment variables
 REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
 REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
+HF_ENDPOINT_URL = os.environ.get("MISTRAL_URL")
+HF_TOKEN = os.environ.get("HF_TOKEN")
 # ✅ Redis client
 redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
+# 📰 Topics
 TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
+# 📄 Headers for HF endpoint
 HEADERS = {
     "Authorization": f"Bearer {HF_TOKEN}",
     "Content-Type": "application/json"
 }
+# 🧠 Build Mistral-style instruction prompt
def build_prompt(content: str, topic: str) -> str:
    """Build a Mistral-instruct prompt asking for a short summary of one article.

    Args:
        content: Raw article text; surrounding whitespace is stripped.
        topic: Topic label placed on the "Topic:" line above the article.

    Returns:
        The instruction, topic line and article wrapped as
        ``<s>[INST]...[/INST]``. The prompt ends on the closing instruction
        tag so the model generates the summary right after it.
    """
    base_instruction = (
        "You are Nuse’s official news summarizer — factual, concise, and engaging.\n"
        "Summarize the following article in 25–30 words with 1–2 emojis.\n"
        "Return only the summary."
    )
    tail = f"Topic: {topic}\n\n{content.strip()}"
    # No trailing "</s>": the end-of-sequence marker belongs after the model's
    # answer, not after the instruction. Appending it here tells the model the
    # turn is already finished before it has produced a summary.
    return f"<s>[INST]{base_instruction}\n\n{tail}[/INST]"
+# 🔁 Call Mistral using HF Inference Endpoint
def call_mistral(prompt: str) -> str:
    """Send a prompt to the Hugging Face endpoint and return the generated text.

    Best-effort: every failure (missing endpoint URL, HTTP/network error,
    unexpected response shape) is logged with ``print`` and reported as an
    empty string, so callers can simply test the result for truthiness.

    Args:
        prompt: Fully formatted instruction prompt.

    Returns:
        The generated text with surrounding whitespace removed, or ``""`` when
        nothing usable was produced.
    """
    if not HF_ENDPOINT_URL:
        # Fail with a clear message instead of a confusing "invalid URL" error.
        print("❌ HF Endpoint error:", "MISTRAL_URL is not set")
        return ""

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 128,
            "temperature": 0.7,
            # Ask for the completion only; otherwise the endpoint may echo the
            # whole prompt back inside "generated_text".
            "return_full_text": False,
        },
    }
    try:
        response = requests.post(HF_ENDPOINT_URL, headers=HEADERS, json=payload, timeout=90)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print("❌ HF Endpoint error:", str(e))
        if e.response is not None:
            print("Endpoint said:", e.response.text[:300])
        return ""
    except Exception as e:
        print("❌ Unknown error:", str(e))
        return ""

    # The endpoint answers with either [{"generated_text": ...}] or
    # {"generated_text": ...}; anything else is treated as "no summary".
    first = data[0] if isinstance(data, list) and data else data
    text = first.get("generated_text") if isinstance(first, dict) else None
    if not isinstance(text, str):
        return ""
    # Defensive: drop an echoed prompt if the endpoint ignored return_full_text.
    if prompt and text.startswith(prompt):
        text = text[len(prompt):]
    return text.strip()
+# ✂️ Summarize top N documents
 def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
     """Summarize up to the first five documents of a topic into feed entries.

     Args:
         docs: Article texts for the topic; only the first five are considered
             and blank entries are skipped.
         topic: Topic label, used in the prompt and in the search link.

     Returns:
         One dict per successful summary with the keys ``summary``,
         ``image_url`` and ``article_link``. Documents for which the model
         returned nothing are omitted.
     """
     from urllib.parse import quote_plus

     # quote_plus still turns spaces into "+", but also escapes characters such
     # as "&" or "#" that would otherwise corrupt the query string.
     article_link = "https://google.com/search?q=" + quote_plus(topic)

     feed = []
     for doc in (docs or [])[:5]:
         if not doc or not doc.strip():
             # Nothing to summarize; avoid a wasted (and slow) endpoint call.
             continue
         prompt = build_prompt(doc, topic)
         print("\n📤 Prompt sent to Mistral:\n", prompt[:300], "...\n")
         summary = call_mistral(prompt)
         if summary:
             feed.append({
                 "summary": summary,
                 "image_url": "https://source.unsplash.com/800x600/?news",
                 "article_link": article_link,
             })
     return feed
+# ⚡ Generate and cache daily feed
 def generate_and_cache_daily_feed(documents: List[Document]):
     index = VectorStoreIndex.from_documents(documents)
     retriever = index.as_retriever()
             "feed": topic_feed
         })
     redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
     print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
     return final_feed
+# 📦 For testing or API access
 def get_cached_daily_feed():
     """Return the daily feed stored in Redis under ``REDIS_KEY``.

     Returns:
         The decoded JSON payload, or an empty list when the key is missing,
         empty, or does not contain valid JSON.
     """
     cached = redis_client.get(REDIS_KEY)
     if not cached:
         return []
     try:
         return json.loads(cached)
     except json.JSONDecodeError as e:
         # A corrupt cache entry should read as "no feed yet", not as an
         # unhandled exception in whoever is serving the feed.
         print("❌ Cached daily feed is not valid JSON:", str(e))
         return []