ragV98 committed on
Commit
9d73da0
Β·
1 Parent(s): 5bf1a1a
Files changed (1) hide show
  1. components/generators/daily_feed.py +21 -14
components/generators/daily_feed.py CHANGED
@@ -12,7 +12,7 @@ from llama_index.core.query_engine import RetrieverQueryEngine
12
  from llama_index.core.schema import Document
13
  from llama_index.core.settings import Settings
14
 
15
- # βœ… Disable implicit LLM usage (prevents OpenAI fallback)
16
  Settings.llm = None
17
 
18
  # πŸ” Environment variables
@@ -24,19 +24,19 @@ HF_TOKEN = os.environ.get("HF_TOKEN") # Hugging Face token
24
  # βœ… Redis client
25
  redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
26
 
27
- # πŸ“° Topics
28
  TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
29
 
30
- # ✍️ Build summarization prompt
31
  def build_prompt(content: str, topic: str) -> str:
32
  return (
33
  f"You are a news summarizer. Summarize the following content in 25-30 words. "
34
  f"Make it engaging and informative. Include appropriate emojis. Topic: {topic}\n\n{content}"
35
  )
36
 
37
- # 🧠 Call Mistral via Hugging Face endpoint
38
  def call_mistral(prompt: str) -> str:
39
- if not prompt or len(prompt.strip()) < 10:
40
  print(f"⚠️ Skipping empty or invalid prompt:\n{prompt}\n")
41
  return None
42
 
@@ -45,11 +45,11 @@ def call_mistral(prompt: str) -> str:
45
  "Content-Type": "application/json"
46
  }
47
  payload = {
48
- "inputs": [
49
- {"role": "user", "content": prompt}
50
- ]
51
  }
52
 
 
 
53
  try:
54
  response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
55
  response.raise_for_status()
@@ -58,22 +58,29 @@ def call_mistral(prompt: str) -> str:
58
  print(f"⚠️ Mistral error: {e}")
59
  return None
60
 
61
- # βœ‚οΈ Summarize documents for a given topic
62
  def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
63
  feed = []
64
- for doc in docs[:5]:
 
 
 
 
 
 
65
  prompt = build_prompt(doc, topic)
66
- print("The built prompt", prompt)
67
  summary = call_mistral(prompt)
 
68
  if summary:
69
  feed.append({
70
  "summary": summary,
71
  "image_url": "https://source.unsplash.com/800x600/?news",
72
  "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
73
  })
 
74
  return feed
75
 
76
- # πŸ” Main pipeline: generate and cache feed
77
  def generate_and_cache_daily_feed(documents: List[Document]):
78
  index = VectorStoreIndex.from_documents(documents)
79
  retriever = index.as_retriever()
@@ -92,12 +99,12 @@ def generate_and_cache_daily_feed(documents: List[Document]):
92
  "feed": topic_feed
93
  })
94
 
95
- # πŸ’Ύ Cache in Redis
96
  redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
97
  print(f"βœ… Cached daily feed under key '{REDIS_KEY}'")
98
  return final_feed
99
 
100
- # πŸ“¦ For API or debugging
101
  def get_cached_daily_feed():
102
  cached = redis_client.get(REDIS_KEY)
103
  return json.loads(cached) if cached else []
 
12
  from llama_index.core.schema import Document
13
  from llama_index.core.settings import Settings
14
 
15
+ # βœ… Disable OpenAI fallback
16
  Settings.llm = None
17
 
18
  # πŸ” Environment variables
 
24
  # βœ… Redis client
25
  redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
26
 
27
+ # πŸ“° Topics to summarize
28
  TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
29
 
30
+ # ✍️ Build Mistral prompt
31
def build_prompt(content: str, topic: str) -> str:
    """Compose the summarization prompt sent to Mistral for one article."""
    instructions = (
        "You are a news summarizer. Summarize the following content in 25-30 words. "
        f"Make it engaging and informative. Include appropriate emojis. Topic: {topic}"
    )
    # Instructions first, then a blank line, then the raw article text.
    return f"{instructions}\n\n{content}"
36
 
37
+ # 🧠 Send prompt to Mistral
38
  def call_mistral(prompt: str) -> str:
39
+ if not prompt or len(prompt.strip()) < 50:
40
  print(f"⚠️ Skipping empty or invalid prompt:\n{prompt}\n")
41
  return None
42
 
 
45
  "Content-Type": "application/json"
46
  }
47
  payload = {
48
+ "inputs": [{"role": "user", "content": prompt}]
 
 
49
  }
50
 
51
+ print(f"\nπŸ“€ Prompt sent to Mistral:\n{prompt[:300]}...\n") # show a snippet for debugging
52
+
53
  try:
54
  response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
55
  response.raise_for_status()
 
58
  print(f"⚠️ Mistral error: {e}")
59
  return None
60
 
61
+ # βœ‚οΈ Generate summaries per topic
62
def summarize_topic(
    docs: List[str],
    topic: str,
    max_docs: int = 5,
    min_chars: int = 200,
) -> List[Dict]:
    """Summarize up to ``max_docs`` documents for one topic via Mistral.

    Args:
        docs: Raw article texts for the topic.
        topic: Topic label injected into the prompt and the fallback link.
        max_docs: Cap on how many documents to attempt (default 5, as before).
        min_chars: Minimum stripped length for a doc to be worth summarizing
            (default 200, as before).

    Returns:
        A list of feed entries, each ``{"summary", "image_url", "article_link"}``.
        Docs that are too short or whose summarization fails are skipped.
    """
    feed: List[Dict] = []

    for i, doc in enumerate(docs[:max_docs]):
        # Too little text cannot yield a meaningful 25-30 word summary.
        if not doc or len(doc.strip()) < min_chars:
            print(f"⚠️ Skipped short/empty doc {i+1} for '{topic}'\n")
            continue

        print(f"📄 Doc {i+1} preview:\n{doc[:300]}...\n")
        prompt = build_prompt(doc, topic)
        summary = call_mistral(prompt)

        # call_mistral returns None on any error; drop those entries.
        if summary:
            feed.append({
                "summary": summary,
                # NOTE(review): placeholder image and search link — real
                # article URLs are apparently unavailable here; confirm.
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": "https://google.com/search?q=" + topic.replace(" ", "+"),
            })

    return feed
82
 
83
+ # πŸ” Full pipeline
84
  def generate_and_cache_daily_feed(documents: List[Document]):
85
  index = VectorStoreIndex.from_documents(documents)
86
  retriever = index.as_retriever()
 
99
  "feed": topic_feed
100
  })
101
 
102
+ # πŸ’Ύ Cache feed to Redis
103
  redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
104
  print(f"βœ… Cached daily feed under key '{REDIS_KEY}'")
105
  return final_feed
106
 
107
+ # πŸ“¦ For API access
108
def get_cached_daily_feed():
    """Fetch the cached daily feed from Redis; return [] when nothing is cached."""
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        return []
    return json.loads(raw)