Spaces:

nuseAI
/

fastAPIv2

Sleeping

App Files Files Community

ragV98 committed on Jul 23

Commit

2af85a2

1 Parent(s): 7900a77

switching to openai

Browse files

Files changed (1) hide show

components/generators/daily_feed.py +30 -42

components/generators/daily_feed.py CHANGED Viewed

@@ -2,34 +2,15 @@ import os
 import json
 import redis
 from typing import List, Dict
 from llama_index.core import VectorStoreIndex, StorageContext
 from llama_index.core.query_engine import RetrieverQueryEngine
-from llama_index.core.settings import Settings
-from llama_index.core.llms.base import LLM, LLMMetadata
-from llama_index.core.llms import CompletionResponse
-from components.LLMs.Mistral import call_mistral
-from components.indexers.news_indexer import get_upstash_vector_store
-# ✅ Register dummy LLM to avoid context calculation errors
-class DummyLLM(LLM):
-    def complete(self, prompt: str, **kwargs) -> CompletionResponse:
-        return CompletionResponse(text="")
-    @property
-    def metadata(self) -> LLMMetadata:
-        return LLMMetadata(
-            context_window=8192,
-            num_output=1024,
-            is_chat_model=False
-        )
-Settings.llm = DummyLLM()
 # 🔐 Environment variables
 REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
 REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
-INDEX_DIR = os.environ.get("INDEX_DIR", "storage/index")
 # ✅ Redis client
 try:
@@ -44,8 +25,8 @@ TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]
 # 🧠 Summarization prompt
 BASE_PROMPT = (
-    "You are Nuse’s official news summarizer — fast, sharp, and never generic.\n"
-    "... (prompt unchanged for brevity) ..."
 )
 # 📥 Load topic-wise documents from Upstash vector store
@@ -67,44 +48,51 @@ def load_documents_by_topic() -> Dict[str, List[str]]:
                 print(f"❌ [Topic Retrieval Error: {key}]", e)
                 topic_docs[key] = []
         return topic_docs
     except Exception as e:
         print("❌ [load_documents_by_topic Error]", e)
         return {}
-# 🧪 Summarize one topic at a time
 def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
     if not docs:
         print(f"⚠️ No docs found for topic: {topic_key}")
         return []
     try:
-        merged_text = "\n\n---\n\n".join(docs)
-        print(f"🧠 Summarizing topic: {topic_key}")
-        summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=merged_text)
         summaries = []
-        if summary_block:
-            for line in summary_block.splitlines():
-                line = line.strip()
-                if line.startswith("-"):
-                    clean = line.lstrip("-–").strip()
-                    if clean:
-                        summaries.append({
-                            "summary": clean,
-                            "image_url": "https://source.unsplash.com/800x600/?news",
-                            "article_link": f"https://google.com/search?q={topic_key}+news"
-                        })
         return summaries
     except Exception as e:
-        print(f"❌ [Summarization Error: {topic_key}]", e)
         return []
 # 🚀 Main callable
 def generate_and_cache_daily_feed():
     try:
-        print("🆕 Running updated daily feed generator...")
         topic_docs = load_documents_by_topic()
         feed_map = {}

 import json
 import redis
 from typing import List, Dict
+from openai import OpenAI
+from components.indexers.news_indexer import get_upstash_vector_store
 from llama_index.core import VectorStoreIndex, StorageContext
 from llama_index.core.query_engine import RetrieverQueryEngine
 # 🔐 Environment variables
 REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
 REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 # ✅ Redis client
 try:
 # 🧠 Summarization prompt
 BASE_PROMPT = (
+    "You are Nuse’s editorial summarizer. Read the excerpts below and extract the most important stories. "
+    "Return up to 3 punchy headlines, each under 20 words, written like a premium editorial bulletin."
 )
 # 📥 Load topic-wise documents from Upstash vector store
                 print(f"❌ [Topic Retrieval Error: {key}]", e)
                 topic_docs[key] = []
         return topic_docs
     except Exception as e:
         print("❌ [load_documents_by_topic Error]", e)
         return {}
# 🧪 Summarize one topic at a time using OpenAI GPT-4
def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
    """Summarize one topic's documents into headline cards via OpenAI.

    The documents are joined with a ``---`` separator, trimmed to 12,000
    characters, and sent to the ``gpt-4`` chat model with ``BASE_PROMPT`` as
    the system message. Each non-empty line of the reply becomes one card.

    Args:
        topic_key: Topic identifier; used in log messages and, URL-encoded,
            in the generated search link.
        docs: Text excerpts for the topic.

    Returns:
        A list of dicts with the keys ``summary``, ``image_url`` and
        ``article_link``. The list is empty when ``docs`` is empty, when the
        model returns no text, or when any exception is raised while calling
        the API or parsing the reply (the error is printed, not re-raised).
    """
    # Function-scope imports keep this block self-contained without touching
    # the module's import header.
    import re
    from urllib.parse import quote_plus

    if not docs:
        print(f"⚠️ No docs found for topic: {topic_key}")
        return []

    # Characters treated as list decoration around a headline.
    bullet_chars = "-–•* \t"

    try:
        client = OpenAI(api_key=OPENAI_API_KEY)
        # NOTE: this is a character cap, not a token count; it only bounds
        # the request size approximately.
        content = "\n\n---\n\n".join(docs)[:12000]
        print(f"🧠 Summarizing topic via OpenAI: {topic_key}")
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": BASE_PROMPT},
                {"role": "user", "content": content},
            ],
            max_tokens=512,
        )
        # `message.content` can be None; treat that as an empty reply
        # instead of failing on `.strip()`.
        text = (completion.choices[0].message.content or "").strip()

        # Encode the topic so keys with spaces or symbols yield a valid URL.
        article_link = f"https://google.com/search?q={quote_plus(topic_key)}+news"

        summaries = []
        for raw_line in text.splitlines():
            headline = raw_line.strip(bullet_chars)
            # Drop list numbering such as "1. " or "2) ". The trailing
            # whitespace requirement keeps headlines like "1.5 million ..."
            # intact.
            headline = re.sub(r"^\d{1,2}[.)]\s+", "", headline).strip(bullet_chars)
            if headline:
                summaries.append({
                    "summary": headline,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": article_link,
                })
        return summaries
    except Exception as e:
        # Best-effort boundary: one failing topic must not abort the feed.
        print(f"❌ [OpenAI Summarization Error: {topic_key}]", e)
        return []
 # 🚀 Main callable
 def generate_and_cache_daily_feed():
     try:
+        print("🆕 Running OpenAI-powered daily feed generator...")
         topic_docs = load_documents_by_topic()
         feed_map = {}