ragV98 committed on
Commit
2af85a2
·
1 Parent(s): 7900a77

switching to openai

Browse files
Files changed (1) hide show
  1. components/generators/daily_feed.py +30 -42
components/generators/daily_feed.py CHANGED
@@ -2,34 +2,15 @@ import os
2
  import json
3
  import redis
4
  from typing import List, Dict
 
 
5
  from llama_index.core import VectorStoreIndex, StorageContext
6
  from llama_index.core.query_engine import RetrieverQueryEngine
7
- from llama_index.core.settings import Settings
8
- from llama_index.core.llms.base import LLM, LLMMetadata
9
- from llama_index.core.llms import CompletionResponse
10
-
11
- from components.LLMs.Mistral import call_mistral
12
- from components.indexers.news_indexer import get_upstash_vector_store
13
-
14
- # βœ… Register dummy LLM to avoid context calculation errors
15
- class DummyLLM(LLM):
16
- def complete(self, prompt: str, **kwargs) -> CompletionResponse:
17
- return CompletionResponse(text="")
18
-
19
- @property
20
- def metadata(self) -> LLMMetadata:
21
- return LLMMetadata(
22
- context_window=8192,
23
- num_output=1024,
24
- is_chat_model=False
25
- )
26
-
27
- Settings.llm = DummyLLM()
28
 
29
  # πŸ” Environment variables
30
  REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
31
  REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
32
- INDEX_DIR = os.environ.get("INDEX_DIR", "storage/index")
33
 
34
  # βœ… Redis client
35
  try:
@@ -44,8 +25,8 @@ TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]
44
 
45
  # 🧠 Summarization prompt
46
  BASE_PROMPT = (
47
- "You are Nuse’s official news summarizer β€” fast, sharp, and never generic.\n"
48
- "... (prompt unchanged for brevity) ..."
49
  )
50
 
51
  # πŸ“₯ Load topic-wise documents from Upstash vector store
@@ -67,44 +48,51 @@ def load_documents_by_topic() -> Dict[str, List[str]]:
67
  print(f"❌ [Topic Retrieval Error: {key}]", e)
68
  topic_docs[key] = []
69
  return topic_docs
70
-
71
  except Exception as e:
72
  print("❌ [load_documents_by_topic Error]", e)
73
  return {}
74
 
75
- # πŸ§ͺ Summarize one topic at a time
76
  def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
77
  if not docs:
78
  print(f"⚠️ No docs found for topic: {topic_key}")
79
  return []
80
 
81
  try:
82
- merged_text = "\n\n---\n\n".join(docs)
83
- print(f"🧠 Summarizing topic: {topic_key}")
84
- summary_block = call_mistral(base_prompt=BASE_PROMPT, tail_prompt=merged_text)
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  summaries = []
87
- if summary_block:
88
- for line in summary_block.splitlines():
89
- line = line.strip()
90
- if line.startswith("-"):
91
- clean = line.lstrip("-–").strip()
92
- if clean:
93
- summaries.append({
94
- "summary": clean,
95
- "image_url": "https://source.unsplash.com/800x600/?news",
96
- "article_link": f"https://google.com/search?q={topic_key}+news"
97
- })
98
  return summaries
99
 
100
  except Exception as e:
101
- print(f"❌ [Summarization Error: {topic_key}]", e)
102
  return []
103
 
104
  # πŸš€ Main callable
105
  def generate_and_cache_daily_feed():
106
  try:
107
- print("πŸ†• Running updated daily feed generator...")
108
  topic_docs = load_documents_by_topic()
109
  feed_map = {}
110
 
 
2
  import json
3
  import redis
4
  from typing import List, Dict
5
+ from openai import OpenAI
6
+ from components.indexers.news_indexer import get_upstash_vector_store
7
  from llama_index.core import VectorStoreIndex, StorageContext
8
  from llama_index.core.query_engine import RetrieverQueryEngine
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # πŸ” Environment variables
11
  REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
12
  REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
13
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
14
 
15
  # βœ… Redis client
16
  try:
 
25
 
26
  # 🧠 Summarization prompt
27
  BASE_PROMPT = (
28
+ "You are Nuse’s editorial summarizer. Read the excerpts below and extract the most important stories. "
29
+ "Return up to 3 punchy headlines, each under 20 words, written like a premium editorial bulletin."
30
  )
31
 
32
  # πŸ“₯ Load topic-wise documents from Upstash vector store
 
48
  print(f"❌ [Topic Retrieval Error: {key}]", e)
49
  topic_docs[key] = []
50
  return topic_docs
 
51
  except Exception as e:
52
  print("❌ [load_documents_by_topic Error]", e)
53
  return {}
54
 
55
# πŸ§ͺ Summarize one topic at a time using OpenAI GPT-4
def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
    """Summarize one topic's documents into headline feed entries via OpenAI.

    Args:
        topic_key: Normalized topic name (e.g. "world"); also used to build
            the fallback Google-search article link.
        docs: Raw document texts retrieved for this topic.

    Returns:
        A list of dicts with "summary", "image_url" and "article_link" keys;
        an empty list when there are no docs or the API call fails.
    """
    if not docs:
        print(f"⚠️ No docs found for topic: {topic_key}")
        return []

    try:
        client = OpenAI(api_key=OPENAI_API_KEY)
        # Join docs with a visible separator and trim on a rough character
        # budget to avoid overflowing the model's context window.
        content = "\n\n---\n\n".join(docs)[:12000]

        print(f"🧠 Summarizing topic via OpenAI: {topic_key}")
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": BASE_PROMPT},
                {"role": "user", "content": content},
            ],
            max_tokens=512,
        )

        # message.content is Optional in the OpenAI SDK — guard against None
        # so a content-less response doesn't raise AttributeError here.
        text = (completion.choices[0].message.content or "").strip()

        summaries = []
        for line in text.splitlines():
            # Remove bullet/dash decorations the model may wrap lines in.
            line = line.strip("-–• ")
            if line:
                summaries.append({
                    "summary": line,
                    "image_url": "https://source.unsplash.com/800x600/?news",
                    "article_link": f"https://google.com/search?q={topic_key}+news",
                })
        return summaries

    except Exception as e:
        # Best-effort: log and return empty so one failing topic doesn't
        # abort the whole daily-feed generation.
        print(f"❌ [OpenAI Summarization Error: {topic_key}]", e)
        return []
91
 
92
  # πŸš€ Main callable
93
  def generate_and_cache_daily_feed():
94
  try:
95
+ print("πŸ†• Running OpenAI-powered daily feed generator...")
96
  topic_docs = load_documents_by_topic()
97
  feed_map = {}
98