ragV98 committed on
Commit
315bd36
·
1 Parent(s): f8625a7
Files changed (1)
  1. components/generators/daily_feed.py +26 -63
components/generators/daily_feed.py CHANGED
@@ -10,7 +10,7 @@ from llama_index.core.vector_stores.types import VectorStoreQuery, MetadataFilte
 
 # 🔐 Environment variables
 REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
-REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")  # Using REDIS_KEY for the cache key, assuming UPSTASH_REDIS_TOKEN is meant for the cache key here
+REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 
 # ✅ Redis client
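Note: as the removed inline comment concedes, UPSTASH_REDIS_TOKEN is an auth credential, not a cache key name, so stashing it in REDIS_KEY invites confusion. A minimal sketch of the cleaner split, assuming a hypothetical DAILY_FEED_CACHE_KEY environment variable:

    import os

    # Hypothetical split: the credential stays a credential, and the cache
    # key name gets its own variable with a sensible default.
    REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
    REDIS_TOKEN = os.environ.get("UPSTASH_REDIS_TOKEN")  # auth only
    CACHE_KEY = os.environ.get("DAILY_FEED_CACHE_KEY", "daily_news_feed_cache")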
@@ -22,6 +22,7 @@ except Exception as e:
 
 # 📰 Topic list
 TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
+# This list correctly generates 'india', 'world', etc.
 TOPIC_KEYS = [t.lower().replace(" news", "") for t in TOPICS]
 
 # 🧠 Summarization prompt
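For reference, the comprehension lowercases each display name and drops the " news" suffix, so the two lists stay index-aligned:

    >>> TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
    >>> [t.lower().replace(" news", "") for t in TOPICS]
    ['india', 'world', 'tech', 'finance', 'sports']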
@@ -38,23 +39,20 @@ def load_all_documents_grouped_by_topic() -> Dict[str, List[str]]:
         vector_store = get_upstash_vector_store()
         print("💡 Successfully retrieved Upstash vector store.")
 
-        # --- ADD THESE PRINT STATEMENTS ---
+        # Debugging prints (keep them for now, they are useful)
         print(f"DEBUG: TOPICS = {TOPICS}")
         print(f"DEBUG: TOPIC_KEYS = {TOPIC_KEYS}")
         print(f"DEBUG: Length of TOPICS = {len(TOPICS)}")
         print(f"DEBUG: Length of TOPIC_KEYS = {len(TOPIC_KEYS)}")
-        # ----------------------------------
 
-        for topic, key in zip(TOPICS, TOPIC_KEYS):
+        for full_topic_name, topic_key_for_filter in zip(TOPICS, TOPIC_KEYS):
             try:
-                # Upstash VectorStore expects the filter value to match the exact string
-                # of the topic as it was indexed. Make sure your 'topic' metadata
-                # in Upstash exactly matches the values in TOPICS (e.g., "India news").
-
-                # Construct MetadataFilters object
+                # *** THE CRITICAL CHANGE IS HERE ***
+                # Use 'topic_key_for_filter' (e.g., "india") which matches your stored metadata
+                # instead of 'full_topic_name' (e.g., "India news").
                 filters = MetadataFilters(
                     filters=[
-                        MetadataFilter(key="topic", value=topic, operator=FilterOperator.EQ)
+                        MetadataFilter(key="topic", value=topic_key_for_filter, operator=FilterOperator.EQ)
                     ]
                 )
 
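Note: the filter change only helps if the documents were indexed with the short key (e.g. "india") as their topic metadata. A sketch of the matching indexing side, assuming nodes are written through the same Upstash store (the real writer lives in components.indexers.news_indexer):

    from llama_index.core.schema import TextNode

    # Hypothetical indexing sketch: the stored metadata value must equal the
    # value passed to MetadataFilter above ("india", not "India news").
    node = TextNode(
        text="Example article body ...",
        metadata={"topic": "india"},
    )
    # vector_store.add([node])  # same store the query side reads from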
@@ -65,27 +63,21 @@ def load_all_documents_grouped_by_topic() -> Dict[str, List[str]]:
                     filters=filters  # Apply the metadata filter
                 )
 
-                # Removed the problematic .to_dict() call
-                print(f"🔎 Querying Upstash for topic: '{topic}'")
+                print(f"🔎 Querying Upstash for topic: '{full_topic_name}' using filter value '{topic_key_for_filter}'")
                 result = vector_store.query(query)
-                print(f"➡️ Found {len(result.nodes)} nodes for topic: '{topic}'.")
+                print(f"➡️ Found {len(result.nodes)} nodes for topic: '{full_topic_name}'.")
 
                 for node in result.nodes:
                     content = node.get_content().strip()
                     if content:
-                        topic_docs[key].append(content)
+                        topic_docs[topic_key_for_filter].append(content)
                     # Optional: Print metadata to verify filtering
                     # print(f"  Node metadata: {node.metadata}")
             except Exception as e:
-                print(f"❌ [Topic Metadata Filter error for '{topic}']: {e}")
-                # Optional: Log the full traceback for more detailed debugging
-                # import traceback
-                # traceback.print_exc()
+                print(f"❌ [Topic Metadata Filter error for '{full_topic_name}']: {e}")
 
     except Exception as e:
         print("❌ [load_all_documents_grouped_by_topic Error]", e)
-        # import traceback
-        # traceback.print_exc()
 
     return topic_docs
 
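The opening of the query construction falls outside the hunk; given the visible filters= argument, it presumably reads roughly as below (the query string and top-k are placeholders, not taken from the commit):

    # Hypothetical reconstruction of the hidden lines above 'filters=filters'.
    query = VectorStoreQuery(
        query_str=full_topic_name,  # assumed
        similarity_top_k=10,        # placeholder
        filters=filters,            # apply the metadata filter
    )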
@@ -97,38 +89,34 @@ def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
 
     try:
         client = OpenAI(api_key=OPENAI_API_KEY)
-        # Join documents, ensuring we don't exceed typical GPT-4 context window (approx 128k tokens, 12000 chars is safe)
         content = "\n\n---\n\n".join(docs)[:12000]
 
         print(f"🧠 Summarizing topic via OpenAI: {topic_key} ({len(docs)} documents)")
         completion = client.chat.completions.create(
-            model="gpt-4",  # Or "gpt-4o" for potentially better performance
+            model="gpt-4",
             messages=[
                 {"role": "system", "content": BASE_PROMPT},
                 {"role": "user", "content": content},
             ],
-            max_tokens=512,  # Enough tokens for 3 punchy headlines
-            temperature=0.7,  # A bit creative but focused
+            max_tokens=512,
+            temperature=0.7,
         )
 
         text = completion.choices[0].message.content.strip()
 
         summaries = []
-        # Parse the headlines, assuming they might be bullet points or lines
         for line in text.splitlines():
-            line = line.strip("-–‒ ")  # Remove common bullet characters
+            line = line.strip("-–‒ ")
            if line:
                 summaries.append({
                     "summary": line,
-                    "image_url": "https://source.unsplash.com/800x600/?news",  # Generic image, could be improved
-                    "article_link": f"https://google.com/search?q={topic_key}+news"  # Generic search link
+                    "image_url": "https://source.unsplash.com/800x600/?news",
+                    "article_link": f"https://google.com/search?q={topic_key}+news"
                 })
         return summaries
 
     except Exception as e:
         print(f"❌ [OpenAI Summarization Error for '{topic_key}']: {e}")
-        # import traceback
-        # traceback.print_exc()
         return []
 
 # 🚀 Main callable
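Note: the removed comment in this hunk conflates characters with tokens, and plain gpt-4 has an 8k-token window (128k belongs to gpt-4-turbo); 12,000 characters is roughly 3k tokens, so it fits either way. If the budget ever matters, a token-based cut is the safer sketch, assuming tiktoken is installed:

    import tiktoken

    def truncate_to_tokens(text: str, max_tokens: int = 6000, model: str = "gpt-4") -> str:
        # Encode, clip to the token budget, and decode back to text.
        enc = tiktoken.encoding_for_model(model)
        return enc.decode(enc.encode(text)[:max_tokens])

    # content = truncate_to_tokens("\n\n---\n\n".join(docs))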
@@ -144,37 +132,30 @@ def generate_and_cache_daily_feed():
             feed_map[topic_key] = summaries
         except Exception as e:
             print(f"❌ [Topic Loop Error for '{topic_key}']: {e}")
-            # import traceback
-            # traceback.print_exc()
             feed_map[topic_key] = []
 
-        final_feed = [{"topic": topic, "feed": feed_map[topic_key]} for topic, topic_key in zip(TOPICS, TOPIC_KEYS)]
+        # When creating final_feed, use TOPICS for the display name but TOPIC_KEYS for mapping
+        final_feed = [{"topic": display_name, "feed": feed_map[actual_key]}
+                      for display_name, actual_key in zip(TOPICS, TOPIC_KEYS)]
 
         try:
-            # Ensure the REDIS_KEY is suitable for a key name (e.g., not an API token itself)
-            # You might want a separate environment variable for the cache key, e.g., DAILY_FEED_CACHE_KEY
-            cache_key_name = "daily_news_feed_cache"  # A more descriptive key
+            cache_key_name = "daily_news_feed_cache"
             redis_client.set(cache_key_name, json.dumps(final_feed, ensure_ascii=False))
-            # Set an expiry for the cache, e.g., 24 hours (86400 seconds)
             redis_client.expire(cache_key_name, 86400)
             print(f"✅ Cached daily feed under key '{cache_key_name}' with 24-hour expiry.")
         except Exception as e:
             print("❌ [Redis Cache Error]", e)
-            # import traceback
-            # traceback.print_exc()
 
         return final_feed
 
     except Exception as e:
         print("❌ [generate_and_cache_daily_feed Overall Error]", e)
-        # import traceback
-        # traceback.print_exc()
         return []
 
 # 📦 Get cached data
 def get_cached_daily_feed():
     try:
-        cache_key_name = "daily_news_feed_cache"  # Use the same key name as in generate_and_cache_daily_feed
+        cache_key_name = "daily_news_feed_cache"
         cached = redis_client.get(cache_key_name)
         if cached:
             print(f"✅ Retrieved cached daily feed from '{cache_key_name}'.")
@@ -184,37 +165,19 @@ def get_cached_daily_feed():
         return []
     except Exception as e:
         print("❌ [get_cached_daily_feed Error]", e)
-        # import traceback
-        # traceback.print_exc()
         return []
 
 # Example of how to run it (for testing purposes, if this were the main script)
 if __name__ == "__main__":
     # Ensure your environment variables are set before running
     # os.environ["UPSTASH_REDIS_URL"] = "your_upstash_redis_url"
-    # os.environ["UPSTASH_REDIS_TOKEN"] = "your_upstash_redis_token"  # This should ideally be a unique key for caching, not the token
+    # os.environ["UPSTASH_REDIS_TOKEN"] = "your_upstash_redis_token"
     # os.environ["OPENAI_API_KEY"] = "your_openai_api_key"
 
-    # For the UPSTASH_REDIS_TOKEN environment variable, if it's truly a Redis token
-    # that shouldn't be used as a cache key, you should define a separate environment
-    # variable for the cache key, or use a hardcoded string as I've done with "daily_news_feed_cache".
-    # For Upstash Vector connection, ensure UPSTASH_VECTOR_REST_URL and UPSTASH_VECTOR_REST_TOKEN
-    # are configured in your `components.indexers.news_indexer.py`'s `get_upstash_vector_store` function.
-
-    # Generate and cache the feed
     generated_feed = generate_and_cache_daily_feed()
     print("\n--- Generated and Cached Feed ---")
-    # for item in generated_feed:
-    #     print(f"Topic: {item['topic']}")
-    #     for summary in item['feed']:
-    #         print(f"  - {summary['summary']}")
-    # print(json.dumps(generated_feed, indent=2, ensure_ascii=False))  # For full output
+    # print(json.dumps(generated_feed, indent=2, ensure_ascii=False))
 
-    # Retrieve from cache
     cached_feed = get_cached_daily_feed()
     print("\n--- Retrieved from Cache ---")
-    # for item in cached_feed:
-    #     print(f"Topic: {item['topic']}")
-    #     for summary in item['feed']:
-    #         print(f"  - {summary['summary']}")
-    # print(json.dumps(cached_feed, indent=2, ensure_ascii=False))  # For full output
+    # print(json.dumps(cached_feed, indent=2, ensure_ascii=False))
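The body between the 'Retrieved' print and the fallback return is outside the hunk; presumably the cached JSON string is decoded before being handed back, along these lines:

    # Hypothetical hidden branch: decode the cached payload back into Python objects.
    if cached:
        return json.loads(cached)
    return []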
 