new module - generate feed
components/generators/__init__.py
ADDED
(empty file)
components/generators/daily_feed.py
ADDED
@@ -0,0 +1,92 @@
import os
import sys
import json
import requests

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

import redis
from typing import Dict, List, Optional
from llama_index.core import VectorStoreIndex
from components.indexers.news_indexer import load_news_index

# Load environment variables
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")  # Inference endpoint URL
HF_TOKEN = os.environ.get("HF_TOKEN")  # Hugging Face endpoint token

# Connect to Redis
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# Topics to query
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]

# Build the summarization prompt for a topic
def build_prompt(content: str, topic: str) -> str:
    return (
        f"You are a news summarizer. Summarize the following content in 25-30 words. "
        f"Make it engaging and informative. Include appropriate emojis. Topic: {topic}\n\n{content}"
    )

# Call Mistral via the inference endpoint
def call_mistral(prompt: str) -> Optional[str]:
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    # Chat-style payload; the exact request/response schema depends on how
    # the endpoint is deployed
    payload = {
        "inputs": [
            {"role": "user", "content": prompt}
        ]
    }
    try:
        response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
        response.raise_for_status()
        return response.json()["outputs"][0]["content"].strip()
    except Exception as e:
        print(f"⚠️ Mistral error: {e}")
        return None

# Generate summaries for a topic using Mistral
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    feed = []
    for doc in docs[:5]:
        prompt = build_prompt(doc, topic)
        summary = call_mistral(prompt)
        if summary:
            feed.append({
                "summary": summary,
                "image_url": "https://source.unsplash.com/800x600/?news",
                "article_link": "https://google.com/search?q=" + topic.replace(" ", "+")
            })
    return feed

# Main generation pipeline
def generate_and_cache_daily_feed():
    index: VectorStoreIndex = load_news_index()
    # as_query_engine() wraps the index in a retriever-backed query engine
    query_engine = index.as_query_engine()

    final_feed = []
    for topic in TOPICS:
        print(f"\nGenerating for: {topic}")
        response = query_engine.query(topic)
        docs = [str(node.get_content()) for node in response.source_nodes]

        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed
        })

    # Cache to Redis
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed

# Redis fetch for the API
def get_cached_daily_feed():
    cached = redis_client.get(REDIS_KEY)
    return json.loads(cached) if cached else []
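get_cached_daily_feed() is the read side of this cache: the API layer can serve the feed without touching the index or the LLM. A minimal sketch of that consumer, assuming a FastAPI app (the framework and route name here are illustrative, not part of this commit):

from fastapi import FastAPI

from components.generators.daily_feed import get_cached_daily_feed

app = FastAPI()

@app.get("/daily-feed")
def daily_feed():
    # Serves whatever generate_and_cache_daily_feed() last wrote to Redis;
    # returns [] until the pipeline has run at least once
    return get_cached_daily_feed()
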
pipeline/news_ingest.py
CHANGED
@@ -8,6 +8,7 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 from components.indexers.news_indexer import get_or_build_index_from_docs
 from components.fetchers.google_search import fetch_google_news
 from components.fetchers.scraper import scrape_url
+from components.generators.daily_feed import generate_and_cache_daily_feed
 from llama_index.core.settings import Settings
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core.schema import Document

@@ -97,4 +98,7 @@ if __name__ == "__main__":
     documents = build_documents(all_articles)
     get_or_build_index_from_docs(documents)

-    print(
+    print("⚡ Generating daily feed...")
+    generate_and_cache_daily_feed()  # headline builder; the function takes no arguments
+
+    print(f"✅ Indexed, headlines generated, and stored at: {INDEX_DIR}")