ragV98 committed on
Commit
69210b9
·
1 Parent(s): 989b675

new module - generate feed

Browse files
components/generators/__init__.py ADDED
File without changes
components/generators/daily_feed.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import requests
5
+
6
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
7
+
8
+ import redis
9
+ from typing import List, Dict
10
+ from llama_index.core import VectorStoreIndex
11
+ from llama_index.core.query_engine import RetrievalQueryEngine
12
+ from components.indexers.news_indexer import load_news_index
13
# Load environment variables
# Connection URL for Upstash Redis; falls back to a local instance for dev.
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
# NOTE(review): the UPSTASH_REDIS_TOKEN env var is used as the *cache key name*
# below, which looks like an auth token being repurposed — confirm intent.
REDIS_KEY = os.environ.get("UPSTASH_REDIS_TOKEN")
MISTRAL_URL = os.environ.get("MISTRAL_URL")  # Inference endpoint URL
HF_TOKEN = os.environ.get("HF_TOKEN")  # Hugging Face endpoint token

# Connect to Redis
# decode_responses=True so get() returns str (json.loads-ready), not bytes.
redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)

# Topics to query
TOPICS = ["India news", "World news", "Tech news", "Finance news", "Sports news"]
26
# Prompt to summarize topic
def build_prompt(content: str, topic: str) -> str:
    """Return the summarization prompt for one article under the given topic."""
    instructions = (
        "You are a news summarizer. Summarize the following content in 25-30 words. "
        f"Make it engaging and informative. Include appropriate emojis. Topic: {topic}"
    )
    return f"{instructions}\n\n{content}"
32
+
33
# Call Mistral via inference endpoint
def call_mistral(prompt: str):
    """Send *prompt* to the Mistral HF inference endpoint and return its reply.

    Returns the stripped text of the first output message, or ``None`` when
    the HTTP request fails or the response body does not have the expected
    shape. (Callers such as ``summarize_topic`` treat ``None`` as "skip".)
    """
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "inputs": [
            {"role": "user", "content": prompt}
        ]
    }
    try:
        response = requests.post(MISTRAL_URL, headers=headers, json=payload, timeout=20)
        response.raise_for_status()
        # NOTE(review): assumes response schema {"outputs": [{"content": ...}]}
        # — confirm against the deployed endpoint's actual contract.
        return response.json()["outputs"][0]["content"].strip()
    except requests.RequestException as e:
        # Network / HTTP-status failures.
        print(f"⚠️ Mistral error: {e}")
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Unexpected or non-JSON response body (ValueError covers JSON decode).
        print(f"⚠️ Mistral error: {e}")
    return None
51
+
52
# Generate summary for topic using Mistral
def summarize_topic(docs: List[str], topic: str) -> List[Dict]:
    """Summarize up to five documents for *topic* into feed-entry dicts."""
    # Search link is the same for every entry of this topic, so build it once.
    article_link = "https://google.com/search?q=" + topic.replace(" ", "+")
    entries: List[Dict] = []
    for document in docs[:5]:
        summary = call_mistral(build_prompt(document, topic))
        if not summary:
            continue  # Skip documents the model failed to summarize.
        entries.append({
            "summary": summary,
            "image_url": "https://source.unsplash.com/800x600/?news",
            "article_link": article_link,
        })
    return entries
65
+
66
# Main generation pipeline
def generate_and_cache_daily_feed(documents=None):
    """Build per-topic news summaries from the vector index and cache to Redis.

    Args:
        documents: Optional list of ingested documents. Accepted for
            compatibility with ``pipeline/news_ingest.py``, which calls
            ``generate_and_cache_daily_feed(documents)`` — without this
            parameter that call raises TypeError. The feed is retrieved
            from the persisted index, so the argument is currently unused.

    Returns:
        The list of ``{"topic": ..., "feed": [...]}`` dicts that was cached.
    """
    index: VectorStoreIndex = load_news_index()
    query_engine = RetrievalQueryEngine.from_args(index)

    final_feed = []
    for topic in TOPICS:
        print(f"\n🔍 Generating for: {topic}")
        response = query_engine.query(topic)
        # Summarize the raw text of each retrieved source node.
        docs = [str(node.get_content()) for node in response.source_nodes]

        topic_feed = summarize_topic(docs, topic)
        final_feed.append({
            # "India news" -> "india", etc. — short key for the frontend.
            "topic": topic.lower().replace(" news", ""),
            "feed": topic_feed,
        })

    # Cache to Redis. ensure_ascii=False keeps emojis readable in the payload.
    # NOTE(review): REDIS_KEY comes from UPSTASH_REDIS_TOKEN — confirm that the
    # auth token is really meant to double as the cache key name.
    redis_client.set(REDIS_KEY, json.dumps(final_feed, ensure_ascii=False))
    print(f"✅ Cached daily feed under key '{REDIS_KEY}'")
    return final_feed
87
+
88
# Redis fetch for API

def get_cached_daily_feed():
    """Return the cached daily feed from Redis, or [] when nothing is cached."""
    raw = redis_client.get(REDIS_KEY)
    if not raw:
        # Missing key (or empty value) — treat both as "no feed yet".
        return []
    return json.loads(raw)
pipeline/news_ingest.py CHANGED
@@ -8,6 +8,7 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
8
  from components.indexers.news_indexer import get_or_build_index_from_docs
9
  from components.fetchers.google_search import fetch_google_news
10
  from components.fetchers.scraper import scrape_url
 
11
  from llama_index.core.settings import Settings
12
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
13
  from llama_index.core.schema import Document
@@ -97,4 +98,7 @@ if __name__ == "__main__":
97
  documents = build_documents(all_articles)
98
  get_or_build_index_from_docs(documents)
99
 
100
- print(f"βœ… Indexed and stored at: {INDEX_DIR}")
 
 
 
 
8
  from components.indexers.news_indexer import get_or_build_index_from_docs
9
  from components.fetchers.google_search import fetch_google_news
10
  from components.fetchers.scraper import scrape_url
11
+ from components.generators.daily_feed import generate_and_cache_daily_feed
12
  from llama_index.core.settings import Settings
13
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
14
  from llama_index.core.schema import Document
 
98
  documents = build_documents(all_articles)
99
  get_or_build_index_from_docs(documents)
100
 
101
+ print("⚑ Generating daily feed...")
102
+ generate_and_cache_daily_feed(documents) # πŸ‘ˆ CALLS HEADLINE BUILDER
103
+
104
+ print(f"βœ… Indexed, headlines generated, and stored at: {INDEX_DIR}")