again
Browse files
components/generators/daily_feed.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
import os
|
2 |
import json
|
3 |
import redis
|
|
|
4 |
from typing import List, Dict
|
5 |
from openai import OpenAI
|
6 |
from components.indexers.news_indexer import get_upstash_vector_store
|
7 |
-
from llama_index.core import
|
|
|
8 |
|
9 |
# π Environment variables
|
10 |
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
|
@@ -28,26 +30,29 @@ BASE_PROMPT = (
|
|
28 |
"Return up to 3 punchy headlines, each under 20 words, written like a premium editorial bulletin."
|
29 |
)
|
30 |
|
31 |
-
# π₯ Load topic
|
32 |
def load_all_documents_grouped_by_topic() -> Dict[str, List[str]]:
|
33 |
-
topic_docs = {}
|
34 |
|
35 |
try:
|
36 |
vector_store = get_upstash_vector_store()
|
37 |
-
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
38 |
|
39 |
-
|
40 |
-
all_nodes = vector_store._data.values() # Upstash uses `_data` internally to store nodes
|
41 |
-
|
42 |
-
for node in all_nodes:
|
43 |
try:
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
except Exception as e:
|
50 |
-
print(f"β [
|
51 |
|
52 |
except Exception as e:
|
53 |
print("β [load_all_documents_grouped_by_topic Error]", e)
|
@@ -62,9 +67,9 @@ def summarize_topic(topic_key: str, docs: List[str]) -> List[Dict]:
|
|
62 |
|
63 |
try:
|
64 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
65 |
-
content = "\n\n---\n\n".join(docs)[:12000]
|
66 |
|
67 |
-
print(f"π§ Summarizing topic via
|
68 |
completion = client.chat.completions.create(
|
69 |
model="gpt-4",
|
70 |
messages=[
|
@@ -96,7 +101,6 @@ def generate_and_cache_daily_feed():
|
|
96 |
try:
|
97 |
print("π Running OpenAI-powered daily feed generator....")
|
98 |
topic_docs = load_all_documents_grouped_by_topic()
|
99 |
-
print('Loaded topics', topic_docs)
|
100 |
feed_map = {}
|
101 |
|
102 |
for topic_key in TOPIC_KEYS:
|
|
|
1 |
import os
|
2 |
import json
|
3 |
import redis
|
4 |
+
import numpy as np
|
5 |
from typing import List, Dict
|
6 |
from openai import OpenAI
|
7 |
from components.indexers.news_indexer import get_upstash_vector_store
|
8 |
+
from llama_index.core import StorageContext
|
9 |
+
from llama_index.vector_stores.types import VectorStoreQuery
|
10 |
|
11 |
# π Environment variables
|
12 |
REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
|
|
|
30 |
"Return up to 3 punchy headlines, each under 20 words, written like a premium editorial bulletin."
|
31 |
)
|
32 |
|
33 |
+
# Load stored news documents from Upstash, bucketed per topic key.
def load_all_documents_grouped_by_topic() -> Dict[str, List[str]]:
    """Fetch documents from the Upstash vector store, grouped by topic.

    Returns a dict mapping every key in ``TOPIC_KEYS`` to a list of
    document texts. Keys are pre-populated, so topics with no matches
    (or whose query failed) map to an empty list. Errors are reported
    to stdout and never raised — callers always get a usable dict.
    """
    topic_docs: Dict[str, List[str]] = {topic_key: [] for topic_key in TOPIC_KEYS}

    try:
        store = get_upstash_vector_store()

        for topic_label, topic_key in zip(TOPICS, TOPIC_KEYS):
            try:
                # The vector store only exposes similarity search, so we probe
                # with a random embedding and rely on the metadata filter to do
                # the real selection; ranking order is irrelevant here.
                # 384 dims — assumes MiniLM-style embeddings; TODO confirm.
                probe_embedding = np.random.rand(384).tolist()
                vs_query = VectorStoreQuery(
                    query_embedding=probe_embedding,
                    similarity_top_k=50,
                    filters={"topic": topic_label},
                )
                response = store.query(vs_query)

                for node in response.nodes:
                    text = node.get_content().strip()
                    if not text:
                        continue
                    topic_docs[topic_key].append(text)
            except Exception as e:
                # Best-effort: a single topic's failure must not abort the rest.
                print(f"β [Topic Metadata Filter error: {topic_key}]", e)

    except Exception as e:
        print("β [load_all_documents_grouped_by_topic Error]", e)
    # NOTE(review): the function's return statement falls outside this diff
    # hunk — presumably ``return topic_docs``; verify against the full file.
|
|
|
67 |
|
68 |
try:
|
69 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
70 |
+
content = "\n\n---\n\n".join(docs)[:12000]
|
71 |
|
72 |
+
print(f"π§ Summarizing topic via OpenAI: {topic_key}")
|
73 |
completion = client.chat.completions.create(
|
74 |
model="gpt-4",
|
75 |
messages=[
|
|
|
101 |
try:
|
102 |
print("π Running OpenAI-powered daily feed generator....")
|
103 |
topic_docs = load_all_documents_grouped_by_topic()
|
|
|
104 |
feed_map = {}
|
105 |
|
106 |
for topic_key in TOPIC_KEYS:
|