ragV98 committed
Commit af23d2f · 1 Parent(s): e4a76c1
app.py CHANGED
@@ -1,8 +1,33 @@
+# app.py
+import os
+import sys
 from fastapi import FastAPI
-from routes.api import ingest, query, headlines
-from llama_index.core.settings import Settings
 
-Settings.llm = None
+# --- Make the project modules importable ---
+# app.py is assumed to sit at the project root; these entries let Python
+# resolve 'components' and 'routes' as packages.
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "components")))
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "routes")))
+# Needed when importing modules from routes/api directly.
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "routes", "api")))
+
+# Import the routers.
+# These imports expect routes/api/ingest.py, routes/api/query.py, and routes/api/headlines.py to exist.
+from routes.api import ingest as ingest_router_module
+from routes.api import query as query_router_module
+from routes.api import headlines as headlines_router_module
+
+
+# NOTE: Settings.llm = None
+# Leaving the global LlamaIndex LLM unset only matters if components in the
+# pipeline (e.g., a query engine) rely on Settings.llm. To route LLM calls
+# through LlamaIndex features, set a model here instead, for example
+# `Settings.llm = OpenAI()`.
+# In the current pipeline the OpenAI client is initialized explicitly inside
+# daily_feed.py and detailed_explainer.py, so a global setting is not needed;
+# the line below stays commented out as a deliberate placeholder.
+# Settings.llm = None
 
 
 app = FastAPI()
@@ -11,6 +36,8 @@ app = FastAPI()
 def greet():
     return {"welcome": "nuse ai"}
 
-app.include_router(ingest.router)
-app.include_router(query.router)
-app.include_router(headlines.router)
+# Include the routers.
+# Use .router to access the APIRouter instance on each imported module.
+app.include_router(ingest_router_module.router, prefix="/api/ingest")
+app.include_router(query_router_module.router, prefix="/api/query")
+app.include_router(headlines_router_module.router, prefix="/api/headlines")
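With the routers now mounted under explicit prefixes, a quick smoke test confirms the app still wires up. A minimal sketch using FastAPI's TestClient, assuming greet() is exposed at "/" (its decorator sits just outside the hunk above) and that the router modules import cleanly:

# smoke_test.py — a sketch; run from the project root so `app` is importable.
from fastapi.testclient import TestClient

from app import app

client = TestClient(app)

response = client.get("/")
assert response.status_code == 200
assert response.json() == {"welcome": "nuse ai"}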
components/generators/detailed_explainer.py ADDED
@@ -0,0 +1,191 @@
+import os
+import json
+import logging
+from typing import Any, Dict, Set
+
+import redis
+from openai import OpenAI
+
+from llama_index.core.settings import Settings
+from llama_index.core.vector_stores.types import (
+    VectorStoreQuery,
+    MetadataFilter,
+    MetadataFilters,
+    FilterOperator,
+)
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+from components.indexers.news_indexer import get_upstash_vector_store
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+# 🔐 Environment variables for this module
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "redis://localhost:6379")
+
+# ✅ Redis client for this module
+try:
+    detailed_explainer_redis_client = redis.Redis.from_url(REDIS_URL, decode_responses=True)
+    detailed_explainer_redis_client.ping()
+    logging.info("Redis client initialized for detailed_explainer.py.")
+except Exception as e:
+    logging.critical(f"❌ FATAL ERROR: Could not connect to Redis in detailed_explainer.py: {e}")
+    raise
+
+# Cache key specific to detailed explanations
+DETAILED_FEED_CACHE_KEY = "detailed_news_feed_cache"
+
+# Ensure Settings.embed_model is configured globally.
+try:
+    if not hasattr(Settings, 'embed_model') or Settings.embed_model is None:
+        logging.info("Settings.embed_model not yet configured, initializing with default HuggingFaceEmbedding.")
+        Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")
+except Exception as e:
+    logging.error(f"Failed to initialize Settings.embed_model in detailed_explainer: {e}")
+
+
+# LLM prompt for detailed explanations
+EXPLAINER_PROMPT = (
+    "You are an expert news analyst. Based on the following article content, "
+    "generate a concise, detailed explanation (50-60 words) for the headline provided. "
+    "Focus on the 'why it matters' and key context. Do not include any introductory phrases, just the explanation itself."
+    "\n\nHeadline: {headline}"
+    "\n\nArticle Content:\n{article_content}"
+    "\n\nDetailed Explanation (50-60 words):"
+)
+
+async def get_detailed_explanation_from_vector(
+    summary_item: Dict[str, Any],
+    vector_store_client: Any
+) -> Dict[str, Any]:
+    """
+    Takes a summary item, queries the vector store for its original article content,
+    and generates a detailed explanation using an LLM.
+    """
+    headline_text = summary_item["summary"]
+    representative_article_link = summary_item["article_link"]
+    representative_title = summary_item["representative_title"]
+
+    detailed_content = ""
+    sources_found: Set[str] = set()
+
+    logging.info(f"Retrieving detailed content for headline: '{headline_text}' (from {representative_article_link})")
+
+    try:
+        query_text = f"{representative_title} {representative_article_link}" if representative_title else representative_article_link
+
+        query_embedding = Settings.embed_model.get_query_embedding(query_text)
+
+        filters = MetadataFilters(
+            filters=[MetadataFilter(key="url", value=representative_article_link, operator=FilterOperator.EQ)]
+        )
+
+        query = VectorStoreQuery(
+            query_embedding=query_embedding,
+            similarity_top_k=5,
+            filters=filters
+        )
+        result = vector_store_client.query(query)
+
+        if result.nodes:
+            for node in result.nodes:
+                node_content = node.get_content().strip()
+                if node_content:
+                    detailed_content += node_content + "\n\n"
+                if "source" in node.metadata:
+                    sources_found.add(node.metadata["source"])
+
+            if not detailed_content:
+                logging.warning(f"No usable content found in nodes retrieved for URL: {representative_article_link}. Falling back to title+url context.")
+                detailed_content = representative_title + " " + representative_article_link
+
+        else:
+            logging.warning(f"No original article found in vector store for URL: {representative_article_link}. Using summary as context.")
+            detailed_content = summary_item["summary"] + ". " + summary_item.get("explanation", "")
+
+    except Exception as e:
+        logging.error(f"❌ Error querying vector store for detailed content for '{representative_article_link}': {e}", exc_info=True)
+        detailed_content = summary_item["summary"] + ". " + summary_item.get("explanation", "")
+
+    # Generate the detailed explanation with the LLM
+    detailed_explanation_text = ""
+    try:
+        # Validate the key before constructing the client.
+        if not OPENAI_API_KEY:
+            raise ValueError("OPENAI_API_KEY is not set.")
+        client = OpenAI(api_key=OPENAI_API_KEY)
+
+        llm_response = client.chat.completions.create(
+            model="gpt-4o",
+            messages=[
+                {"role": "system", "content": "You are a concise and informative news explainer."},
+                {"role": "user", "content": EXPLAINER_PROMPT.format(
+                    headline=headline_text,
+                    article_content=detailed_content
+                )},
+            ],
+            max_tokens=100,
+            temperature=0.4,
+        )
+        detailed_explanation_text = llm_response.choices[0].message.content.strip()
+        logging.info(f"Generated detailed explanation for '{headline_text}'.")
+
+    except Exception as e:
+        logging.error(f"❌ Error generating detailed explanation for '{headline_text}': {e}", exc_info=True)
+        detailed_explanation_text = summary_item.get("explanation", "Could not generate a detailed explanation.")
+
+    return {
+        "title": headline_text,
+        "description": detailed_explanation_text,
+        "sources": list(sources_found) if sources_found else ["General News Sources"]
+    }
+
+async def generate_detailed_feed(
+    cached_feed: Dict[str, Dict[int, Dict[str, Any]]]
+) -> Dict[str, Dict[int, Dict[str, Any]]]:
+    """
+    Generates detailed explanations for each summary in the cached feed.
+    Does NOT cache the result internally; the caller is responsible for caching.
+    """
+    if not cached_feed:
+        logging.info("No cached feed found to generate detailed explanations from.")
+        return {}
+
+    detailed_feed_structured: Dict[str, Dict[int, Dict[str, Any]]] = {}
+    vector_store = get_upstash_vector_store()
+
+    for topic_key, summaries_map in cached_feed.items():
+        logging.info(f"Processing detailed explanations for topic: {topic_key}")
+        detailed_summaries_for_topic: Dict[int, Dict[str, Any]] = {}
+
+        for summary_id in sorted(summaries_map.keys()):
+            summary_item = summaries_map[summary_id]
+            detailed_item = await get_detailed_explanation_from_vector(summary_item, vector_store)
+            detailed_summaries_for_topic[summary_id] = detailed_item
+
+        detailed_feed_structured[topic_key] = detailed_summaries_for_topic
+
+    logging.info("✅ Detailed explanation generation complete.")
+    return detailed_feed_structured
+
+
+def cache_detailed_feed(feed_data: Dict[str, Dict[int, Dict[str, Any]]]):
+    """Caches the given detailed feed data to Redis using its dedicated client."""
+    try:
+        detailed_explainer_redis_client.set(DETAILED_FEED_CACHE_KEY, json.dumps(feed_data, ensure_ascii=False))
+        detailed_explainer_redis_client.expire(DETAILED_FEED_CACHE_KEY, 86400)
+        logging.info(f"✅ Detailed feed cached under key '{DETAILED_FEED_CACHE_KEY}' with 24-hour expiry.")
+    except Exception as e:
+        logging.error(f"❌ [Redis detailed feed caching error]: {e}", exc_info=True)
+        raise
+
+
+def get_cached_detailed_feed() -> Dict[str, Dict[int, Dict[str, Any]]]:
+    """Retrieves the cached detailed feed from Redis using its dedicated client."""
+    try:
+        cached_raw = detailed_explainer_redis_client.get(DETAILED_FEED_CACHE_KEY)
+        if cached_raw:
+            logging.info(f"✅ Retrieved cached detailed feed from '{DETAILED_FEED_CACHE_KEY}'.")
+            # NOTE: the JSON round trip turns the int summary IDs into str keys;
+            # callers that rely on int keys should coerce them.
+            return json.loads(cached_raw)
+        else:
+            logging.info(f"ℹ️ No cached detailed feed found under key '{DETAILED_FEED_CACHE_KEY}'.")
+            return {}
+    except Exception as e:
+        logging.error(f"❌ [Redis detailed feed retrieval error]: {e}", exc_info=True)
+        return {}
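The module's public surface is generate_detailed_feed, cache_detailed_feed, and get_cached_detailed_feed. A minimal driver sketch, assuming OPENAI_API_KEY and UPSTASH_REDIS_URL are set, with a hypothetical single-topic feed shaped like the summary items the code reads ("summary", "article_link", "representative_title", "explanation"):

import asyncio

from components.generators.detailed_explainer import (
    generate_detailed_feed,
    cache_detailed_feed,
    get_cached_detailed_feed,
)

# Hypothetical input mirroring the fields get_detailed_explanation_from_vector reads.
sample_feed = {
    "technology": {
        0: {
            "summary": "Chipmaker unveils a new AI accelerator",
            "article_link": "https://example.com/ai-accelerator",
            "representative_title": "New AI accelerator announced",
            "explanation": "Placeholder short explanation.",
        }
    }
}

detailed = asyncio.run(generate_detailed_feed(sample_feed))
cache_detailed_feed(detailed)
print(get_cached_detailed_feed())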
routes/api/descriptive.py ADDED
@@ -0,0 +1,95 @@
+# routes/api/descriptive.py
+from fastapi import APIRouter, HTTPException, status
+import logging
+from typing import Dict, Any
+
+# Import functions directly from the now-standalone detailed_explainer.
+# The sys.path setup in app.py must allow imports from components/generators.
+from components.generators.detailed_explainer import (
+    generate_detailed_feed,
+    cache_detailed_feed,
+    get_cached_detailed_feed
+)
+# The initial summaries are managed by daily_feed.py.
+from components.generators.daily_feed import get_cached_daily_feed
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+router = APIRouter()
+
+@router.post("/generate-detailed")  # Endpoint for triggering detailed generation
+async def generate_detailed_headlines_endpoint() -> Dict[str, Any]:
+    """
+    Generates detailed explanations for the latest cached summaries.
+    Requires initial summaries to be present in the Redis cache (from daily_feed.py).
+    The final detailed feed is then cached by this endpoint under its dedicated key.
+    """
+    logging.info("API Call: POST /api/headlines/generate-detailed initiated.")
+    try:
+        # Step 1: Retrieve the cached initial summaries
+        initial_summaries = get_cached_daily_feed()  # Reads "initial_news_summary_cache"
+
+        if not initial_summaries:
+            logging.warning("No initial summaries found in cache to generate detailed explanations from.")
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail="No initial news summaries found in cache. Please run the ingestion/summarization process first (e.g., /api/ingest/run)."
+            )
+
+        # Step 2: Generate detailed explanations (async call into detailed_explainer)
+        detailed_feed = await generate_detailed_feed(initial_summaries)
+
+        if not detailed_feed:
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail="Failed to generate detailed explanations. Check server logs for errors during LLM calls or content retrieval."
+            )
+
+        # Step 3: Cache the final detailed feed; cache_detailed_feed uses its own
+        # Redis client and DETAILED_FEED_CACHE_KEY internally.
+        cache_detailed_feed(detailed_feed)
+
+        logging.info("API Call: POST /api/headlines/generate-detailed completed successfully.")
+
+        total_items = sum(len(topic_summaries) for topic_summaries in detailed_feed.values())
+
+        return {"status": "success", "message": "Detailed headlines generated and cached.", "items": total_items}
+
+    except HTTPException:
+        raise  # Re-raise FastAPI's HTTPExceptions unchanged
+    except Exception as e:
+        logging.error(f"Error in /api/headlines/generate-detailed: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"An unexpected error occurred during detailed feed generation: {e}"
+        )
+
+@router.get("/get-detailed")  # Endpoint for retrieving detailed headlines
+async def get_detailed_headlines_endpoint() -> Dict[str, Dict[int, Dict[str, Any]]]:
+    """
+    Retrieves the most recently cached *fully detailed* news feed.
+    Returns 404 if no detailed feed is found in the cache.
+    """
+    logging.info("API Call: GET /api/headlines/get-detailed initiated.")
+    try:
+        # Retrieve the cached detailed feed via detailed_explainer
+        cached_detailed_feed = get_cached_detailed_feed()
+
+        if not cached_detailed_feed:
+            logging.info("No full detailed news feed found in cache.")
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail="No detailed news feed found in cache. Please run /api/headlines/generate-detailed first."
+            )
+
+        logging.info("API Call: GET /api/headlines/get-detailed completed successfully.")
+        return cached_detailed_feed
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logging.error(f"Error in /api/headlines/get-detailed: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"An unexpected error occurred while retrieving cached detailed feed: {e}"
+        )
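Note that app.py mounts routes.api.headlines at the /api/headlines prefix these log messages reference; wiring this new descriptive.py router in would need a matching include_router call. Assuming it is mounted there and the server is reachable locally, a short httpx sketch exercises both endpoints:

import httpx

BASE = "http://localhost:7860"  # assumed local dev address; adjust to your deployment

# Trigger detailed generation (the initial summaries cache must be populated first,
# e.g., via the ingestion endpoint). The long timeout allows for per-item LLM calls.
resp = httpx.post(f"{BASE}/api/headlines/generate-detailed", timeout=300.0)
print(resp.status_code, resp.json())

# Fetch the cached detailed feed.
resp = httpx.get(f"{BASE}/api/headlines/get-detailed")
print(resp.status_code, resp.json() if resp.status_code == 200 else resp.text)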