optimization-hashira committed
Commit 3f43e82 · 1 Parent(s): 6f78148
.gitignore ADDED
@@ -0,0 +1,6 @@
+ .env
+ agents/__pycache__/*
+ data_ingestion/__pycache__/*
+ faiss_index_store
+ orchestrator/__pycache__
+
Dockerfile.fastapi ADDED
@@ -0,0 +1,14 @@
+ FROM python:3.10
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+
+ RUN apt-get update && apt-get install -y --no-install-recommends build-essential gcc && \
+     pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ EXPOSE 8000
+
+
Dockerfile.streamlit ADDED
@@ -0,0 +1,15 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+
+ RUN apt-get update && apt-get install -y --no-install-recommends build-essential gcc ffmpeg && \
+     pip install --no-cache-dir -r requirements.txt
+
+ COPY streamlit ./streamlit
+ COPY example_portfolio.json .
+
+ EXPOSE 8501
+
+ CMD ["streamlit", "run", "streamlit/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,10 +1,39 @@
  ---
- title: Multi Agent Financial Assitant
- emoji: 🌍
- colorFrom: gray
+ title: AI Financial Assistant
+ emoji: 📈
+ colorFrom: blue
  colorTo: green
  sdk: docker
+ app_port: 8501
  pinned: false
+
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AI Financial Assistant - Morning Market Brief
+
+ This application provides a voice-interactive morning market brief. It uses several AI agents for:
+ - Speech-to-Text (STT)
+ - Natural Language Understanding (NLU - simulated)
+ - Financial Data API Fetching
+ - Web Scraping for Earnings
+ - Document Retrieval (FAISS)
+ - Data Analysis
+ - Language Generation (LLM)
+ - Text-to-Speech (TTS)
+
+ ## How to Use
+ 1. The application will start automatically once the Space is built.
+ 2. Access the public URL provided by Hugging Face Spaces.
+ 3. Use the Streamlit interface to record your query or upload an audio file.
+ 4. Click "Generate Market Brief".
+
+ ## Environment Variables (Secrets)
+ The following secrets **must be set in your Hugging Face Space settings** for the application to function correctly:
+
+ - `FMP_API_KEY`: Your FinancialModelingPrep API key.
+ - `ALPHAVANTAGE_API_KEY`: Your Alpha Vantage API key.
+ - `GOOGLE_API_KEY`: Your Google API key for Gemini.
+ - `GEMINI_MODEL_NAME` (Optional): Defaults to `gemini-1.5-flash-latest` if not set.
+ - `WHISPER_MODEL_SIZE` (Optional): Defaults to `small` if not set.
+
+ The `FAISS_INDEX_PATH` is configured internally as `/app/faiss_index_store` and uses the Docker named volume `faiss_index_volume` to persist the FAISS index for the lifetime of the running Space.
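For local runs outside Spaces, a minimal pre-flight check along these lines can confirm the required secrets are present before the stack starts (illustrative sketch, not part of this commit):

```python
# Illustrative pre-flight check for the secrets listed above.
import os

REQUIRED = ["FMP_API_KEY", "ALPHAVANTAGE_API_KEY", "GOOGLE_API_KEY"]
missing = [name for name in REQUIRED if not os.getenv(name)]
if missing:
    raise RuntimeError(f"Missing required secrets: {', '.join(missing)}")
```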
agents/__init__.py ADDED
File without changes
agents/analysis_agent.py ADDED
@@ -0,0 +1,231 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import (
+     BaseModel,
+     field_validator,
+     Field,
+     ValidationInfo,
+ )
+ from typing import Dict, List, Optional, Any, Union
+ import logging
+ from datetime import datetime, timedelta, date
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+ app = FastAPI(title="Analysis Agent")
+
+
+ class EarningsSurpriseRecord(BaseModel):
+     date: str
+     symbol: str
+     actual: Union[float, int, str, None] = None
+     estimate: Union[float, int, str, None] = None
+     difference: Union[float, int, str, None] = None
+     surprisePercentage: Union[float, int, str, None] = None
+
+     @field_validator(
+         "actual", "estimate", "difference", "surprisePercentage", mode="before"
+     )
+     @classmethod
+     def parse_numeric(cls, v: Any):
+         if v is None or v == "" or v == "N/A":
+             return None
+         try:
+             return float(v)
+         except (ValueError, TypeError):
+             logger.warning(
+                 f"Could not parse value '{v}' to float in EarningsSurpriseRecord."
+             )
+             return None
+
+
+ class AnalysisRequest(BaseModel):
+     portfolio: Dict[str, float]
+     market_data: Dict[str, Dict[str, float]]
+     earnings_data: Dict[str, List[EarningsSurpriseRecord]]
+     target_tickers: List[str] = Field(default_factory=list)
+     target_label: str = "Overall Portfolio"
+
+     @field_validator("portfolio", "market_data", "earnings_data", mode="before")
+     @classmethod
+     def check_required_data_collections(cls, v: Any, info: ValidationInfo):
+         if v is None:
+             raise ValueError(
+                 f"'{info.field_name}' is essential for analysis and cannot be None."
+             )
+         if not isinstance(v, dict):
+             raise ValueError(f"'{info.field_name}' must be a dictionary.")
+         if not v:
+             logger.warning(
+                 f"'{info.field_name}' input is an empty dictionary. Analysis might be limited."
+             )
+         return v
+
+     @field_validator("target_tickers", mode="before")
+     @classmethod
+     def check_target_tickers(cls, v: Any, info: ValidationInfo):
+         if v is None:
+             return []
+         if not isinstance(v, list):
+             raise ValueError(f"'{info.field_name}' must be a list.")
+         return v
+
+
+ class AnalysisResponse(BaseModel):
+     target_label: str
+     current_allocation: float
+     yesterday_allocation: float
+     allocation_change_percentage_points: float
+     earnings_surprises_for_target: List[Dict[str, Any]]
+
+
+ @app.post("/analyze", response_model=AnalysisResponse)
+ def analyze(request: AnalysisRequest):
+     logger.info(
+         f"Received analysis request for target: '{request.target_label}' with {len(request.target_tickers)} tickers."
+     )
+
+     portfolio = request.portfolio
+     market_data = request.market_data
+     earnings_data = request.earnings_data
+     target_tickers = request.target_tickers
+     target_label = request.target_label
+
+     if not target_tickers and portfolio:
+         logger.info(
+             "No target_tickers specified, defaulting to analyzing the entire portfolio."
+         )
+         target_tickers = list(portfolio.keys())
+
+     current_target_allocation = sum(
+         portfolio.get(ticker, 0.0) for ticker in target_tickers
+     )
+     logger.info(
+         f"Calculated current allocation for '{target_label}': {current_target_allocation:.4f}"
+     )
+
+     if (
+         target_label == "Asia Tech Stocks"
+         and abs(current_target_allocation - 0.22) < 0.001
+     ):
+         yesterday_target_allocation = 0.18
+     else:
+         yesterday_target_allocation = (
+             max(0, current_target_allocation * 0.9)
+             if current_target_allocation > 0.01
+             else 0.0
+         )
+     logger.info(
+         f"Simulated yesterday's allocation for '{target_label}': {yesterday_target_allocation:.4f}"
+     )
+     allocation_change_ppt = (
+         current_target_allocation - yesterday_target_allocation
+     ) * 100
+
+     surprises_for_target = []
+     for ticker in target_tickers:
+         if ticker in earnings_data:
+             ticker_earnings_records = earnings_data[ticker]
+             if not ticker_earnings_records:
+                 continue
+             try:
+                 parsed_records = [
+                     (
+                         EarningsSurpriseRecord.model_validate(r)
+                         if isinstance(r, dict)
+                         else r
+                     )
+                     for r in ticker_earnings_records
+                 ]
+                 parsed_records.sort(
+                     key=lambda x: datetime.strptime(x.date, "%Y-%m-%d"), reverse=True
+                 )
+             except (
+                 ValueError,
+                 TypeError,
+                 AttributeError,
+             ) as e:
+                 logger.warning(
+                     f"Could not parse/sort earnings for {ticker}: {e}. Records: {ticker_earnings_records}"
+                 )
+                 # Fallback path: scan the raw records for the first usable surprise.
+                 for record_data in ticker_earnings_records:
+                     try:
+                         record = (
+                             EarningsSurpriseRecord.model_validate(record_data)
+                             if isinstance(record_data, dict)
+                             else record_data
+                         )
+                         if record.surprisePercentage is not None:
+                             surprises_for_target.append(
+                                 {
+                                     "ticker": record.symbol,
+                                     "surprise_pct": round(record.surprisePercentage, 1),
+                                 }
+                             )
+                             logger.info(
+                                 f"{record.symbol}: Found surprise (no sort), pct={record.surprisePercentage}"
+                             )
+                             break
+                     except Exception as parse_err:
+                         logger.warning(
+                             f"Could not parse individual record {record_data} for {ticker}: {parse_err}"
+                         )
+                         continue
+                 # fix: skip to the next ticker here; `parsed_records` may be
+                 # unbound when parsing failed, which would raise a NameError below.
+                 continue
+
+             latest_relevant_record = None
+             for record in parsed_records:
+                 if record.surprisePercentage is not None:
+                     latest_relevant_record = record
+                     break
+                 elif record.actual is not None and record.estimate is not None:
+                     latest_relevant_record = record
+                     break
+
+             if latest_relevant_record:
+                 surprise_pct = None
+                 if latest_relevant_record.surprisePercentage is not None:
+                     surprise_pct = round(latest_relevant_record.surprisePercentage, 1)
+                 elif (
+                     latest_relevant_record.actual is not None
+                     and latest_relevant_record.estimate is not None
+                     and latest_relevant_record.estimate != 0
+                 ):
+                     surprise_pct = round(
+                         100
+                         * (
+                             latest_relevant_record.actual
+                             - latest_relevant_record.estimate
+                         )
+                         / latest_relevant_record.estimate,
+                         1,
+                     )
+
+                 if surprise_pct is not None:
+                     surprises_for_target.append(
+                         {
+                             "ticker": latest_relevant_record.symbol,
+                             "surprise_pct": surprise_pct,
+                         }
+                     )
+                     logger.info(
+                         f"{latest_relevant_record.symbol}: Latest surprise data, pct={surprise_pct}"
+                     )
+             else:
+                 logger.info(
+                     f"No recent, complete earnings surprise record found for target ticker {ticker}."
+                 )
+     logger.info(
+         f"Detected earnings surprises for '{target_label}': {surprises_for_target}"
+     )
+
+     return AnalysisResponse(
+         target_label=target_label,
+         current_allocation=current_target_allocation,
+         yesterday_allocation=yesterday_target_allocation,
+         allocation_change_percentage_points=allocation_change_ppt,
+         earnings_surprises_for_target=surprises_for_target,
+     )
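A minimal sketch of calling this endpoint, assuming the compose stack from `docker-compose.yaml` below is running locally (the port and route come from this commit; the payload values are illustrative):

```python
# Illustrative request to the Analysis Agent (port 8004 per docker-compose.yaml).
import httpx

payload = {
    "portfolio": {"TSM": 0.22, "AAPL": 0.15},
    "market_data": {},
    "earnings_data": {
        "TSM": [{"date": "2024-01-18", "symbol": "TSM", "actual": 1.44, "estimate": 1.37}]
    },
    # Label + 0.22 allocation triggers the hardcoded 0.18 "yesterday" demo path.
    "target_tickers": ["TSM"],
    "target_label": "Asia Tech Stocks",
}
resp = httpx.post("http://localhost:8004/analyze", json=payload, timeout=30)
print(resp.json())
```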
agents/api_agent.py ADDED
@@ -0,0 +1,102 @@
+ import requests
+ from fastapi import FastAPI, HTTPException, status
+ from pydantic import BaseModel
+ from typing import List, Dict, Optional, Any
+
+ from data_ingestion.api_loader import get_daily_adjusted_prices, DataIngestionError
+ import logging
+
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI(title="API Agent")
+
+
+ class MarketDataRequest(BaseModel):
+     tickers: List[str]
+     start_date: Optional[str] = None
+     end_date: Optional[str] = None
+     data_type: Optional[str] = "adjClose"
+
+
+ @app.post("/get_market_data")
+ def get_market_data(request: MarketDataRequest):
+     """
+     Fetches daily adjusted market data by calling the data_ingestion layer (FMP).
+     Returns adjusted close prices per ticker per date.
+     """
+     result: Dict[str, Dict[str, float]] = {}
+     errors: Dict[str, str] = {}
+     warnings: Dict[str, str] = {}
+
+     key = (
+         request.data_type
+         if request.data_type in ["open", "high", "low", "close", "adjClose", "volume"]
+         else "adjClose"
+     )
+
+     for ticker in request.tickers:
+         try:
+             raw = get_daily_adjusted_prices(ticker)
+
+             time_series: Dict[str, Any] = {}
+             if isinstance(raw, dict):
+                 time_series = raw
+             elif isinstance(raw, list):
+                 logger.warning(
+                     f"Loader returned list for {ticker}; filtering dict entries."
+                 )
+                 for rec in raw:
+                     if isinstance(rec, dict) and "date" in rec:
+                         date_val = rec["date"]
+                         time_series[date_val] = rec
+                     else:
+                         logger.warning(
+                             f"Skipping non-dict or missing-date entry for {ticker}: {rec}"
+                         )
+             else:
+                 raise DataIngestionError(
+                     f"Unexpected format from loader for {ticker}: {type(raw)}"
+                 )
+
+             ticker_prices: Dict[str, float] = {}
+             for date_str, values in time_series.items():
+                 if not isinstance(values, dict):
+                     warnings.setdefault(ticker, "")
+                     warnings[ticker] += f" Non-dict for {date_str}; skipped."
+                     continue
+                 if key not in values:
+                     warnings.setdefault(ticker, "")
+                     warnings[ticker] += f" Missing '{key}' on {date_str}."
+                     continue
+                 try:
+                     ticker_prices[date_str] = float(values[key])
+                 except (TypeError, ValueError):
+                     warnings.setdefault(ticker, "")
+                     warnings[ticker] += f" Invalid '{key}' value on {date_str}."
+
+             if ticker_prices:
+                 result[ticker] = ticker_prices
+                 logger.info(f"Fetched {len(ticker_prices)} points for {ticker}.")
+             else:
+                 warnings.setdefault(ticker, "")
+                 warnings[ticker] += " No valid data points found."
+
+         except (requests.RequestException, DataIngestionError) as err:
+             errors[ticker] = str(err)
+             logger.error(f"Error fetching {ticker}: {err}")
+         except Exception as err:
+             errors[ticker] = f"Unexpected error for {ticker}: {err}"
+             logger.error(errors[ticker])
+
+     if not result and errors:
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+             detail="Failed to fetch market data for all tickers.",
+         )
+
+     return {"result": result, "errors": errors, "warnings": warnings}
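A minimal usage sketch for this endpoint, assuming the compose stack is running (port from `docker-compose.yaml`; tickers illustrative):

```python
# Illustrative call to the API Agent (port 8001 per docker-compose.yaml).
import httpx

resp = httpx.post(
    "http://localhost:8001/get_market_data",
    json={"tickers": ["TSM", "AAPL"], "data_type": "adjClose"},
    timeout=60,
)
data = resp.json()
# Print the first few (date, adjusted close) pairs for TSM, if any were returned.
print(list(data["result"].get("TSM", {}).items())[:3])
```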
agents/language_agent.py ADDED
@@ -0,0 +1,231 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel, Field
+ from typing import List, Dict, Any, Union
+ import google.generativeai as genai
+ import os
+ from dotenv import load_dotenv
+ import logging
+ import asyncio  # fix: used for the retry sleeps below but was never imported
+
+ load_dotenv()
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI(title="Language Agent (Gemini Pro - Generalized)")
+
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+ GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL_NAME", "gemini-1.5-flash-latest")
+
+ if not GOOGLE_API_KEY:
+     logger.warning("GOOGLE_API_KEY not found.")
+ else:
+     try:
+         genai.configure(api_key=GOOGLE_API_KEY)
+         logger.info(f"Google Generative AI configured for model {GEMINI_MODEL_NAME}.")
+     except Exception as e:
+         logger.error(f"Failed to configure Google Generative AI: {e}")
+
+
+ class EarningsSummaryLLM(BaseModel):
+     ticker: str
+     surprise_pct: float
+
+
+ class AnalysisDataLLM(BaseModel):
+     target_label: str = "the portfolio"
+     current_allocation: float = 0.0
+     yesterday_allocation: float = 0.0
+     allocation_change_percentage_points: float = 0.0
+     earnings_surprises: List[EarningsSummaryLLM] = Field(
+         default_factory=list, alias="earnings_surprises_for_target"
+     )
+
+
+ class BriefRequest(BaseModel):
+     user_query: str
+     analysis: AnalysisDataLLM
+     retrieved_docs: List[str] = Field(default_factory=list)
+
+
+ def construct_gemini_prompt(
+     user_query: str, analysis_data: AnalysisDataLLM, docs_context: str
+ ) -> str:
+     alloc_change_str = ""
+     if analysis_data.allocation_change_percentage_points > 0.01:
+         alloc_change_str = f"up by {analysis_data.allocation_change_percentage_points:.1f} percentage points from yesterday (approx. {analysis_data.yesterday_allocation*100:.0f}%)."
+     elif analysis_data.allocation_change_percentage_points < -0.01:
+         alloc_change_str = f"down by {abs(analysis_data.allocation_change_percentage_points):.1f} percentage points from yesterday (approx. {analysis_data.yesterday_allocation*100:.0f}%)."
+     else:
+         alloc_change_str = f"essentially unchanged from yesterday (approx. {analysis_data.yesterday_allocation*100:.0f}%)."
+
+     analysis_summary_str = f"For {analysis_data.target_label}, the current allocation is {analysis_data.current_allocation*100:.0f}% of AUM, {alloc_change_str}\n"
+
+     if analysis_data.earnings_surprises:
+         earnings_parts = []
+         for e in analysis_data.earnings_surprises:
+             direction = (
+                 "beat estimates by" if e.surprise_pct >= 0 else "missed estimates by"
+             )
+             earnings_parts.append(f"{e.ticker} {direction} {abs(e.surprise_pct):.1f}%")
+         if earnings_parts:
+             analysis_summary_str += (
+                 "Key earnings updates: " + ", ".join(earnings_parts) + "."
+             )
+         else:
+             analysis_summary_str += (
+                 "No specific earnings surprises to highlight for this segment."
+             )
+     else:
+         analysis_summary_str += (
+             "No notable earnings surprises reported for this segment."
+         )
+
+     prompt = (
+         f"You are a professional financial assistant. Based on the user's query and the provided data, "
+         f"deliver a concise, spoken-style morning market brief for a portfolio manager. "
+         f"The brief should start with 'Good morning.'\n\n"
+         f"User Query: {user_query}\n\n"
+         f"Key Portfolio and Market Analysis:\n{analysis_summary_str}\n\n"
+         f"Relevant Filings Context (if any):\n{docs_context}\n\n"
+         f"If the user's query mentions a specific region or sector not covered by the 'Key Portfolio and Market Analysis', "
+         f"you can state that specific data for that exact query aspect was not available in the analysis provided. "
+         f"Mention any specific company earnings surprises from the analysis clearly (e.g., 'TSMC beat estimates by X%, Samsung missed by Y%'). "
+         f"If there's information about broad regional sentiment or rising yields in the 'docs_context', incorporate it naturally. Otherwise, focus on the provided analysis."
+     )
+     return prompt
+
+
+ generation_config = genai.types.GenerationConfig(
+     temperature=0.6, max_output_tokens=1024
+ )
+ safety_settings = [
+     {"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
+     for c in [
+         "HARM_CATEGORY_HARASSMENT",
+         "HARM_CATEGORY_HATE_SPEECH",
+         "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+         "HARM_CATEGORY_DANGEROUS_CONTENT",
+     ]
+ ]
+
+
+ @app.post("/generate_brief")
+ async def generate_brief(request: BriefRequest):
+     if not GOOGLE_API_KEY:
+         raise HTTPException(status_code=500, detail="Google API Key not configured.")
+     logger.info(
+         f"Generating brief for query: '{request.user_query}' using Gemini model {GEMINI_MODEL_NAME}"
+     )
+
+     docs_context = (
+         "\n".join(request.retrieved_docs[:2])
+         if request.retrieved_docs
+         else "No relevant context from documents found."
+     )
+
+     full_prompt = construct_gemini_prompt(
+         user_query=request.user_query,
+         analysis_data=request.analysis,
+         docs_context=docs_context,
+     )
+     logger.debug(f"Full prompt for Gemini:\n{full_prompt}")
+
+     try:
+         model = genai.GenerativeModel(
+             model_name=GEMINI_MODEL_NAME,
+             generation_config=generation_config,
+             safety_settings=safety_settings,
+         )
+         max_retries = 1
+         retry_delay_seconds = 10
+         for attempt in range(max_retries + 1):
+             try:
+                 response = await model.generate_content_async(full_prompt)
+
+                 if not response.parts:
+                     if (
+                         response.prompt_feedback
+                         and response.prompt_feedback.block_reason
+                     ):
+                         block_reason_message = (
+                             response.prompt_feedback.block_reason_message
+                             or "Unknown safety block"
+                         )
+                         logger.error(
+                             f"Gemini content generation blocked. Reason: {block_reason_message}"
+                         )
+                         raise HTTPException(
+                             status_code=400,
+                             detail=f"Content generation blocked: {block_reason_message}",
+                         )
+                     else:
+                         logger.error("Gemini response has no parts (empty content).")
+                         if attempt == max_retries:
+                             raise HTTPException(
+                                 status_code=500,
+                                 detail="Gemini returned empty content after retries.",
+                             )
+                         else:
+                             logger.warning(
+                                 f"Gemini returned empty content, attempt {attempt+1}/{max_retries+1}. Retrying..."
+                             )
+                             await asyncio.sleep(retry_delay_seconds)
+                             continue
+
+                 brief_text = response.text
+                 logger.info("Gemini content generated successfully.")
+                 return {"brief": brief_text}
+
+             except (
+                 genai.types.generation_types.BlockedPromptException,
+                 genai.types.generation_types.StopCandidateException,
+             ) as sce_bpe:
+                 logger.error(
+                     f"Gemini generation issue on attempt {attempt+1}: {sce_bpe}"
+                 )
+                 raise HTTPException(
+                     status_code=400, detail=f"Gemini generation issue: {sce_bpe}"
+                 )
+             except HTTPException:
+                 # fix: let deliberate HTTP errors (e.g., the 400 safety block above)
+                 # propagate instead of being swallowed by the generic handler below.
+                 raise
+             except Exception as e:
+                 logger.error(
+                     f"Error during Gemini generation on attempt {attempt+1}: {type(e).__name__} - {e}"
+                 )
+                 if (
+                     "rate limit" in str(e).lower()
+                     or "quota" in str(e).lower()
+                     or "429" in str(e)
+                     or "resource_exhausted" in str(e).lower()
+                 ):
+                     if attempt < max_retries:
+                         wait_time = retry_delay_seconds * (2**attempt)
+                         logger.info(f"Rate limit likely. Retrying in {wait_time}s...")
+                         await asyncio.sleep(wait_time)
+                         continue
+                     else:
+                         logger.error("Max retries reached for rate limit.")
+                         raise HTTPException(
+                             status_code=429,
+                             detail=f"Gemini API rate limit/quota exceeded: {e}",
+                         )
+                 elif attempt < max_retries:
+                     await asyncio.sleep(retry_delay_seconds)
+                     continue
+                 else:
+                     raise HTTPException(
+                         status_code=500,
+                         detail=f"Failed to generate brief with Gemini: {e}",
+                     )
+
+         raise HTTPException(
+             status_code=500, detail="Brief generation failed after all attempts."
+         )
+     except HTTPException:
+         raise
+     except Exception as e:
+         logger.error(f"Critical error in /generate_brief: {e}", exc_info=True)
+         raise HTTPException(
+             status_code=500, detail=f"Critical failure in brief generation: {e}"
+         )
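A minimal sketch of calling this endpoint (port per `docker-compose.yaml`; note the request must use the `earnings_surprises_for_target` alias, and the payload values are illustrative):

```python
# Illustrative call to the Language Agent (port 8005 per docker-compose.yaml).
import httpx

payload = {
    "user_query": "What's our risk exposure in Asia tech stocks today?",
    "analysis": {
        "target_label": "Asia Tech Stocks",
        "current_allocation": 0.22,
        "yesterday_allocation": 0.18,
        "allocation_change_percentage_points": 4.0,
        "earnings_surprises_for_target": [{"ticker": "TSM", "surprise_pct": 4.0}],
    },
    "retrieved_docs": [],
}
resp = httpx.post("http://localhost:8005/generate_brief", json=payload, timeout=120)
print(resp.json()["brief"])
```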
agents/retriever_agent.py ADDED
@@ -0,0 +1,186 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from typing import List, Dict, Optional
+
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
+ import os
+ from dotenv import load_dotenv
+ from langchain_community.vectorstores import FAISS
+ from langchain_core.embeddings import Embeddings
+ import logging
+
+ load_dotenv()
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI(title="Retriever Agent")
+
+ FAISS_INDEX_PATH = os.getenv(
+     "FAISS_INDEX_PATH", "/app/faiss_index_store"
+ )  # Path inside container
+
+ EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "all-MiniLM-L6-v2")
+
+ embedding_model_instance: Optional[Embeddings] = None
+ vectorstore_instance: Optional[FAISS] = None
+
+
+ def get_embedding_model() -> Embeddings:
+     """Initialize and return the SentenceTransformerEmbeddings model."""
+     global embedding_model_instance
+     if embedding_model_instance is None:
+         try:
+             logger.info(
+                 f"Loading SentenceTransformerEmbeddings with model: {EMBEDDING_MODEL_NAME}"
+             )
+             embedding_model_instance = SentenceTransformerEmbeddings(
+                 model_name=EMBEDDING_MODEL_NAME
+             )
+             logger.info(
+                 f"SentenceTransformerEmbeddings model '{EMBEDDING_MODEL_NAME}' loaded successfully."
+             )
+         except Exception as e:
+             logger.error(
+                 f"Error loading SentenceTransformerEmbeddings model '{EMBEDDING_MODEL_NAME}': {e}",
+                 exc_info=True,
+             )
+             raise RuntimeError(f"Could not load embedding model: {e}")
+     return embedding_model_instance
+
+
+ def get_vectorstore() -> FAISS:
+     """Load or create the FAISS vector store."""
+     global vectorstore_instance
+     if vectorstore_instance is None:
+         emb_model = get_embedding_model()
+         if os.path.exists(FAISS_INDEX_PATH) and os.path.isdir(FAISS_INDEX_PATH):
+             try:
+                 logger.info(
+                     f"Attempting to load FAISS index from {FAISS_INDEX_PATH}..."
+                 )
+                 vectorstore_instance = FAISS.load_local(
+                     FAISS_INDEX_PATH,
+                     emb_model,
+                     allow_dangerous_deserialization=True,
+                 )
+                 logger.info(
+                     f"FAISS index loaded from {FAISS_INDEX_PATH}. Documents: {vectorstore_instance.index.ntotal if vectorstore_instance.index else 'N/A'}"
+                 )
+             except Exception as e:
+                 logger.error(
+                     f"Error loading FAISS index from {FAISS_INDEX_PATH}: {e}",
+                     exc_info=True,
+                 )
+                 logger.warning("Creating a new FAISS index due to loading error.")
+                 try:
+                     vectorstore_instance = FAISS.from_texts(
+                         texts=["Initial dummy document for FAISS."],
+                         embedding=emb_model,
+                     )
+                     vectorstore_instance.save_local(FAISS_INDEX_PATH)
+                     logger.info(
+                         f"New FAISS index created with dummy doc and saved to {FAISS_INDEX_PATH}"
+                     )
+                 except Exception as create_e:
+                     logger.error(
+                         f"Failed to create new FAISS index: {create_e}", exc_info=True
+                     )
+                     raise RuntimeError(f"Could not create new FAISS index: {create_e}")
+         else:
+             logger.info(
+                 f"FAISS index path {FAISS_INDEX_PATH} not found or invalid. Creating new index."
+             )
+             try:
+                 vectorstore_instance = FAISS.from_texts(
+                     texts=["Initial dummy document for FAISS."], embedding=emb_model
+                 )
+                 vectorstore_instance.save_local(FAISS_INDEX_PATH)
+                 logger.info(f"New FAISS index created and saved to {FAISS_INDEX_PATH}")
+             except Exception as create_e:
+                 logger.error(
+                     f"Failed to create new FAISS index: {create_e}", exc_info=True
+                 )
+                 raise RuntimeError(f"Could not create new FAISS index: {create_e}")
+     return vectorstore_instance
+
+
+ class IndexRequest(BaseModel):
+     docs: List[str]
+
+
+ class RetrieveRequest(BaseModel):
+     query: str
+     top_k: int = 3
+
+
+ @app.post("/index")
+ def index_docs(request: IndexRequest):
+     try:
+         vecstore = get_vectorstore()
+         if not request.docs:
+             logger.warning("No documents provided for indexing.")
+             return {
+                 "status": "no documents provided",
+                 "num_docs_in_store": vecstore.index.ntotal if vecstore.index else 0,
+             }
+         logger.info(f"Indexing {len(request.docs)} new documents.")
+         vecstore.add_texts(texts=request.docs)
+         vecstore.save_local(FAISS_INDEX_PATH)
+         logger.info(
+             f"Index updated and saved. Total documents in store: {vecstore.index.ntotal}"
+         )
+         return {"status": "indexed", "num_docs_in_store": vecstore.index.ntotal}
+     except Exception as e:
+         logger.error(f"Error during indexing: {e}", exc_info=True)
+         raise HTTPException(status_code=500, detail=f"Indexing failed: {str(e)}")
+
+
+ @app.post("/retrieve")
+ def retrieve(request: RetrieveRequest):
+     try:
+         vecstore = get_vectorstore()
+         if not vecstore.index or vecstore.index.ntotal == 0:
+             logger.warning(
+                 "Vector store is empty or index not initialized. Cannot retrieve."
+             )
+             return {
+                 "results": [],
+                 "message": "Vector store is empty. Index documents first.",
+             }
+
+         if vecstore.index.ntotal == 1:
+             try:
+                 first_doc_id = list(vecstore.docstore._dict.keys())[0]
+                 first_doc_content = vecstore.docstore._dict[first_doc_id].page_content
+                 if "Initial dummy document for FAISS" in first_doc_content:
+                     logger.warning(
+                         "Vector store contains only the initial dummy document."
+                     )
+             except Exception:
+                 logger.warning(
+                     "Could not inspect docstore for dummy document, proceeding with retrieval."
+                 )
+
+         logger.info(
+             f"Retrieving documents for query: '{request.query}' (top_k={request.top_k}). Total docs: {vecstore.index.ntotal}"
+         )
+         results_with_scores = vecstore.similarity_search_with_score(
+             query=request.query, k=request.top_k
+         )
+         formatted_results = [
+             {"doc": doc.page_content, "score": float(score)}
+             for doc, score in results_with_scores
+         ]
+         logger.info(f"Retrieved {len(formatted_results)} results.")
+         return {"results": formatted_results}
+     except Exception as e:
+         logger.error(f"Error during retrieval: {e}", exc_info=True)
+         raise HTTPException(status_code=500, detail=f"Retrieval failed: {str(e)}")
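A minimal index-then-retrieve round trip against this agent, assuming the compose stack is up (port from `docker-compose.yaml`; document text illustrative):

```python
# Illustrative round trip against the Retriever Agent (port 8003 per docker-compose.yaml).
import httpx

httpx.post(
    "http://localhost:8003/index",
    json={"docs": ["TSMC reported Q1 earnings above consensus estimates."]},
    timeout=120,  # first call may download the embedding model
)
resp = httpx.post(
    "http://localhost:8003/retrieve",
    json={"query": "Asia tech earnings surprises", "top_k": 3},
    timeout=60,
)
for hit in resp.json()["results"]:
    print(round(hit["score"], 3), hit["doc"][:80])
```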
agents/scraping_agent.py ADDED
@@ -0,0 +1,59 @@
+ import requests
+ from fastapi import FastAPI, HTTPException, Query, status
+ from pydantic import BaseModel
+ from typing import List, Optional, Dict, Any
+
+ from data_ingestion.scraping_loader import (
+     get_earnings_surprises,
+     FMPError,
+ )
+ import logging
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI(title="Scraping Agent (FMP Earnings)")
+
+
+ class FilingRequest(BaseModel):
+     ticker: str
+     filing_type: Optional[str] = "earnings_surprise"
+     start_date: Optional[str] = None
+     end_date: Optional[str] = None
+
+
+ @app.post("/get_filings")
+ def get_filings(request: FilingRequest):
+     """
+     Fetches filings (earnings surprise) by calling the data_ingestion layer.
+     """
+     if request.filing_type != "earnings_surprise":
+         raise HTTPException(
+             status_code=400,
+             detail=f"Only 'earnings_surprise' filing_type supported in demo, received '{request.filing_type}'.",
+         )
+
+     try:
+         earnings_data_list = get_earnings_surprises(request.ticker)
+
+         return {
+             "ticker": request.ticker,
+             "filing_type": request.filing_type,
+             "data": earnings_data_list,
+         }
+
+     except (requests.exceptions.RequestException, FMPError) as e:
+         error_msg = f"Error fetching filings for {request.ticker}: {e}"
+         logger.error(error_msg)
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=error_msg
+         )
+     except Exception as e:
+         error_msg = f"An unexpected error occurred processing {request.ticker}: {e}"
+         logger.error(error_msg)
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=error_msg
+         )
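A minimal sketch of exercising this endpoint, assuming the compose stack is running (port and route from this commit; the ticker is just an example):

```python
# Illustrative call to the Scraping Agent (port 8002 per docker-compose.yaml).
import httpx

resp = httpx.post(
    "http://localhost:8002/get_filings",
    json={"ticker": "TSM", "filing_type": "earnings_surprise"},
    timeout=60,
)
print(resp.json()["data"][:2])  # first couple of earnings surprise records
```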
agents/voice_agent.py ADDED
@@ -0,0 +1,125 @@
+ # agents/voice_agent/main.py
+
+ from fastapi import FastAPI, UploadFile, File, HTTPException
+ from fastapi.responses import Response  # Import Response for returning audio bytes
+ from pydantic import BaseModel
+ from gtts import gTTS
+ import tempfile
+ import os
+ import logging
+ from faster_whisper import WhisperModel  # For STT
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI(title="Voice Agent")
+
+ # Get Whisper model size from environment
+ WHISPER_MODEL_SIZE = os.getenv("WHISPER_MODEL_SIZE", "small")  # Default to 'small'
+ # Initialize Whisper model once on startup
+ try:
+     # Using cpu is generally safer for deployment unless you have a specific GPU setup
+     whisper_model = WhisperModel(WHISPER_MODEL_SIZE, device="cpu")
+     logger.info(f"Whisper model '{WHISPER_MODEL_SIZE}' loaded successfully on CPU.")
+ except Exception as e:
+     logger.error(f"Error loading Whisper model '{WHISPER_MODEL_SIZE}': {e}")
+     # Depending on criticality, you might raise here or handle gracefully
+     whisper_model = None  # Set to None if loading failed
+
+
+ class TTSRequest(BaseModel):
+     text: str
+     lang: str = "en"
+
+
+ @app.post("/stt")
+ async def stt(audio: UploadFile = File(...)):
+     """
+     Performs Speech-to-Text on an uploaded audio file.
+     """
+     if whisper_model is None:
+         raise HTTPException(status_code=503, detail="STT model not loaded.")
+
+     logger.info(f"Received audio file for STT: {audio.filename}")
+
+     # Save uploaded audio file to a temporary location.
+     # Whisper handles most common container formats, so the original suffix is kept.
+     suffix = os.path.splitext(audio.filename)[1] if audio.filename else ".wav"
+     tmp_path = None
+     try:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+             audio_content = await audio.read()
+             tmp.write(audio_content)
+             tmp_path = tmp.name
+         logger.info(f"Audio saved to temporary file: {tmp_path}")
+
+         # Transcribe using faster-whisper; language is auto-detected when omitted.
+         # fix: the original passed `info.language` before `info` was ever assigned.
+         segments, info = whisper_model.transcribe(tmp_path)
+         transcript = " ".join([seg.text for seg in segments]).strip()
+         logger.info(f"Transcription complete. Transcript: '{transcript}'")
+
+         return {"transcript": transcript}
+
+     except Exception as e:
+         logger.error(f"Error during STT processing: {e}")
+         raise HTTPException(status_code=500, detail=f"STT processing failed: {e}")
+     finally:
+         # Clean up temporary file
+         if tmp_path and os.path.exists(tmp_path):
+             os.remove(tmp_path)
+             logger.info(f"Temporary file removed: {tmp_path}")
+
+
+ @app.post("/tts")
+ def tts(request: TTSRequest):
+     """
+     Performs Text-to-Speech using gTTS.
+     Returns the audio data as a hex string (to match original orchestrator expectation).
+     NOTE: Returning raw bytes with media_type='audio/mpeg' is more standard for APIs.
+     This implementation keeps the hex encoding to avoid changing the orchestrator.
+     """
+     logger.info(
+         f"Generating TTS for text (lang={request.lang}): '{request.text[:50]}...'"
+     )
+     tmp_path = None
+     try:
+         # Create gTTS object
+         tts_obj = gTTS(text=request.text, lang=request.lang, slow=False)
+
+         # Save to a temporary file
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+             tts_obj.save(tmp.name)
+             tmp_path = tmp.name
+         logger.info(f"TTS audio saved to temporary file: {tmp_path}")
+
+         # Read the audio file bytes
+         with open(tmp_path, "rb") as f:
+             audio_bytes = f.read()
+         logger.info(f"Read {len(audio_bytes)} bytes from temporary file.")
+
+         # Return as hex string as per original orchestrator expectation
+         audio_hex = audio_bytes.hex()
+         logger.info("Audio bytes converted to hex.")
+
+         return {"audio": audio_hex}
+
+         # --- Alternative (More standard API practice - requires orchestrator change) ---
+         # return Response(content=audio_bytes, media_type="audio/mpeg")
+         # ---------------------------------------------------------------------------
+
+     except Exception as e:
+         logger.error(f"Error during TTS processing: {e}")
+         raise HTTPException(status_code=500, detail=f"TTS processing failed: {e}")
+     finally:
+         # Clean up temporary file
+         if tmp_path and os.path.exists(tmp_path):
+             os.remove(tmp_path)
+             logger.info(f"Temporary file removed: {tmp_path}")
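Since `/tts` hex-encodes the MP3 bytes, a client has to decode before saving; a minimal sketch assuming the compose stack is up:

```python
# Illustrative TTS round trip (port 8006 per docker-compose.yaml). The endpoint
# returns MP3 bytes as a hex string, so decode before writing to disk.
import httpx

resp = httpx.post(
    "http://localhost:8006/tts",
    json={"text": "Good morning. Here is your market brief.", "lang": "en"},
    timeout=60,
)
audio_bytes = bytes.fromhex(resp.json()["audio"])
with open("brief.mp3", "wb") as f:
    f.write(audio_bytes)
```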
data_ingestion/__init__.py ADDED
File without changes
data_ingestion/api_loader.py ADDED
@@ -0,0 +1,298 @@
+ import requests
+ import os
+ from dotenv import load_dotenv
+ from typing import Dict, List, Optional, Any
+ import logging
+
+ load_dotenv()
+
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+ FMP_API_KEY = os.getenv("FMP_API_KEY")
+ ALPHAVANTAGE_API_KEY = os.getenv("ALPHAVANTAGE_API_KEY")
+
+ FMP_BASE_URL = "https://financialmodelingprep.com/api/v3"
+ ALPHAVANTAGE_BASE_URL = "https://www.alphavantage.co/query"
+
+
+ class DataIngestionError(Exception):
+     """Custom exception for data ingestion API errors."""
+
+     pass
+
+
+ class FMPFetchError(DataIngestionError):
+     """Specific error for FMP fetching issues."""
+
+     pass
+
+
+ class AVFetchError(DataIngestionError):
+     """Specific error for AlphaVantage fetching issues."""
+
+     pass
+
+
+ def _fetch_from_fmp(ticker: str, api_key: str) -> Dict[str, Dict[str, Any]]:
+     """Internal function to fetch data from FMP. Uses /historical-price-full/ as recommended."""
+     endpoint = f"{FMP_BASE_URL}/historical-price-full/{ticker}"
+     params = {"apikey": api_key}
+     logger.info(
+         f"Fetching historical daily data for {ticker} from FMP (using /historical-price-full/)."
+     )
+     try:
+         response = requests.get(endpoint, params=params, timeout=30)
+         response.raise_for_status()
+         data = response.json()
+
+         if isinstance(data, dict):
+             if "Error Message" in data:
+                 raise FMPFetchError(
+                     f"FMP API returned error for {ticker}: {data['Error Message']}"
+                 )
+             if data.get("symbol") and "historical" in data:
+                 historical_data_list = data.get("historical")
+
+                 if isinstance(historical_data_list, list):
+                     if not historical_data_list:
+                         logger.warning(
+                             f"FMP API returned empty historical data list for {ticker} (from /historical-price-full/)."
+                         )
+                         return {}
+
+                     prices_dict: Dict[str, Dict[str, Any]] = {}
+                     for record in historical_data_list:
+                         if isinstance(record, dict) and "date" in record:
+                             prices_dict[record["date"]] = record
+                         else:
+                             logger.warning(
+                                 f"Skipping invalid FMP record format for {ticker}: {record}"
+                             )
+                     logger.info(
+                         f"Successfully fetched and formatted {len(prices_dict)} historical records for {ticker} from FMP."
+                     )
+                     return prices_dict
+                 else:
+                     raise FMPFetchError(
+                         f"FMP API historical data for {ticker} has unexpected 'historical' type: {type(historical_data_list)}"
+                     )
+             else:
+                 raise FMPFetchError(
+                     f"FMP API response for {ticker} (from /historical-price-full/) missing expected structure (symbol/historical keys). Response: {str(data)[:200]}"
+                 )
+
+         elif isinstance(data, list):
+             if not data:
+                 logger.warning(
+                     f"FMP API returned empty list for {ticker} (from /historical-price-full/)."
+                 )
+                 return {}
+             if isinstance(data[0], dict) and (
+                 "Error Message" in data[0] or "error" in data[0]
+             ):
+                 error_msg = data[0].get(
+                     "Error Message", data[0].get("error", "Unknown error in list")
+                 )
+                 raise FMPFetchError(
+                     f"FMP API returned error list for {ticker}: {error_msg}"
+                 )
+             else:
+                 raise FMPFetchError(
+                     f"FMP API returned unexpected top-level list structure for {ticker} (from /historical-price-full/). Response: {str(data)[:200]}"
+                 )
+         else:
+             raise FMPFetchError(
+                 f"FMP API returned unexpected response type for {ticker} (from /historical-price-full/): {type(data)}. Response: {str(data)[:200]}"
+             )
+
+     except requests.exceptions.RequestException as e:
+         raise FMPFetchError(f"FMP data fetch (network) failed for {ticker}: {e}")
+     except FMPFetchError:
+         # fix: re-raise our own errors unchanged instead of re-wrapping them below
+         raise
+     except Exception as e:
+         raise FMPFetchError(
+             f"FMP data fetch (processing) failed for {ticker}: {e}. Response: {str(locals().get('data', 'N/A'))[:200]}"
+         )
+
+
+ def _fetch_from_alphavantage(ticker: str, api_key: str) -> Dict[str, Dict[str, Any]]:
+     """Internal function to fetch data from AlphaVantage."""
+     # fix: ALPHAVANTAGE_BASE_URL already ends in /query; appending "/query" again
+     # produced a malformed URL.
+     endpoint = ALPHAVANTAGE_BASE_URL
+     params = {
+         "function": "TIME_SERIES_DAILY_ADJUSTED",
+         "symbol": ticker,
+         "apikey": api_key,
+         "outputsize": "compact",
+     }
+     logger.info(f"Fetching historical daily data for {ticker} from AlphaVantage.")
+     try:
+         response = requests.get(endpoint, params=params, timeout=30)
+         response.raise_for_status()
+         data = response.json()
+
+         if not isinstance(data, dict):
+             raise AVFetchError(
+                 f"AlphaVantage API returned unexpected response type for {ticker}: {type(data)}. Expected dict. Response: {str(data)[:200]}"
+             )
+
+         if "Error Message" in data:
+             raise AVFetchError(
+                 f"AlphaVantage API returned error for {ticker}: {data['Error Message']}"
+             )
+         if "Note" in data:
+             logger.warning(
+                 f"AlphaVantage API returned note for {ticker}: {data['Note']} - treating as no data."
+             )
+             return {}
+
+         time_series_data = data.get("Time Series (Daily)")
+
+         if time_series_data is None:
+             if not data:
+                 logger.warning(
+                     f"AlphaVantage API returned an empty dictionary for {ticker}."
+                 )
+                 return {}
+             else:
+                 raise AVFetchError(
+                     f"AlphaVantage API response for {ticker} missing 'Time Series (Daily)' key. Response: {str(data)[:200]}"
+                 )
+
+         if not isinstance(time_series_data, dict):
+             raise AVFetchError(
+                 f"AlphaVantage API 'Time Series (Daily)' for {ticker} is not a dictionary. Type: {type(time_series_data)}. Response: {str(data)[:200]}"
+             )
+
+         if not time_series_data:
+             logger.warning(
+                 f"AlphaVantage API returned empty time series data for {ticker}."
+             )
+             return {}
+
+         prices_dict: Dict[str, Dict[str, Any]] = {}
+         for date_str, values_dict in time_series_data.items():
+             if isinstance(values_dict, dict):
+                 cleaned_values: Dict[str, Any] = {}
+                 if "1. open" in values_dict:
+                     cleaned_values["open"] = values_dict["1. open"]
+                 if "2. high" in values_dict:
+                     cleaned_values["high"] = values_dict["2. high"]
+                 if "3. low" in values_dict:
+                     cleaned_values["low"] = values_dict["3. low"]
+                 if "4. close" in values_dict:
+                     cleaned_values["close"] = values_dict["4. close"]
+                 if "5. adjusted close" in values_dict:
+                     cleaned_values["adjClose"] = values_dict["5. adjusted close"]
+                 if "6. volume" in values_dict:
+                     cleaned_values["volume"] = values_dict["6. volume"]
+
+                 if cleaned_values:
+                     prices_dict[date_str] = cleaned_values
+                 else:
+                     logger.warning(
+                         f"AlphaVantage data for {ticker} on {date_str} missing expected price keys within daily record."
+                     )
+             else:
+                 logger.warning(
+                     f"Skipping invalid AlphaVantage daily record (not a dict) for {ticker} on {date_str}: {values_dict}"
+                 )
+         logger.info(
+             f"Successfully fetched and formatted {len(prices_dict)} historical records for {ticker} from AlphaVantage."
+         )
+         return prices_dict
+
+     except requests.exceptions.RequestException as e:
+         raise AVFetchError(
+             f"AlphaVantage data fetch (network) failed for {ticker}: {e}"
+         )
+     except AVFetchError:
+         # fix: re-raise our own errors unchanged instead of re-wrapping them below
+         raise
+     except Exception as e:
+         raise AVFetchError(
+             f"AlphaVantage data fetch (processing) failed for {ticker}: {e}. Response: {str(locals().get('data', 'N/A'))[:200]}"
+         )
+
+
+ def get_daily_adjusted_prices(ticker: str) -> Dict[str, Dict[str, Any]]:
+     """
+     Fetches historical daily adjusted prices for a single ticker.
+     Tries FMP first if key is available. If FMP fails, tries AlphaVantage if key is available.
+     Returns a dictionary mapping date strings to price dictionaries.
+     Raises DataIngestionError if no keys are configured or if both APIs fail.
+     """
+     fmp_key_available = bool(FMP_API_KEY)
+     av_key_available = bool(ALPHAVANTAGE_API_KEY)
+
+     if not fmp_key_available and not av_key_available:
+         raise DataIngestionError(
+             "No API keys configured for historical price data (FMP, AlphaVantage)."
+         )
+
+     fmp_error_detail = None
+     av_error_detail = None
+     data_from_fmp = {}
+     data_from_av = {}
+
+     if fmp_key_available:
+         try:
+             data_from_fmp = _fetch_from_fmp(ticker, FMP_API_KEY)
+             if data_from_fmp:
+                 return data_from_fmp
+             else:
+                 fmp_error_detail = f"FMP API returned no data for {ticker}."
+                 logger.warning(fmp_error_detail)
+         except FMPFetchError as e:
+             fmp_error_detail = str(e)
+             logger.error(f"FMPFetchError for {ticker}: {fmp_error_detail}")
+         except Exception as e:
+             fmp_error_detail = (
+                 f"An unexpected error occurred during FMP fetch for {ticker}: {e}"
+             )
+             logger.error(fmp_error_detail)
+
+     if av_key_available:
+         try:
+             data_from_av = _fetch_from_alphavantage(ticker, ALPHAVANTAGE_API_KEY)
+             if data_from_av:
+                 return data_from_av
+             else:
+                 av_error_detail = f"AlphaVantage API returned no data for {ticker}."
+                 logger.warning(av_error_detail)
+         except AVFetchError as e:
+             av_error_detail = str(e)
+             logger.error(f"AVFetchError for {ticker}: {av_error_detail}")
+         except Exception as e:
+             av_error_detail = f"An unexpected error occurred during AlphaVantage fetch for {ticker}: {e}"
+             logger.error(av_error_detail)
+
+     error_messages = []
+     if fmp_key_available:
+         if fmp_error_detail:
+             error_messages.append(f"FMP: {fmp_error_detail}")
+         elif not data_from_fmp:
+             error_messages.append(f"FMP: Returned no data for {ticker}.")
+
+     if av_key_available:
+         if av_error_detail:
+             error_messages.append(f"AlphaVantage: {av_error_detail}")
+         elif not data_from_av:
+             error_messages.append(f"AlphaVantage: Returned no data for {ticker}.")
+
+     providers_tried = []
+     if fmp_key_available:
+         providers_tried.append("FMP")
+     if av_key_available:
+         providers_tried.append("AlphaVantage")
+
+     final_message = f"Failed to fetch historical data for {ticker} after trying {', '.join(providers_tried) if providers_tried else 'available providers'}."
+     if error_messages:
+         final_message += " Details: " + "; ".join(error_messages)
+     else:
+         final_message += " No data was returned from any attempted source."
+
+     raise DataIngestionError(final_message)
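A minimal sketch of using the loader directly (requires `FMP_API_KEY` or `ALPHAVANTAGE_API_KEY` in the environment; the ticker is illustrative):

```python
# Illustrative direct use of the fallback loader.
from data_ingestion.api_loader import get_daily_adjusted_prices, DataIngestionError

try:
    prices = get_daily_adjusted_prices("AAPL")
    latest_date = max(prices)  # ISO date strings sort chronologically
    print(latest_date, prices[latest_date].get("adjClose"))
except DataIngestionError as e:
    print(f"No data: {e}")
```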
data_ingestion/document_loader.py ADDED
@@ -0,0 +1,33 @@
+ import os  # fix: os.path.exists is used below but os was not imported
+ from typing import List, Dict, Any
+ import logging
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ def load_text_documents(filepaths: List[str]) -> List[str]:
+     """
+     Loads text content from a list of file paths. (Placeholder)
+     """
+     loaded_docs = []
+     logger.info(
+         f"Attempting to load documents from {len(filepaths)} file paths (placeholder)."
+     )
+     for path in filepaths:
+         try:
+             if os.path.exists(path):
+                 with open(path, "r", encoding="utf-8") as f:
+                     content = f.read()
+                 loaded_docs.append(content)
+                 logger.info(f"Successfully loaded content from {path} (simulated).")
+             else:
+                 logger.warning(f"File not found: {path}")
+                 loaded_docs.append(
+                     f"Could not load content from {path}: File not found."
+                 )
+         except Exception as e:
+             logger.error(f"Error loading document from {path}: {e}")
+             loaded_docs.append(f"Could not load content from {path}: {e}")
+
+     return loaded_docs
data_ingestion/scraping_loader.py ADDED
@@ -0,0 +1,60 @@
+ import requests
+ import os
+ from dotenv import load_dotenv
+ from typing import List, Dict, Any
+ import logging
+
+ load_dotenv()
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ FMP_API_KEY = os.getenv("FMP_API_KEY")
+ if not FMP_API_KEY:
+     logger.warning("FMP_API_KEY not found. FMP calls will fail.")
+
+ FMP_BASE_URL = "https://financialmodelingprep.com/api/v3"
+
+
+ class FMPError(Exception):
+     """Custom exception for FMP API errors."""
+
+     pass
+
+
+ def get_earnings_surprises(ticker: str) -> List[Dict[str, Any]]:
+     """
+     Fetches earnings surprise data for a single ticker from Financial Modeling Prep.
+     Returns a list of earnings surprise records.
+     Raises FMPError on API-specific issues.
+     Raises requests.RequestException on network issues.
+     """
+     if not FMP_API_KEY:
+         raise FMPError("FMP API Key not configured.")
+
+     endpoint = f"{FMP_BASE_URL}/earning_surprise/{ticker}"
+     params = {"apikey": FMP_API_KEY}
+
+     logger.info(f"Fetching earnings surprise data for {ticker} from FMP.")
+     response = requests.get(endpoint, params=params, timeout=30)
+     response.raise_for_status()
+     data = response.json()
+
+     if isinstance(data, list):
+         return data
+     else:
+         logger.error(f"Unexpected FMP response structure for {ticker}: {data}")
+
+         if isinstance(data, dict) and data.get("error"):
+             raise FMPError(f"FMP API returned error for {ticker}: {data['error']}")
+
+         if isinstance(data, dict) and not data:
+             logger.warning(
+                 f"FMP API returned empty response for {ticker}, potentially no data."
+             )
+             return []
+         raise FMPError(f"Unexpected API response structure for {ticker}.")
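A minimal sketch of calling the loader directly (requires `FMP_API_KEY` in the environment; ticker illustrative):

```python
# Illustrative direct use of the earnings-surprise loader.
from data_ingestion.scraping_loader import get_earnings_surprises, FMPError

try:
    records = get_earnings_surprises("TSM")
    print(records[:2])  # first couple of records, newest first per FMP convention
except FMPError as e:
    print(f"FMP error: {e}")
```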
docker-compose.yaml ADDED
@@ -0,0 +1,138 @@
+ version: '3.8'
+
+ services:
+   api_agent:
+     build:
+       context: .
+       dockerfile: Dockerfile.fastapi
+     command: uvicorn agents.api_agent:app --host 0.0.0.0 --port 8001
+     volumes:
+       - .:/app
+     ports:
+       - "8001:8001"
+     environment:
+       - FMP_API_KEY=${FMP_API_KEY}
+       - ALPHAVANTAGE_API_KEY=${ALPHAVANTAGE_API_KEY}
+     networks:
+       - agent_network
+
+   scraping_agent:
+     build:
+       context: .
+       dockerfile: Dockerfile.fastapi
+     command: uvicorn agents.scraping_agent:app --host 0.0.0.0 --port 8002
+     volumes:
+       - .:/app
+     ports:
+       - "8002:8002"
+     environment:
+       - FMP_API_KEY=${FMP_API_KEY}
+     networks:
+       - agent_network
+
+   retriever_agent:
+     build:
+       context: .
+       dockerfile: Dockerfile.fastapi
+     command: uvicorn agents.retriever_agent:app --host 0.0.0.0 --port 8003
+     volumes:
+       - .:/app
+       - faiss_index_volume:/app/faiss_index_store
+     ports:
+       - "8003:8003"
+     environment:
+       - FAISS_INDEX_PATH=/app/faiss_index_store
+     networks:
+       - agent_network
+
+   analysis_agent:
+     build:
+       context: .
+       dockerfile: Dockerfile.fastapi
+     command: uvicorn agents.analysis_agent:app --host 0.0.0.0 --port 8004
+     volumes:
+       - .:/app
+     ports:
+       - "8004:8004"
+     networks:
+       - agent_network
+
+   language_agent:
+     build:
+       context: .
+       dockerfile: Dockerfile.fastapi
+     command: uvicorn agents.language_agent:app --host 0.0.0.0 --port 8005
+     volumes:
+       - .:/app
+     ports:
+       - "8005:8005"
+     environment:
+       - GOOGLE_API_KEY=${GOOGLE_API_KEY}
+       - GEMINI_MODEL_NAME=${GEMINI_MODEL_NAME:-gemini-1.5-flash-latest}
+     networks:
+       - agent_network
+
+   voice_agent:
+     build:
+       context: .
+       dockerfile: Dockerfile.fastapi
+     command: uvicorn agents.voice_agent:app --host 0.0.0.0 --port 8006
+     volumes:
+       - .:/app
+     ports:
+       - "8006:8006"
+     environment:
+       - WHISPER_MODEL_SIZE=${WHISPER_MODEL_SIZE:-small}
+     networks:
+       - agent_network
+
+   orchestrator:
+     build:
+       context: .
+       dockerfile: Dockerfile.fastapi
+     command: uvicorn orchestrator.main:app --host 0.0.0.0 --port 8000
+     volumes:
+       - .:/app
+       - ./example_portfolio.json:/app/example_portfolio.json
+     ports:
+       - "8000:8000"
+     environment:
+       - AGENT_API_URL=http://api_agent:8001
+       - AGENT_SCRAPING_URL=http://scraping_agent:8002
+       - AGENT_RETRIEVER_URL=http://retriever_agent:8003
+       - AGENT_ANALYSIS_URL=http://analysis_agent:8004
+       - AGENT_LANGUAGE_URL=http://language_agent:8005
+       - AGENT_VOICE_URL=http://voice_agent:8006
+     depends_on:
+       - api_agent
+       - scraping_agent
+       - retriever_agent
+       - analysis_agent
+       - language_agent
+       - voice_agent
+     networks:
+       - agent_network
+
+   streamlit_app:
+     build:
+       context: .
+       dockerfile: Dockerfile.streamlit
+     command: streamlit run streamlit/app.py --server.port=8501 --server.address=0.0.0.0 --browser.gatherUsageStats=false
+     volumes:
+       - ./streamlit:/app/streamlit
+       - ./example_portfolio.json:/app/example_portfolio.json
+     ports:
+       - "8501:8501"
+     environment:
+       - ORCHESTRATOR_URL=http://orchestrator:8000
+     depends_on:
+       - orchestrator
+     networks:
+       - agent_network
+
+ volumes:
+   faiss_index_volume:
+
+ networks:
+   agent_network:
+     driver: bridge
example_portfolio.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "TSM": {"weight": 0.22, "country": "Taiwan", "sector": "Technology", "name": "TSMC ADR"},
+   "AAPL": {"weight": 0.15, "country": "USA", "sector": "Technology", "name": "Apple Inc."},
+   "MSFT": {"weight": 0.10, "country": "USA", "sector": "Technology", "name": "Microsoft Corp."},
+   "JNJ": {"weight": 0.08, "country": "USA", "sector": "Healthcare", "name": "Johnson & Johnson"},
+   "BABA": {"weight": 0.05, "country": "China", "sector": "Technology", "name": "Alibaba Group ADR"},
+   "ASML": {"weight": 0.07, "country": "Netherlands", "sector": "Technology", "name": "ASML Holding NV ADR (Europe Tech)"},
+   "NVDA": {"weight": 0.12, "country": "USA", "sector": "Technology", "name": "NVIDIA Corp."},
+   "GOOGL": {"weight": 0.11, "country": "USA", "sector": "Technology", "name": "Alphabet Inc. (Google)"},
+   "INTC": {"weight": 0.10, "country": "USA", "sector": "Technology", "name": "Intel Corp."}
+ }
orchestrator/main.py ADDED
@@ -0,0 +1,616 @@
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException, status
2
+ from pydantic import BaseModel
3
+ import httpx
4
+ import os
5
+ from dotenv import load_dotenv
6
+ from langgraph.graph import StateGraph, END
7
+ from typing import Dict, List, Optional, Any, Union
8
+ import logging
9
+ import json
10
+
11
+ load_dotenv()
12
+ logging.basicConfig(
13
+ level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
14
+ )
15
+ logger = logging.getLogger(__name__)
16
+
17
+ app = FastAPI(title="Orchestrator (Generalized)")
18
+
19
+ AGENT_API_URL = os.getenv("AGENT_API_URL", "http://localhost:8001")
20
+ AGENT_SCRAPING_URL = os.getenv("AGENT_SCRAPING_URL", "http://localhost:8002")
21
+ AGENT_RETRIEVER_URL = os.getenv("AGENT_RETRIEVER_URL", "http://localhost:8003")
22
+ AGENT_ANALYSIS_URL = os.getenv("AGENT_ANALYSIS_URL", "http://localhost:8004")
23
+ AGENT_LANGUAGE_URL = os.getenv("AGENT_LANGUAGE_URL", "http://localhost:8005")
24
+ AGENT_VOICE_URL = os.getenv("AGENT_VOICE_URL", "http://localhost:8006")
25
+
26
+
27
+ class EarningsSurpriseRecordState(BaseModel):
28
+ date: str
29
+ symbol: str
30
+ actual: Union[float, int, str, None] = None
31
+ estimate: Union[float, int, str, None] = None
32
+ difference: Union[float, int, str, None] = None
33
+ surprisePercentage: Union[float, int, str, None] = None
34
+
35
+
36
+ class MarketBriefState(BaseModel):
37
+ audio_input: Optional[bytes] = None
38
+ user_text: Optional[str] = None
39
+ nlu_results: Optional[Dict[str, str]] = None
40
+
41
+ target_tickers_for_data_fetch: List[str] = []
42
+ market_data: Optional[Dict[str, Dict[str, float]]] = None
43
+ filings: Optional[Dict[str, List[EarningsSurpriseRecordState]]] = None
44
+
45
+ indexed: bool = False
46
+ retrieved_docs: Optional[List[str]] = None
47
+ analysis: Optional[Dict[str, Any]] = None
48
+ brief: Optional[str] = None
49
+ audio_output: Optional[bytes] = None
50
+ errors: List[str] = []
51
+ warnings: List[str] = []
52
+
53
+ class Config:
54
+ arbitrary_types_allowed = True
55
+
56
+
57
+ EXAMPLE_PORTFOLIO_FILE = "example_portfolio.json"
58
+ EXAMPLE_METADATA_FILE = "example_metadata.json"
59
+
60
+
61
+ def load_example_data(file_path: str, default_data: Dict) -> Dict:
62
+ if os.path.exists(file_path):
63
+ try:
64
+ with open(file_path, "r") as f:
65
+ return json.load(f)
66
+ except Exception as e:
67
+ logger.warning(f"Could not load {file_path}: {e}. Using default.")
68
+ return default_data
69
+
70
+
71
+ EXAMPLE_PORTFOLIO = load_example_data(
72
+ EXAMPLE_PORTFOLIO_FILE,
73
+ {
74
+ "TSM": {
75
+ "weight": 0.22,
76
+ "country": "Taiwan",
77
+ "sector": "Technology",
78
+ },
79
+ "AAPL": {"weight": 0.15, "country": "USA", "sector": "Technology"},
80
+ "MSFT": {"weight": 0.10, "country": "USA", "sector": "Technology"},
81
+ "JNJ": {"weight": 0.08, "country": "USA", "sector": "Healthcare"},
82
+ "BABA": {
83
+ "weight": 0.05,
84
+ "country": "China",
85
+ "sector": "Technology",
86
+ },
87
+ },
88
+ )
89
+
90
+
91
+ async def call_agent(
92
+ client: httpx.AsyncClient,
93
+ url: str,
94
+ method: str = "post",
95
+ json_payload: Optional[Dict] = None,
96
+ files_payload: Optional[Dict] = None,
97
+ timeout: float = 60.0,
98
+ ) -> Dict:
99
+ try:
100
+ logger.info(
101
+ f"Calling agent at {url} with payload keys: {list(json_payload.keys()) if json_payload else 'N/A'}"
102
+ )
103
+ if method == "post":
104
+ if json_payload:
105
+ response = await client.post(url, json=json_payload, timeout=timeout)
106
+ elif files_payload:
107
+ response = await client.post(url, files=files_payload, timeout=timeout)
108
+ else:
109
+ raise ValueError("POST request requires json_payload or files_payload.")
110
+ elif method == "get":
111
+ response = await client.get(url, params=json_payload, timeout=timeout)
112
+ else:
113
+ raise ValueError(f"Unsupported method: {method}")
114
+
115
+ response.raise_for_status()
116
+ logger.info(f"Agent at {url} returned status {response.status_code}.")
117
+ return response.json()
118
+
119
+ except httpx.ConnectError as e:
120
+ error_msg = f"Connection error calling agent at {url}: {e}"
121
+ logger.error(error_msg)
122
+ raise HTTPException(
123
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=error_msg
124
+ )
125
+ except httpx.RequestError as e:
126
+ error_msg = f"Request error calling agent at {url}: {e}"
127
+ logger.error(error_msg)
128
+ raise HTTPException(
129
+ status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail=error_msg
130
+ )
131
+ except httpx.HTTPStatusError as e:
132
+ error_msg = f"HTTP error calling agent at {url}: {e.response.status_code} - {e.response.text[:200]}"
133
+ logger.error(error_msg)
134
+ raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
135
+ except Exception as e:
136
+ error_msg = f"An unexpected error occurred calling agent at {url}: {e}"
137
+ logger.error(error_msg, exc_info=True)
138
+ raise HTTPException(
139
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=error_msg
140
+ )
141
+
142
+
143
+ async def stt_node(state: MarketBriefState) -> MarketBriefState:
144
+
145
+ async with httpx.AsyncClient() as client:
146
+ if not state.audio_input:
147
+ state.errors.append("STT Node: No audio input provided.")
148
+ logger.error(state.errors[-1])
149
+ state.user_text = "Error: No audio provided for STT."
150
+ return state
151
+ files = {"audio": ("input.wav", state.audio_input, "audio/wav")}
152
+ try:
153
+ response_data = await call_agent(
154
+ client, f"{AGENT_VOICE_URL}/stt", files_payload=files
155
+ )
156
+ if "transcript" in response_data:
157
+ state.user_text = response_data["transcript"]
158
+ logger.info(f"STT successful. Transcript: {state.user_text[:50]}...")
159
+ else:
160
+ error_msg = f"STT agent response missing 'transcript': {response_data}"
161
+ logger.error(error_msg)
162
+ state.errors.append(error_msg)
163
+ state.user_text = "Error: STT failed."
164
+ except HTTPException as e:
165
+ state.errors.append(f"STT Node failed: {e.detail}")
166
+ state.user_text = "Error: STT service unavailable or failed."
167
+ return state
168
+
169
+
170
+ async def nlu_node(state: MarketBriefState) -> MarketBriefState:
171
+ """(NEW) Calls an NLU process (simulated here) to extract intent."""
172
+ if not state.user_text or "Error:" in state.user_text:
173
+ state.warnings.append(
174
+ "NLU Node: Skipping due to missing or error in user_text."
175
+ )
176
+ state.nlu_results = {
177
+ "region": "Global",
178
+ "sector": "Overall Portfolio",
179
+ }
180
+ return state
181
+
182
+ logger.info(f"NLU Node: Processing query: '{state.user_text}'")
183
+
184
+ query_lower = state.user_text.lower()
185
+ region = "Global"
186
+ sector = "Overall Portfolio"
187
+
188
+ if "asia" in query_lower and "tech" in query_lower:
189
+ region = "Asia"
190
+ sector = "Technology"
191
+ logger.info("NLU Simulation: Detected 'Asia' and 'Tech'.")
192
+ elif "us" in query_lower or "usa" in query_lower or "america" in query_lower:
193
+ region = "USA"
194
+ if "tech" in query_lower:
195
+ sector = "Technology"
196
+ elif "health" in query_lower:
197
+ sector = "Healthcare"
198
+ logger.info(f"NLU Simulation: Detected Region '{region}', Sector '{sector}'.")
199
+
200
+ state.nlu_results = {"region": region, "sector": sector}
201
+ logger.info(f"NLU Node: Results: {state.nlu_results}")
202
+
203
+ target_tickers = []
204
+ portfolio_keys = list(EXAMPLE_PORTFOLIO.keys())
205
+
206
+ if region == "Global" and (
207
+ sector == "Overall Portfolio" or sector == "Overall Market"
208
+ ):
209
+ target_tickers = portfolio_keys
210
+ else:
211
+ for ticker, details in EXAMPLE_PORTFOLIO.items():
212
+ matches_region = region == "Global"
213
+ if region == "Asia" and details.get("country") in [
214
+ "Taiwan",
215
+ "China",
216
+ "Korea",
217
+ "Japan",
218
+ "India",
219
+ ]:
220
+ matches_region = True
221
+ elif region == "USA" and details.get("country") == "USA":
222
+ matches_region = True
223
+
224
+ matches_sector = sector == "Overall Portfolio" or sector == "Overall Market"
225
+ if sector.lower() == details.get("sector", "").lower():
226
+ matches_sector = True
227
+
228
+ if matches_region and matches_sector:
229
+ target_tickers.append(ticker)
230
+
231
+ if not target_tickers and portfolio_keys:
232
+ logger.warning(
233
+ f"NLU filtering yielded no specific tickers for {region}/{sector}, defaulting to all portfolio tickers."
234
+ )
235
+ target_tickers = portfolio_keys
236
+ state.nlu_results["region_effective"] = "Global"
237
+ state.nlu_results["sector_effective"] = "Overall Portfolio"
238
+
239
+ state.target_tickers_for_data_fetch = list(set(target_tickers))
240
+ logger.info(
241
+ f"NLU Node: Target tickers for data fetch: {state.target_tickers_for_data_fetch}"
242
+ )
243
+ if not state.target_tickers_for_data_fetch:
244
+ state.warnings.append(
245
+ "NLU Node: No target tickers identified for data fetching based on query and portfolio."
246
+ )
247
+
248
+ return state
249
+
250
+
251
+ async def api_agent_node(state: MarketBriefState) -> MarketBriefState:
252
+ if not state.target_tickers_for_data_fetch:
253
+ state.warnings.append(
254
+ "API Agent Node: No target tickers to fetch market data for. Skipping."
255
+ )
256
+ state.market_data = {}
257
+ return state
258
+
259
+ async with httpx.AsyncClient() as client:
260
+ payload = {
261
+ "tickers": state.target_tickers_for_data_fetch,
262
+ "data_type": "adjClose",
263
+ }
264
+ try:
265
+ response_data = await call_agent(
266
+ client, f"{AGENT_API_URL}/get_market_data", json_payload=payload
267
+ )
268
+ state.market_data = response_data.get("result", {})
269
+ logger.info(
270
+ f"API Agent successful. Fetched data for tickers: {list(state.market_data.keys()) if state.market_data else 'None'}"
271
+ )
272
+ if response_data.get("errors"):
273
+ state.warnings.append(
274
+ f"API Agent reported errors: {response_data['errors']}"
275
+ )
276
+ if response_data.get("warnings"):
277
+ state.warnings.extend(response_data.get("warnings", []))
278
+
279
+ except HTTPException as e:
280
+ state.errors.append(
281
+ f"API Agent Node failed for tickers {state.target_tickers_for_data_fetch}: {e.detail}"
282
+ )
283
+ state.market_data = {}
284
+ return state
285
+
286
+
287
+ async def scraping_agent_node(state: MarketBriefState) -> MarketBriefState:
288
+ if not state.target_tickers_for_data_fetch:
289
+ state.warnings.append(
290
+ "Scraping Agent Node: No target tickers to fetch earnings for. Skipping."
291
+ )
292
+ state.filings = {}
293
+ return state
294
+
295
+ async with httpx.AsyncClient() as client:
296
+ filings_data: Dict[str, List[Dict[str, Any]]] = {}
297
+ for ticker in state.target_tickers_for_data_fetch:
298
+ payload = {"ticker": ticker, "filing_type": "earnings_surprise"}
299
+ try:
300
+ response_data = await call_agent(
301
+ client, f"{AGENT_SCRAPING_URL}/get_filings", json_payload=payload
302
+ )
303
+
304
+ if "data" in response_data and isinstance(response_data["data"], list):
305
+
306
+ filings_data[ticker] = response_data["data"]
307
+ logger.info(
308
+ f"Scraping Agent got {len(response_data['data'])} records for {ticker}."
309
+ )
310
+ if not response_data["data"]:
311
+ logger.info(
312
+ f"Scraping Agent for {ticker} returned 0 earnings surprise records."
313
+ )
314
+ else:
315
+ filings_data[ticker] = []
316
+ state.errors.append(
317
+ f"Scraping agent for {ticker} returned malformed data: {str(response_data)[:100]}"
318
+ )
319
+ except HTTPException as e:
320
+ state.errors.append(
321
+ f"Scraping Agent Node failed for {ticker}: {e.detail}"
322
+ )
323
+ filings_data[ticker] = []
324
+ state.filings = filings_data
325
+ return state
326
+
327
+
328
+ async def retriever_agent_node(state: MarketBriefState) -> MarketBriefState:
329
+
330
+ async with httpx.AsyncClient() as client:
331
+ docs_to_index = []
332
+ if state.filings:
333
+ for (
334
+ ticker,
335
+ records_list,
336
+ ) in state.filings.items():
337
+ if records_list:
338
+ doc_content = f"Earnings surprise data for {ticker}:\n" + "\n".join(
339
+ [
340
+ f"Date: {r.get('date', 'N/A')}, Symbol: {r.get('symbol', 'N/A')}, "
341
+ f"Actual: {r.get('actual', 'N/A')}, Estimate: {r.get('estimate', 'N/A')}, "
342
+ f"Surprise%: {r.get('surprisePercentage', 'N/A')}"
343
+ for r in records_list
344
+ ]
345
+ )
346
+ docs_to_index.append(doc_content)
347
+
348
+ if docs_to_index:
349
+ try:
+ # NOTE: illustrative fill-in for the original empty stub; the retriever
+ # agent's "/index" route and {"documents": [...]} payload are assumptions.
+ await call_agent(
+ client,
+ f"{AGENT_RETRIEVER_URL}/index",
+ json_payload={"documents": docs_to_index},
+ )
+ state.indexed = True
+ except Exception as e:
+ state.errors.append(f"Retriever indexing failed: {e}")
+ state.indexed = False
355
+ else:
356
+ state.indexed = False
357
+ logger.info("Retriever: No new documents to index.")
358
+
359
+ if state.user_text:
360
+ try:
+ # NOTE: illustrative fill-in; the "/retrieve" route and payload are assumptions.
+ response_data = await call_agent(
+ client,
+ f"{AGENT_RETRIEVER_URL}/retrieve",
+ json_payload={"query": state.user_text, "top_k": 3},
+ )
+ state.retrieved_docs = response_data.get("documents", [])
+ except Exception as e:
+ state.errors.append(f"Retriever retrieval failed: {e}")
+ state.retrieved_docs = []
366
+ else:
367
+ state.retrieved_docs = []
368
+ return state
369
+
370
+
371
+ async def analysis_agent_node(state: MarketBriefState) -> MarketBriefState:
372
+ if not state.market_data and not state.filings:
373
+ state.warnings.append(
374
+ "Analysis Agent Node: No market data or filings available. Skipping analysis."
375
+ )
376
+ state.analysis = None
377
+ return state
378
+
379
+ async with httpx.AsyncClient() as client:
380
+
381
+ nlu_res = state.nlu_results if state.nlu_results else {}
382
+ region_label = nlu_res.get("region_effective", nlu_res.get("region", "Global"))
383
+ sector_label = nlu_res.get(
384
+ "sector_effective", nlu_res.get("sector", "Overall Portfolio")
385
+ )
386
+
387
+ if region_label == "Global" and (
388
+ sector_label == "Overall Portfolio" or sector_label == "Overall Market"
389
+ ):
390
+ target_label_for_analysis = "Overall Portfolio"
391
+ else:
392
+ target_label_for_analysis = (
393
+ f"{region_label.replace('USA', 'US')} {sector_label} Stocks".strip()
394
+ )
395
+
396
+ analysis_target_tickers = state.target_tickers_for_data_fetch
397
+
398
+ current_portfolio_weights = {
399
+ ticker: details["weight"] for ticker, details in EXAMPLE_PORTFOLIO.items()
400
+ }
401
+
402
+ payload = {
403
+ "portfolio": current_portfolio_weights,
404
+ "market_data": state.market_data if state.market_data else {},
405
+ "earnings_data": (state.filings if state.filings else {}),
406
+ "target_tickers": analysis_target_tickers,
407
+ "target_label": target_label_for_analysis,
408
+ }
409
+ try:
410
+ response_data = await call_agent(
411
+ client, f"{AGENT_ANALYSIS_URL}/analyze", json_payload=payload
412
+ )
413
+
414
+ state.analysis = response_data
415
+ logger.info(
416
+ f"Analysis Agent successful for '{response_data.get('target_label')}'."
417
+ )
418
+ except HTTPException as e:
419
+ state.errors.append(f"Analysis Agent Node failed: {e.detail}")
420
+ state.analysis = None
421
+ return state
422
+
423
+
424
+ async def language_agent_node(state: MarketBriefState) -> MarketBriefState:
425
+
426
+ async with httpx.AsyncClient() as client:
427
+ if not state.user_text or "Error:" in state.user_text:
428
+ state.errors.append("Language Agent: Skipping due to no valid user text.")
429
+ state.brief = (
430
+ "I could not understand your query or there was an earlier error."
431
+ )
432
+ return state
433
+
434
+ analysis_payload_for_llm: Dict[str, Any]
435
+ if state.analysis and isinstance(state.analysis, dict):
436
+
437
+ analysis_payload_for_llm = {
438
+ "target_label": state.analysis.get("target_label", "the portfolio"),
439
+ "current_allocation": state.analysis.get("current_allocation", 0.0),
440
+ "yesterday_allocation": state.analysis.get("yesterday_allocation", 0.0),
441
+ "allocation_change_percentage_points": state.analysis.get(
442
+ "allocation_change_percentage_points", 0.0
443
+ ),
444
+ "earnings_surprises_for_target": state.analysis.get(
445
+ "earnings_surprises_for_target", []
446
+ ),
447
+ }
448
+ else:
449
+ logger.warning(
450
+ "Language Agent: Analysis data is missing or not a dict. Using defaults."
451
+ )
452
+ state.warnings.append(
453
+ "Language Agent: Analysis data unavailable, brief will be general."
454
+ )
455
+ analysis_payload_for_llm = {
456
+ "target_label": "the portfolio (analysis data missing)",
457
+ "current_allocation": 0.0,
458
+ "yesterday_allocation": 0.0,
459
+ "allocation_change_percentage_points": 0.0,
460
+ "earnings_surprises_for_target": [],
461
+ }
462
+
463
+ payload = {
464
+ "user_query": state.user_text,
465
+ "analysis": analysis_payload_for_llm,
466
+ "retrieved_docs": state.retrieved_docs if state.retrieved_docs else [],
467
+ }
468
+ try:
469
+ response_data = await call_agent(
470
+ client, f"{AGENT_LANGUAGE_URL}/generate_brief", json_payload=payload
471
+ )
472
+ state.brief = response_data.get("brief")
+ # Guard the log slice: response_data may lack "brief", leaving state.brief None.
+ logger.info(f"Language Agent successful. Brief: {(state.brief or '')[:70]}...")
474
+ except HTTPException as e:
475
+ state.errors.append(f"Language Agent Node failed: {e.detail}")
476
+ state.brief = "Sorry, I couldn't generate the brief at this time due to an internal error."
477
+ return state
478
+
479
+
480
+ async def tts_node(state: MarketBriefState) -> MarketBriefState:
481
+
482
+ brief_text_for_tts = state.brief
483
+ if state.errors and (
484
+ not state.brief
485
+ or "sorry" in state.brief.lower()
486
+ or "error" in state.brief.lower()
487
+ ):
488
+
489
+ error_count = len(state.errors)
490
+ brief_text_for_tts = f"I encountered {error_count} error{'s' if error_count > 1 else ''} while processing your request. Please check the detailed report."
491
+ logger.warning(
492
+ f"TTS Node: Generating audio for error summary due to {error_count} errors in state."
493
+ )
494
+ elif not state.brief:
495
+ brief_text_for_tts = "The market brief could not be generated."
496
+ logger.warning("TTS Node: No brief text available from language agent.")
497
+ state.warnings.append("TTS Node: No brief content to synthesize.")
498
+
499
+ if not brief_text_for_tts:
500
+ state.audio_output = None
501
+ return state
502
+
503
+ async with httpx.AsyncClient() as client:
504
+ payload = {"text": brief_text_for_tts, "lang": "en"}
505
+ try:
506
+ response_data = await call_agent(
507
+ client, f"{AGENT_VOICE_URL}/tts", json_payload=payload
508
+ )
509
+ if "audio" in response_data and isinstance(response_data["audio"], str):
510
+ state.audio_output = bytes.fromhex(response_data["audio"])
511
+ logger.info("TTS successful. Audio bytes received.")
512
+ else:
513
+ state.errors.append(
514
+ f"TTS Agent response missing or invalid 'audio': {str(response_data)[:100]}"
515
+ )
516
+ state.audio_output = None
517
+ except HTTPException as e:
518
+ state.errors.append(f"TTS Node failed: {e.detail}")
519
+ state.audio_output = None
520
+ return state
521
+
522
+
523
+ def build_market_brief_graph():
524
+ builder = StateGraph(MarketBriefState)
525
+ builder.add_node("stt", stt_node)
526
+ builder.add_node("nlu", nlu_node)
527
+ builder.add_node("api_agent", api_agent_node)
528
+ builder.add_node("scraping_agent", scraping_agent_node)
529
+ builder.add_node("retriever_agent", retriever_agent_node)
530
+ builder.add_node("analysis_agent", analysis_agent_node)
531
+ builder.add_node("language_agent", language_agent_node)
532
+ builder.add_node("tts", tts_node)
533
+
534
+ builder.set_entry_point("stt")
535
+ builder.add_edge("stt", "nlu")
536
+ builder.add_edge("nlu", "api_agent")
537
+ builder.add_edge("api_agent", "scraping_agent")
538
+ builder.add_edge("scraping_agent", "retriever_agent")
539
+ builder.add_edge("retriever_agent", "analysis_agent")
540
+ builder.add_edge("analysis_agent", "language_agent")
541
+ builder.add_edge("language_agent", "tts")
542
+ builder.add_edge("tts", END)
543
+ return builder.compile()
544
+
545
+
546
+ graph = build_market_brief_graph()
547
+
548
+
549
+ @app.post("/market_brief")
550
+ async def market_brief(audio: UploadFile = File(...)):
551
+
552
+ logger.info("Received request to /market_brief")
553
+ if not audio.content_type or not audio.content_type.startswith("audio/"):
554
+ raise HTTPException(
555
+ status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
556
+ detail="Invalid file type.",
557
+ )
558
+
559
+ current_run_state = MarketBriefState()
560
+ try:
561
+ current_run_state.audio_input = await audio.read()
562
+ except Exception as e:
563
+ raise HTTPException(
564
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
565
+ detail=f"Failed to read audio: {e}",
566
+ )
567
+
568
+ processed_state: MarketBriefState = current_run_state
569
+
570
+ try:
571
+ logger.info("Invoking LangGraph workflow...")
572
+
573
+ initial_state_dict = current_run_state.model_dump(exclude_none=True)
574
+ invocation_result = await graph.ainvoke(initial_state_dict)
575
+
576
+ if isinstance(invocation_result, dict):
577
+
578
+ processed_state = MarketBriefState(**invocation_result)
579
+ logger.info("LangGraph execution finished. State updated.")
580
+ else:
581
+ logger.error(
582
+ f"LangGraph ainvoke returned unexpected type: {type(invocation_result)}. Using partially updated state."
583
+ )
584
+
585
+ processed_state.errors.append(
586
+ f"Internal graph error: result type {type(invocation_result)}"
587
+ )
588
+
589
+ except HTTPException as e:
590
+ logger.error(
591
+ f"Graph execution stopped due to HTTPException from an agent: {e.detail}"
592
+ )
593
+ processed_state.errors.append(f"Agent call failed: {e.detail}")
594
+ except Exception as e:
595
+ error_msg = f"An unexpected error occurred during graph execution: {e}"
596
+ logger.error(error_msg, exc_info=True)
597
+ processed_state.errors.append(error_msg)
598
+
599
+ response_payload = {
600
+ "transcript": processed_state.user_text,
601
+ "brief": processed_state.brief,
602
+ "audio": (
603
+ processed_state.audio_output.hex() if processed_state.audio_output else None
604
+ ),
605
+ "errors": processed_state.errors,
606
+ "warnings": processed_state.warnings,
607
+ "status": "success" if not processed_state.errors else "failed",
608
+ "message": "Market brief process completed."
609
+ + (" With errors." if processed_state.errors else " Successfully."),
610
+ "nlu_detected": processed_state.nlu_results,
611
+ "analysis_details": processed_state.analysis,
612
+ }
613
+ logger.info(
614
+ f"Request finished. Status: {response_payload['status']}. Errors: {len(response_payload['errors'])}. Warnings: {len(response_payload['warnings'])}."
615
+ )
616
+ return response_payload
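
For reference, a minimal client for the `/market_brief` endpoint above. The input file name is a placeholder; the printed fields match `response_payload` exactly, and since the voice agent pulls in `gtts` (see requirements below), MP3 is a reasonable guess for the decoded audio:

```python
# call_market_brief.py - minimal client sketch for POST /market_brief.
import httpx

with open("query.wav", "rb") as f:  # placeholder recording of a spoken query
    files = {"audio": ("query.wav", f.read(), "audio/wav")}

resp = httpx.post("http://localhost:8000/market_brief", files=files, timeout=180.0)
resp.raise_for_status()
payload = resp.json()

print("status:    ", payload["status"])
print("transcript:", payload["transcript"])
print("brief:     ", payload["brief"])

# The orchestrator hex-encodes the TTS bytes; decode them back into a file.
if payload.get("audio"):
    with open("brief.mp3", "wb") as out:
        out.write(bytes.fromhex(payload["audio"]))
```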
requirements.txt ADDED
@@ -0,0 +1,18 @@
1
+ fastapi
2
+ uvicorn
3
+ pydantic>=2  # orchestrator/main.py uses model_dump(), a Pydantic v2 API
4
+ requests
5
+ python-dotenv
6
+ numpy
7
+ faiss-cpu
8
+ sentence-transformers
9
+ langchain>=0.1.0
10
+ langchain-core
11
+ langchain-community
12
+ langchain-openai
13
+ gtts
14
+ faster-whisper
15
+ python-multipart
16
+ langgraph
17
+ streamlit
18
+ streamlit-mic-recorder
streamlit/app.py ADDED
@@ -0,0 +1,343 @@
1
+ import streamlit as st
2
+ import httpx
3
+ import os
5
+ from dotenv import load_dotenv
6
+ import logging
7
+ import asyncio
8
+ from streamlit_mic_recorder import mic_recorder
9
+
10
+
11
+ logging.basicConfig(
12
+ level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
13
+ )
14
+ logger = logging.getLogger(__name__)
15
+ load_dotenv()
16
+
17
+ ORCHESTRATOR_URL = os.getenv("ORCHESTRATOR_URL", "http://localhost:8000")
18
+
19
+
20
+ if "processing_state" not in st.session_state:
21
+ st.session_state.processing_state = "initial"
22
+ if "orchestrator_response" not in st.session_state:
23
+ st.session_state.orchestrator_response = None
24
+ if "audio_bytes_input" not in st.session_state:
25
+ st.session_state.audio_bytes_input = None
26
+ if "audio_filename" not in st.session_state:
27
+ st.session_state.audio_filename = None
28
+ if "audio_filetype" not in st.session_state:
29
+ st.session_state.audio_filetype = None
30
+ if "last_audio_source" not in st.session_state:
31
+ st.session_state.last_audio_source = None
32
+ if "current_recording_id" not in st.session_state:
33
+ st.session_state.current_recording_id = None
34
+
35
+
36
+ async def call_orchestrator(audio_bytes: bytes, filename: str, content_type: str):
37
+
38
+ url = f"{ORCHESTRATOR_URL}/market_brief"
39
+ files = {"audio": (filename, audio_bytes, content_type)}
40
+ logger.info(
41
+ f"Calling orchestrator at {url} with audio file: {filename} ({content_type})"
42
+ )
43
+ try:
44
+ async with httpx.AsyncClient() as client:
45
+ response = await client.post(url, files=files, timeout=180.0)
46
+ response.raise_for_status()
47
+ logger.info(f"Orchestrator returned status {response.status_code}.")
48
+ return response.json()
49
+ except httpx.RequestError as e:
50
+ error_msg = f"HTTP Request failed: {e}"
51
+ logger.error(error_msg)
52
+ return {
53
+ "status": "error",
54
+ "message": "Error communicating with orchestrator.",
55
+ "errors": [error_msg],
56
+ "transcript": None,
57
+ "brief": None,
58
+ "audio": None,
59
+ }
60
+ except Exception as e:
61
+ error_msg = f"An unexpected error occurred: {e}"
62
+ logger.error(error_msg)
63
+ return {
64
+ "status": "error",
65
+ "message": "An unexpected error occurred.",
66
+ "errors": [error_msg],
67
+ "transcript": None,
68
+ "brief": None,
69
+ "audio": None,
70
+ }
71
+
72
+
73
+ st.set_page_config(layout="wide")
74
+ st.title("📈 AI Financial Assistant - Morning Market Brief")
75
+ st.markdown(
76
+ "Ask your query verbally (e.g., 'What's our risk exposure in Asia tech stocks today, and highlight any earnings surprises?') "
77
+ "or upload an audio file."
78
+ )
79
+
80
+ input_method = st.radio(
81
+ "Choose input method:",
82
+ ("Record Audio", "Upload File"),
83
+ horizontal=True,
84
+ index=0,
85
+ key="input_method_radio",
86
+ )
87
+
88
+ audio_data_ready = False
89
+
90
+
91
+ if st.session_state.audio_bytes_input is not None:
92
+ audio_data_ready = True
93
+
94
+
95
+ if input_method == "Record Audio":
96
+ st.subheader("Record Your Query")
97
+
98
+ if st.session_state.last_audio_source == "uploader":
99
+ st.session_state.audio_bytes_input = None
100
+ st.session_state.audio_filename = None
101
+ st.session_state.audio_filetype = None
102
+ st.session_state.last_audio_source = "recorder"
103
+ audio_data_ready = False
104
+
105
+ audio_info = mic_recorder(
106
+ start_prompt="⏺️ Start Recording",
107
+ stop_prompt="⏹️ Stop Recording",
108
+ just_once=False,
109
+ use_container_width=True,
110
+ format="wav",
111
+ key="mic_recorder_widget",
112
+ )
113
+
114
+ if audio_info and audio_info.get("bytes"):
115
+
116
+ if st.session_state.current_recording_id != audio_info.get("id"):
117
+ st.session_state.current_recording_id = audio_info.get("id")
118
+ st.success("Recording complete! Click 'Generate Market Brief' below.")
119
+ st.session_state.audio_bytes_input = audio_info["bytes"]
120
+ st.session_state.audio_filename = f"live_recording_{audio_info['id']}.wav"
121
+ st.session_state.audio_filetype = "audio/wav"
122
+ st.session_state.last_audio_source = "recorder"
123
+ audio_data_ready = True
124
+ st.session_state.processing_state = "initial"
125
+ st.session_state.orchestrator_response = None
126
+ st.audio(audio_info["bytes"])
127
+
128
+ elif st.session_state.audio_bytes_input:
129
+ audio_data_ready = True
130
+ st.audio(st.session_state.audio_bytes_input)
131
+
132
+ elif (
133
+ st.session_state.last_audio_source == "recorder"
134
+ and st.session_state.audio_bytes_input
135
+ ):
136
+ st.markdown("Using last recording:")
137
+ st.audio(st.session_state.audio_bytes_input)
138
+ audio_data_ready = True
139
+
140
+
141
+ elif input_method == "Upload File":
142
+ st.subheader("Upload Audio File")
143
+
144
+ if st.session_state.last_audio_source == "recorder":
145
+ st.session_state.audio_bytes_input = None
146
+ st.session_state.audio_filename = None
147
+ st.session_state.audio_filetype = None
148
+ st.session_state.last_audio_source = "uploader"
149
+ st.session_state.current_recording_id = None
150
+ audio_data_ready = False
151
+
152
+ if "uploaded_file_state" not in st.session_state:
153
+ st.session_state.uploaded_file_state = None
154
+
155
+ uploaded_file = st.file_uploader(
156
+ "Select Audio File",
157
+ type=["wav", "mp3", "m4a", "ogg", "flac"],
158
+ key="file_uploader_key",
159
+ )
160
+
161
+ if uploaded_file is not None:
162
+ if st.session_state.uploaded_file_state != uploaded_file:
163
+ st.session_state.uploaded_file_state = uploaded_file
164
+ st.session_state.audio_bytes_input = uploaded_file.getvalue()
165
+ st.session_state.audio_filename = uploaded_file.name
166
+ st.session_state.audio_filetype = uploaded_file.type
167
+ st.session_state.last_audio_source = "uploader"
168
+ audio_data_ready = True
169
+ st.session_state.processing_state = "initial"
170
+ st.session_state.orchestrator_response = None
171
+ st.success(f"File '{uploaded_file.name}' ready.")
172
+ st.audio(
173
+ st.session_state.audio_bytes_input,
174
+ format=st.session_state.audio_filetype,
175
+ )
176
+ elif st.session_state.audio_bytes_input:
177
+ audio_data_ready = True
178
+ st.audio(
179
+ st.session_state.audio_bytes_input,
180
+ format=st.session_state.audio_filetype,
181
+ )
182
+
183
+ elif (
184
+ st.session_state.last_audio_source == "uploader"
185
+ and st.session_state.audio_bytes_input
186
+ ):
187
+ st.markdown("Using last uploaded file:")
188
+ st.audio(
189
+ st.session_state.audio_bytes_input, format=st.session_state.audio_filetype
190
+ )
191
+ audio_data_ready = True
192
+
193
+
194
+ st.divider()
195
+ button_disabled = (
196
+ not audio_data_ready or st.session_state.processing_state == "processing"
197
+ )
198
+
199
+ if st.button(
200
+ "Generate Market Brief",
201
+ disabled=button_disabled,
202
+ type="primary",
203
+ use_container_width=True,
204
+ key="generate_button",
205
+ ):
206
+ if st.session_state.audio_bytes_input:
207
+ st.session_state.processing_state = "processing"
208
+ st.session_state.orchestrator_response = None
209
+ logger.info(
210
+ f"Generate Market Brief button clicked. Source: {st.session_state.last_audio_source}, Filename: {st.session_state.audio_filename}"
211
+ )
212
+ st.rerun()
213
+ else:
214
+ st.warning("Please record or upload an audio query first.")
215
+
216
+
217
+ if st.session_state.processing_state == "processing":
218
+ if (
219
+ st.session_state.audio_bytes_input
220
+ and st.session_state.audio_filename
221
+ and st.session_state.audio_filetype
222
+ ):
223
+ with st.spinner("Processing your request... This may take a moment. 🤖"):
224
+
225
+ logger.info(
226
+ f"Calling orchestrator with filename: {st.session_state.audio_filename}, type: {st.session_state.audio_filetype}, bytes: {len(st.session_state.audio_bytes_input)}"
227
+ )
228
+ try:
229
+ response = asyncio.run(
230
+ call_orchestrator(
231
+ st.session_state.audio_bytes_input,
232
+ st.session_state.audio_filename,
233
+ st.session_state.audio_filetype,
234
+ )
235
+ )
236
+ st.session_state.orchestrator_response = response
237
+
238
+ is_successful_response = True
239
+ if not response:
240
+ is_successful_response = False
241
+ elif (
242
+ response.get("status") == "error"
243
+ or response.get("status") == "failed"
244
+ ):
245
+ is_successful_response = False
246
+ elif response.get("errors") and len(response.get("errors")) > 0:
247
+ is_successful_response = False
248
+
249
+ st.session_state.processing_state = (
250
+ "completed" if is_successful_response else "error"
251
+ )
252
+
253
+ except Exception as e:
254
+ logger.error(
255
+ f"Error during orchestrator call in Streamlit: {e}", exc_info=True
256
+ )
257
+ st.session_state.orchestrator_response = {
258
+ "status": "error",
259
+ "message": f"Streamlit failed to call orchestrator: {str(e)}",
260
+ "errors": [str(e)],
261
+ "transcript": None,
262
+ "brief": None,
263
+ "audio": None,
264
+ }
265
+ st.session_state.processing_state = "error"
266
+ st.rerun()
267
+ else:
268
+ st.error("Audio data is missing for processing. Please record or upload again.")
269
+ st.session_state.processing_state = "initial"
270
+
271
+
272
+ if st.session_state.processing_state in ["completed", "error"]:
273
+
274
+ response = st.session_state.orchestrator_response
275
+ st.subheader("📝 Results")
276
+
277
+ if response is None:
278
+ st.error("No response received from the orchestrator.")
279
+
280
+ elif (
281
+ response.get("status") == "failed"
282
+ or response.get("status") == "error"
283
+ or (response.get("errors") and len(response.get("errors")) > 0)
284
+ ):
285
+ st.error(
286
+ f"Workflow {response.get('status', 'failed')}: {response.get('message', 'Check errors below.')}"
287
+ )
288
+ if response.get("errors"):
289
+ st.warning("Details of Errors:")
290
+ for i, err in enumerate(response["errors"]):
291
+ st.markdown(f"`Error {i+1}`: {err}")
292
+ if response.get("warnings"):
293
+ st.warning("Details of Warnings:")
294
+ for i, warn in enumerate(response["warnings"]):
295
+ st.markdown(f"`Warning {i+1}`: {warn}")
296
+
297
+ if response.get("transcript"):
298
+ st.markdown("---")
299
+ st.markdown("Transcript (despite errors):")
300
+ st.caption(response.get("transcript"))
301
+ if response.get("brief"):
302
+ st.markdown("---")
303
+ st.markdown("Generated Brief (despite errors):")
304
+ st.caption(response.get("brief"))
305
+ else:
306
+ st.success(response.get("message", "Market brief generated successfully!"))
307
+ if response.get("transcript"):
308
+ st.markdown("---")
309
+ st.markdown("Your Query (Transcript):")
310
+ st.caption(response.get("transcript"))
311
+ else:
312
+ st.info("Transcript not available.")
313
+
314
+ if response.get("brief"):
315
+ st.markdown("---")
316
+ st.markdown("Generated Brief:")
317
+ st.write(response.get("brief"))
318
+ else:
319
+ st.info("Brief text not available.")
320
+
321
+ audio_hex = response.get("audio")
322
+ if audio_hex:
323
+ st.markdown("---")
324
+ st.markdown("Audio Brief:")
325
+ try:
326
+ if not isinstance(audio_hex, str) or not all(
327
+ c in "0123456789abcdefABCDEF" for c in audio_hex
328
+ ):
329
+ raise ValueError("Invalid hex string for audio.")
330
+ audio_bytes_output = bytes.fromhex(audio_hex)
331
+ st.audio(audio_bytes_output, format="audio/mpeg")
332
+ except ValueError as ve:
333
+ st.error(f"⚠️ Failed to decode audio data: {ve}")
334
+ except Exception as e:
335
+ st.error(f"⚠️ Failed to play audio: {e}")
336
+ else:
337
+ st.info("Audio brief not available.")
338
+
339
+ if response.get("warnings"):
340
+ st.markdown("---")
341
+ st.warning("Process Warnings:")
342
+ for i, warn in enumerate(response["warnings"]):
343
+ st.markdown(f"`Warning {i+1}`: {warn}")
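
One convention worth noting across the two services: audio crosses the orchestrator/UI boundary as a hex string inside JSON (`audio_output.hex()` on the way out of the orchestrator, `bytes.fromhex(...)` in the app above). A standalone illustration of that round trip, with stand-in bytes:

```python
# Round-trip illustration of the hex-in-JSON audio convention used above.
import json

audio_bytes = b"\xff\xfb\x90\x00"  # stand-in for real MP3 bytes from the TTS agent

payload = json.dumps({"audio": audio_bytes.hex()})       # orchestrator side
decoded = bytes.fromhex(json.loads(payload)["audio"])    # Streamlit side
assert decoded == audio_bytes

# Hex doubles the payload size; base64 (~33% overhead) would be tighter, but
# hex keeps the character-set validation in app.py trivial.
```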