mtyrrell committed on
Commit 9effd1f · 1 Parent(s): acb99db

refactored for langserve

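The refactor splits the orchestrator into a FastAPI app serving LangServe routes (port 7860) plus a background Gradio app serving the MCP endpoints (port 7861). Since LangServe's `add_routes` exposes the standard `/invoke`, `/batch`, and `/stream` sub-routes under each path, the new `/chatfed` route can be exercised roughly as below. This is a minimal sketch, not part of the commit: the host/port assume the defaults in `app/main.py`, and the query and filter values are illustrative only.

```python
# Hedged sketch: call the LangServe route added by this commit.
# Assumes the FastAPI defaults in app/main.py (0.0.0.0:7860).
import requests

resp = requests.post(
    "http://localhost:7860/chatfed/invoke",
    json={"input": {"query": "What changed in the 2024 annual reports?",
                    "year_filter": "2024"}},
)
resp.raise_for_status()
output = resp.json()["output"]  # LangServe wraps results in an "output" key
print(output["result"])         # generated answer
print(output["metadata"])       # per-stage timing and session info
```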
Files changed (1)
  1. app/main.py +520 -133
app/main.py CHANGED
@@ -1,179 +1,566 @@
  import gradio as gr
- from gradio_client import Client
  from langgraph.graph import StateGraph, START, END
- from typing import TypedDict, Optional
  import io
  from PIL import Image

- #OPEN QUESTION: SHOULD WE PASS ALL PARAMS FROM THE ORCHESTRATOR TO THE NODES INSTEAD OF SETTING IN EACH MODULE?

- # Define the state schema
  class GraphState(TypedDict):
      query: str
      context: str
      result: str
-     # Add orchestrator-level parameters (addressing your open question)
      reports_filter: str
      sources_filter: str
      subtype_filter: str
      year_filter: str

- # node 2: retriever
  def retrieve_node(state: GraphState) -> GraphState:
-     client = Client("giz/chatfed_retriever")  # HF repo name
-     context = client.predict(
-         query=state["query"],
-         reports_filter=state.get("reports_filter", ""),
-         sources_filter=state.get("sources_filter", ""),
-         subtype_filter=state.get("subtype_filter", ""),
-         year_filter=state.get("year_filter", ""),
-         api_name="/retrieve"
-     )
-     return {"context": context}

- # node 3: generator
  def generate_node(state: GraphState) -> GraphState:
-     client = Client("giz/chatfed_generator")
-     result = client.predict(
-         query=state["query"],
-         context=state["context"],
-         api_name="/generate"
-     )
-     return {"result": result}

- # build the graph
  workflow = StateGraph(GraphState)
-
- # Add nodes
  workflow.add_node("retrieve", retrieve_node)
  workflow.add_node("generate", generate_node)
-
- # Add edges
  workflow.add_edge(START, "retrieve")
  workflow.add_edge("retrieve", "generate")
  workflow.add_edge("generate", END)

- # Compile the graph
- graph = workflow.compile()

- # Single tool for processing queries
- def process_query(
      query: str,
      reports_filter: str = "",
      sources_filter: str = "",
      subtype_filter: str = "",
      year_filter: str = ""
  ) -> str:
-     """
-     Execute the ChatFed orchestration pipeline to process a user query.
-
-     This function orchestrates a two-step workflow:
-     1. Retrieve relevant context using the ChatFed retriever service with optional filters
-     2. Generate a response using the ChatFed generator service with the retrieved context
-
-     Args:
-         query (str): The user's input query/question to be processed
-         reports_filter (str, optional): Filter for specific report types. Defaults to "".
-         sources_filter (str, optional): Filter for specific data sources. Defaults to "".
-         subtype_filter (str, optional): Filter for document subtypes. Defaults to "".
-         year_filter (str, optional): Filter for specific years. Defaults to "".
-
-     Returns:
-         str: The generated response from the ChatFed generator service
-     """
-     initial_state = {
-         "query": query,
-         "context": "",
-         "result": "",
-         "reports_filter": reports_filter or "",
-         "sources_filter": sources_filter or "",
-         "subtype_filter": subtype_filter or "",
-         "year_filter": year_filter or ""
-     }
-     final_state = graph.invoke(initial_state)
-     return final_state["result"]
-
- # Simple testing interface
- ui = gr.Interface(
-     fn=process_query,
-     inputs=gr.Textbox(lines=2, placeholder="Enter query here"),
-     outputs="text",
-     flagging_mode="never"
- )

- # Add a function to generate the graph visualization
  def get_graph_visualization():
-     """Generate and return the LangGraph workflow visualization as a PIL Image."""
-     # Generate the graph as PNG bytes
-     graph_png_bytes = graph.get_graph().draw_mermaid_png()
-
-     # Convert bytes to PIL Image for Gradio display
-     graph_image = Image.open(io.BytesIO(graph_png_bytes))
-     return graph_image

-
- # Guidance for ChatUI - can be removed later. Questionable whether front end even necessary. Maybe nice to show the graph.
- with gr.Blocks(title="ChatFed Orchestrator") as demo:
-     gr.Markdown("# ChatFed Orchestrator")
-     gr.Markdown("This LangGraph server exposes MCP endpoints for the ChatUI module to call (which triggers the graph).")
-
-     with gr.Row():
-         # Left column - Graph visualization
-         with gr.Column(scale=1):
-             gr.Markdown("**Workflow Visualization**")
-             graph_display = gr.Image(
-                 value=get_graph_visualization(),
-                 label="LangGraph Workflow",
-                 interactive=False,
-                 height=300
-             )
-
-             # Add a refresh button for the graph
-             refresh_graph_btn = gr.Button("🔄 Refresh Graph", size="sm")
-             refresh_graph_btn.click(
-                 fn=get_graph_visualization,
-                 outputs=graph_display
-             )

-         # Right column - Interface and documentation
-         with gr.Column(scale=2):
-             gr.Markdown("**Available MCP Tools:**")
-
-             with gr.Accordion("MCP Endpoint Information", open=True):
-                 gr.Markdown(f"""
-                 **MCP Server Endpoint:** https://giz-chatfed-orchestrator.hf.space/gradio_api/mcp/sse

-                 **For ChatUI Integration:**
-                 ```python
-                 from gradio_client import Client

-                 # Connect to orchestrator
-                 orchestrator_client = Client("https://giz-chatfed-orchestrator.hf.space")

-                 # Basic usage (no filters)
-                 response = orchestrator_client.predict(
-                     query="query",
-                     api_name="/process_query"
-                 )

-                 # Advanced usage with any combination of filters
-                 response = orchestrator_client.predict(
-                     query="query",
-                     reports_filter="annual_reports",
-                     sources_filter="internal",
-                     year_filter="2024",
-                     api_name="/process_query"
-                 )
-                 ```
-                 """)

-             with gr.Accordion("Quick Testing Interface", open=True):
-                 ui.render()

- if __name__ == "__main__":
      demo.launch(
          server_name="0.0.0.0",
-         server_port=7860,
-         mcp_server=True,
-         show_error=True
      )
 
  import gradio as gr
+ from fastapi import FastAPI
+ from langserve import add_routes
  from langgraph.graph import StateGraph, START, END
+ from typing import Optional, Dict, Any, List, Literal, AsyncGenerator
+ from typing_extensions import TypedDict
+ from pydantic import BaseModel
+ from gradio_client import Client
+ import uvicorn
+ import os
+ from datetime import datetime
+ import logging
+ from contextlib import asynccontextmanager
  import io
  from PIL import Image
+ import threading
+ import json
+ import asyncio
+ from langchain_core.runnables import RunnableLambda
+ from langchain_core.output_parsers import StrOutputParser
+
+ # Local imports
+ from utils import getconfig
+
+ config = getconfig("params.cfg")

+ RETRIEVER = config.get("retriever", "RETRIEVER")
+ GENERATOR = config.get("generator", "GENERATOR")

+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+ # Define langgraph state schema
  class GraphState(TypedDict):
      query: str
      context: str
      result: str
      reports_filter: str
      sources_filter: str
      subtype_filter: str
      year_filter: str
+     metadata: Optional[Dict[str, Any]]
+
+ # LangServe input/output schemas
+ class ChatFedInput(TypedDict):
+     query: str
+     reports_filter: Optional[str]
+     sources_filter: Optional[str]
+     subtype_filter: Optional[str]
+     year_filter: Optional[str]
+     session_id: Optional[str]
+     user_id: Optional[str]
+
+ class ChatFedOutput(TypedDict):
+     result: str
+     metadata: Dict[str, Any]
+
+ # ChatUI specific schemas
+ class ChatUIStreamInput(BaseModel):
+     text: str  # ChatUI sends input as "text" field
+
+ class ChatUIStreamOutput(BaseModel):
+     content: str
+
+ class ChatMessage(BaseModel):
+     role: Literal["system", "user", "assistant"]
+     content: str
+
+ class ChatUIInput(BaseModel):
+     messages: List[ChatMessage]

+ # Retriever
  def retrieve_node(state: GraphState) -> GraphState:
+     start_time = datetime.now()
+     logger.info(f"Starting retrieval for query: {state['query'][:100]}...")
+
+     try:
+         client = Client(RETRIEVER)
+         context = client.predict(
+             query=state["query"],
+             reports_filter=state.get("reports_filter", ""),
+             sources_filter=state.get("sources_filter", ""),
+             subtype_filter=state.get("subtype_filter", ""),
+             year_filter=state.get("year_filter", ""),
+             api_name="/retrieve"
+         )
+
+         duration = (datetime.now() - start_time).total_seconds()
+         metadata = state.get("metadata", {})
+         metadata.update({
+             "retrieval_duration_seconds": duration,
+             "context_length": len(context) if context else 0,
+             "retrieval_success": True
+         })
+
+         logger.info(f"Retrieval completed in {duration:.2f}s, context length: {len(context) if context else 0}")
+         return {"context": context, "metadata": metadata}
+
+     except Exception as e:
+         duration = (datetime.now() - start_time).total_seconds()
+         logger.error(f"Retrieval failed after {duration:.2f}s: {str(e)}")
+
+         metadata = state.get("metadata", {})
+         metadata.update({
+             "retrieval_duration_seconds": duration,
+             "retrieval_success": False,
+             "retrieval_error": str(e)
+         })
+         return {"context": "", "metadata": metadata}

+ # Generator
  def generate_node(state: GraphState) -> GraphState:
+     start_time = datetime.now()
+     logger.info(f"Starting generation for query: {state['query'][:100]}...")
+
+     try:
+         client = Client(GENERATOR)
+         result = client.predict(
+             query=state["query"],
+             context=state["context"],
+             api_name="/generate"
+         )
+
+         duration = (datetime.now() - start_time).total_seconds()
+         metadata = state.get("metadata", {})
+         metadata.update({
+             "generation_duration_seconds": duration,
+             "result_length": len(result) if result else 0,
+             "generation_success": True
+         })
+
+         logger.info(f"Generation completed in {duration:.2f}s, result length: {len(result) if result else 0}")
+         return {"result": result, "metadata": metadata}
+
+     except Exception as e:
+         duration = (datetime.now() - start_time).total_seconds()
+         logger.error(f"Generation failed after {duration:.2f}s: {str(e)}")
+
+         metadata = state.get("metadata", {})
+         metadata.update({
+             "generation_duration_seconds": duration,
+             "generation_success": False,
+             "generation_error": str(e)
+         })
+         return {"result": f"Error generating response: {str(e)}", "metadata": metadata}

+ # Build graph
  workflow = StateGraph(GraphState)
  workflow.add_node("retrieve", retrieve_node)
  workflow.add_node("generate", generate_node)
  workflow.add_edge(START, "retrieve")
  workflow.add_edge("retrieve", "generate")
  workflow.add_edge("generate", END)
+ compiled_graph = workflow.compile()

+ # Core processing function (shared by both Gradio and LangServe)
+ def process_chatfed_query_core(
+     query: str,
+     reports_filter: str = "",
+     sources_filter: str = "",
+     subtype_filter: str = "",
+     year_filter: str = "",
+     session_id: Optional[str] = None,
+     user_id: Optional[str] = None,
+     return_metadata: bool = False
+ ):
+     """Core processing function used by both Gradio and LangServe interfaces."""
+     start_time = datetime.now()
+     if not session_id:
+         session_id = f"session_{start_time.strftime('%Y%m%d_%H%M%S')}"
+
+     logger.info(f"Processing query in session {session_id}: {query[:100]}...")
+
+     try:
+         initial_state = {
+             "query": query,
+             "context": "",
+             "result": "",
+             "reports_filter": reports_filter or "",
+             "sources_filter": sources_filter or "",
+             "subtype_filter": subtype_filter or "",
+             "year_filter": year_filter or "",
+             "metadata": {
+                 "session_id": session_id,
+                 "user_id": user_id,
+                 "start_time": start_time.isoformat(),
+                 "orchestrator": "hybrid_gradio_langserve"
+             }
+         }
+
+         final_state = compiled_graph.invoke(initial_state)
+         total_duration = (datetime.now() - start_time).total_seconds()
+
+         final_metadata = final_state.get("metadata", {})
+         final_metadata.update({
+             "total_duration_seconds": total_duration,
+             "end_time": datetime.now().isoformat(),
+             "pipeline_success": True
+         })
+
+         logger.info(f"Query processing completed in {total_duration:.2f}s for session {session_id}")
+
+         if return_metadata:
+             return {"result": final_state["result"], "metadata": final_metadata}
+         else:
+             return final_state["result"]
+
+     except Exception as e:
+         total_duration = (datetime.now() - start_time).total_seconds()
+         logger.error(f"Pipeline failed after {total_duration:.2f}s for session {session_id}: {str(e)}")
+
+         if return_metadata:
+             error_metadata = {
+                 "session_id": session_id,
+                 "total_duration_seconds": total_duration,
+                 "pipeline_success": False,
+                 "error": str(e)
+             }
+             return {"result": f"Error processing query: {str(e)}", "metadata": error_metadata}
+         else:
+             return f"Error processing query: {str(e)}"
+
+ # =============================================================================
+ # GRADIO INTERFACE (MCP ENDPOINTS)
+ # =============================================================================

+ # Gradio wrapper functions for MCP compatibility
+ def process_query_gradio(
      query: str,
      reports_filter: str = "",
      sources_filter: str = "",
      subtype_filter: str = "",
      year_filter: str = ""
  ) -> str:
+     """Gradio-compatible function that exposes MCP endpoints."""
+     return process_chatfed_query_core(
+         query=query,
+         reports_filter=reports_filter,
+         sources_filter=sources_filter,
+         subtype_filter=subtype_filter,
+         year_filter=year_filter,
+         session_id=f"gradio_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+         return_metadata=False
+     )

  def get_graph_visualization():
+     """Generate graph visualization for Gradio interface."""
+     try:
+         graph_png_bytes = compiled_graph.get_graph().draw_mermaid_png()
+         return Image.open(io.BytesIO(graph_png_bytes))
+     except Exception as e:
+         logger.error(f"Failed to generate graph visualization: {e}")
+         return None

+ # Create Gradio interface
+ def create_gradio_interface():
+     with gr.Blocks(title="ChatFed Orchestrator - MCP Endpoints") as demo:
+         gr.Markdown("# ChatFed Orchestrator")
+         gr.Markdown("**MCP Server Endpoints Available** - This interface provides MCP compatibility for ChatUI integration.")

+         with gr.Row():
+             with gr.Column(scale=1):
+                 gr.Markdown("**Workflow Visualization**")
+                 graph_display = gr.Image(
+                     value=get_graph_visualization(),
+                     label="LangGraph Workflow",
+                     interactive=False,
+                     height=300
+                 )
+                 refresh_graph_btn = gr.Button("🔄 Refresh Graph", size="sm")
+                 refresh_graph_btn.click(fn=get_graph_visualization, outputs=graph_display)

+                 gr.Markdown("**🔗 MCP Integration**")
+                 gr.Markdown("MCP endpoints are active and ready for ChatUI integration.")

+             with gr.Column(scale=2):
+                 gr.Markdown("**MCP Endpoint Information**")

+                 with gr.Accordion("MCP Usage", open=True):
+                     gr.Markdown("""
+                     **MCP Server Endpoint:** Available at `/gradio_api/mcp/sse`
+
+                     **For ChatUI Integration:**
+                     ```python
+                     from gradio_client import Client
+
+                     # Connect to orchestrator MCP endpoint
+                     client = Client("https://your-space.hf.space")
+
+                     # Basic usage
+                     response = client.predict(
+                         query="your question",
+                         api_name="/process_query_gradio"
+                     )
+
+                     # With filters
+                     response = client.predict(
+                         query="your question",
+                         reports_filter="annual_reports",
+                         sources_filter="internal",
+                         year_filter="2024",
+                         api_name="/process_query_gradio"
+                     )
+                     ```
+                     """)
+
+                 with gr.Accordion("Test Interface", open=False):
+                     # Test interface
+                     with gr.Row():
+                         with gr.Column():
+                             query_input = gr.Textbox(label="Query", lines=2, placeholder="Enter your question...")
+                             reports_filter_input = gr.Textbox(label="Reports Filter", placeholder="e.g., annual_reports")
+                             sources_filter_input = gr.Textbox(label="Sources Filter", placeholder="e.g., internal")
+                             subtype_filter_input = gr.Textbox(label="Subtype Filter", placeholder="e.g., financial")
+                             year_filter_input = gr.Textbox(label="Year Filter", placeholder="e.g., 2024")
+                             submit_btn = gr.Button("Submit", variant="primary")

+                         with gr.Column():
+                             output = gr.Textbox(label="Response", lines=10)
+
+                     submit_btn.click(
+                         fn=process_query_gradio,
+                         inputs=[query_input, reports_filter_input, sources_filter_input, subtype_filter_input, year_filter_input],
+                         outputs=output
+                     )
+
+     return demo

+ # =============================================================================
+ # CHATUI STREAMING ADAPTER
+ # =============================================================================

+ async def chatui_streaming_adapter(data: ChatUIStreamInput) -> AsyncGenerator[str, None]:
+     """
+     Streaming adapter for ChatUI integration.
+     ChatUI expects streaming responses.
+     """
+     try:
+         logger.info(f"ChatUI streaming request: {data.text[:100]}...")
+
+         # Process the query using the core function
+         result = process_chatfed_query_core(
+             query=data.text,
+             session_id=f"chatui_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+             return_metadata=False
+         )
+
+         # Stream the response word by word or chunk by chunk
+         words = result.split()
+         for i, word in enumerate(words):
+             if i == 0:
+                 yield word
+             else:
+                 yield f" {word}"
+             # Small delay to simulate streaming
+             await asyncio.sleep(0.01)
+
+     except Exception as e:
+         logger.error(f"ChatUI streaming error: {str(e)}")
+         yield f"Error processing request: {str(e)}"
+
+ def chatui_non_streaming_adapter(data: ChatUIStreamInput):
+     """
+     Non-streaming adapter for ChatUI (fallback).
+     """
+     try:
+         logger.info(f"ChatUI non-streaming request: {data.text[:100]}...")
+
+         result = process_chatfed_query_core(
+             query=data.text,
+             session_id=f"chatui_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+             return_metadata=False
+         )
+         return {"content": result}
+     except Exception as e:
+         logger.error(f"ChatUI adapter error: {str(e)}")
+         return {"content": f"Error processing request: {str(e)}"}
+
+ # =============================================================================
+ # LANGSERVE API (TELEMETRY)
+ # =============================================================================
+
+ def process_chatfed_query_langserve(input_data: ChatFedInput) -> ChatFedOutput:
+     """LangServe function with full metadata return."""
+     result = process_chatfed_query_core(
+         query=input_data["query"],
+         reports_filter=input_data.get("reports_filter", ""),
+         sources_filter=input_data.get("sources_filter", ""),
+         subtype_filter=input_data.get("subtype_filter", ""),
+         year_filter=input_data.get("year_filter", ""),
+         session_id=input_data.get("session_id"),
+         user_id=input_data.get("user_id"),
+         return_metadata=True
+     )
+     return ChatFedOutput(result=result["result"], metadata=result["metadata"])
+
+ def chatui_adapter(data: ChatUIInput):
+     """
+     Adapter to allow ChatUI to send full chat history.
+     We extract the latest user message for ChatFed.
+     """
+     last_user_msg = next(m.content for m in reversed(data.messages) if m.role == "user")
+     result = process_chatfed_query_core(query=last_user_msg)
+     return {"result": result, "metadata": {"source": "chatfed-langserve-adapter"}}
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     logger.info("🚀 Hybrid ChatFed Orchestrator starting up...")
+     logger.info("✅ LangGraph compiled successfully")
+     logger.info("🔗 MCP endpoints will be available via Gradio")
+     logger.info("📊 Enhanced API available via LangServe")
+     logger.info("🎯 ChatUI streaming integration enabled")
+     yield
+     logger.info("🛑 Orchestrator shutting down...")
+
+ # Create FastAPI app with docs disabled
+ app = FastAPI(
+     title="ChatFed Orchestrator - Enhanced API",
+     version="1.0.0",
+     description="Enhanced API with observability. MCP endpoints available via Gradio interface.",
+     lifespan=lifespan,
+     docs_url=None,   # Disable /docs endpoint
+     redoc_url=None   # Disable /redoc endpoint
+ )
+
+ # Health check
+ @app.get("/health")
+ async def health_check():
+     return {
+         "status": "healthy",
+         "mcp_endpoints": "available_via_gradio",
+         "enhanced_api": "available_via_langserve",
+         "chatui_integration": "enabled"
+     }
+
+ # Add root endpoint
+ @app.get("/")
+ async def root():
+     return {
+         "message": "ChatFed Orchestrator API",
+         "version": "1.0.0",
+         "endpoints": {
+             "health": "/health",
+             "chatfed": "/chatfed",
+             "chatfed-chatui": "/chatfed-chatui",
+             "chatfed-ui-stream": "/chatfed-ui-stream",  # New for ChatUI streaming
+             "chatfed-ui": "/chatfed-ui",  # New fallback
+             "process_query": "/process_query"
+         },
+         "gradio_interface": "http://localhost:7861/",
+         "mcp_endpoints": "http://localhost:7861/gradio_api/mcp/sse",
+         "note": "LangServe telemetry enabled - ChatUI integration available via /chatfed-ui-stream"
+     }
+
+ # =============================================================================
+ # ADD LANGSERVE ROUTES
+ # =============================================================================
+
+ # Convert functions to Runnables
+ process_chatfed_query_runnable = RunnableLambda(process_chatfed_query_langserve)
+ chatui_adapter_runnable = RunnableLambda(chatui_adapter)
+ chatui_streaming_runnable = RunnableLambda(chatui_streaming_adapter)
+ chatui_non_streaming_runnable = RunnableLambda(chatui_non_streaming_adapter)
+
+ # Add routes with explicit input/output schemas
+ add_routes(
+     app,
+     process_chatfed_query_runnable,
+     path="/chatfed",
+     input_type=ChatFedInput,
+     output_type=ChatFedOutput
+ )
+
+ # Original ChatUI-compatible LangServe route
+ add_routes(
+     app,
+     chatui_adapter_runnable,
+     path="/chatfed-chatui",
+     input_type=ChatUIInput
+ )
+
+ # NEW: ChatUI streaming route (matches your ChatUI config)
+ add_routes(
+     app,
+     chatui_streaming_runnable,
+     path="/chatfed-ui-stream",
+     input_type=ChatUIStreamInput,
+     enable_feedback_endpoint=True,
+     enable_public_trace_link_endpoint=True,
+ )
+
+ # NEW: ChatUI non-streaming fallback route
+ add_routes(
+     app,
+     chatui_non_streaming_runnable,
+     path="/chatfed-ui",
+     input_type=ChatUIStreamInput,
+     output_type=ChatUIStreamOutput,
+     enable_feedback_endpoint=True,
+     enable_public_trace_link_endpoint=True,
+ )
+
+ # Backward compatibility endpoint
+ @app.post("/process_query")
+ async def process_query_endpoint(
+     query: str,
+     reports_filter: str = "",
+     sources_filter: str = "",
+     subtype_filter: str = "",
+     year_filter: str = "",
+     session_id: Optional[str] = None,
+     user_id: Optional[str] = None
+ ):
+     """Backward compatibility endpoint."""
+     return process_chatfed_query_core(
+         query=query,
+         reports_filter=reports_filter,
+         sources_filter=sources_filter,
+         subtype_filter=subtype_filter,
+         year_filter=year_filter,
+         session_id=session_id,
+         user_id=user_id,
+         return_metadata=False
+     )
+
+ # =============================================================================
+ # MAIN APPLICATION LAUNCHER
+ # =============================================================================
+
+ def run_gradio_server():
+     """Run Gradio server in a separate thread for MCP endpoints."""
+     demo = create_gradio_interface()
      demo.launch(
          server_name="0.0.0.0",
+         server_port=7861,  # Different port from FastAPI
+         mcp_server=True,
+         show_error=True,
+         share=False,
+         quiet=True
+     )
+
+ if __name__ == "__main__":
+     # Start Gradio server in background thread for MCP endpoints
+     gradio_thread = threading.Thread(target=run_gradio_server, daemon=True)
+     gradio_thread.start()
+     logger.info("🔗 Gradio MCP server started on port 7861")
+
+     # Start FastAPI server for enhanced API
+     host = os.getenv("HOST", "0.0.0.0")
+     port = int(os.getenv("PORT", "7860"))
+
+     logger.info(f"🚀 Starting FastAPI server on {host}:{port}")
+     logger.info("📊 Enhanced API with LangServe telemetry available")
+     logger.info("🔗 MCP endpoints available via Gradio on port 7861")
+     logger.info("🎯 ChatUI streaming integration ready at /chatfed-ui-stream")
+
+     uvicorn.run(
+         app,
+         host=host,
+         port=port,
+         log_level="info",
+         access_log=True
      )
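For ChatUI, the commit points the UI at the new streaming route. A rough consumer sketch follows, assuming LangServe's standard `/stream` sub-route with server-sent-event framing and the FastAPI defaults above; the exact chunk encoding can vary slightly across LangServe versions, and the query text is illustrative only.

```python
# Hedged sketch: consume /chatfed-ui-stream as server-sent events.
# Each "data:" line carries one JSON-encoded chunk yielded by
# chatui_streaming_adapter; host/port are the defaults in app/main.py.
import json
import requests

with requests.post(
    "http://localhost:7860/chatfed-ui-stream/stream",
    json={"input": {"text": "Summarize the internal sources."}},
    stream=True,
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if line.startswith("data:"):
            payload = line[len("data:"):].strip()
            if payload:  # skip the empty data line on the "end" event
                print(json.loads(payload), end="", flush=True)
```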