import os, time, random from dotenv import load_dotenv from typing import List, Dict, Any, TypedDict, Annotated import operator # Load environment variables load_dotenv() # LangGraph imports from langgraph.graph import StateGraph, END from langgraph.prebuilt import create_react_agent from langgraph.checkpoint.memory import MemorySaver # LangChain imports from langchain_core.messages import HumanMessage, AIMessage, SystemMessage from langchain_core.tools import tool from langchain_groq import ChatGroq from langchain_google_genai import ChatGoogleGenerativeAI from langchain_nvidia_ai_endpoints import ChatNVIDIA from langchain_core.rate_limiters import InMemoryRateLimiter # Tavily import from tavily import TavilyClient # Advanced Rate Limiter (SILENT) class AdvancedRateLimiter: def __init__(self, requests_per_minute: int): self.requests_per_minute = requests_per_minute self.request_times = [] def wait_if_needed(self): current_time = time.time() # Clean old requests (older than 1 minute) self.request_times = [t for t in self.request_times if current_time - t < 60] # Check if we need to wait if len(self.request_times) >= self.requests_per_minute: wait_time = 60 - (current_time - self.request_times[0]) + random.uniform(2, 8) time.sleep(wait_time) # Record this request self.request_times.append(current_time) # Initialize rate limiters for free tiers groq_limiter = AdvancedRateLimiter(requests_per_minute=30) gemini_limiter = AdvancedRateLimiter(requests_per_minute=2) nvidia_limiter = AdvancedRateLimiter(requests_per_minute=5) # NVIDIA free tier tavily_limiter = AdvancedRateLimiter(requests_per_minute=50) # Initialize LangChain rate limiters for NVIDIA nvidia_rate_limiter = InMemoryRateLimiter( requests_per_second=0.083, # 5 requests per minute check_every_n_seconds=0.1, max_bucket_size=5 ) # Initialize LLMs with best free models groq_llm = ChatGroq( model="llama-3.3-70b-versatile", api_key=os.getenv("GROQ_API_KEY"), temperature=0 ) gemini_llm = ChatGoogleGenerativeAI( model="gemini-2.0-flash-thinking-exp", api_key=os.getenv("GOOGLE_API_KEY"), temperature=0 ) # Best NVIDIA models based on search results nvidia_general_llm = ChatNVIDIA( model="meta/llama3-70b-instruct", # Best general model from NVIDIA api_key=os.getenv("NVIDIA_API_KEY"), temperature=0, max_tokens=4000, rate_limiter=nvidia_rate_limiter ) nvidia_code_llm = ChatNVIDIA( model="meta/codellama-70b", # Best code generation model from NVIDIA api_key=os.getenv("NVIDIA_API_KEY"), temperature=0, max_tokens=4000, rate_limiter=nvidia_rate_limiter ) nvidia_math_llm = ChatNVIDIA( model="mistralai/mixtral-8x22b-instruct-v0.1", # Best reasoning model from NVIDIA api_key=os.getenv("NVIDIA_API_KEY"), temperature=0, max_tokens=4000, rate_limiter=nvidia_rate_limiter ) # Initialize Tavily client tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY")) # Define State class AgentState(TypedDict): messages: Annotated[List[HumanMessage | AIMessage], operator.add] query: str agent_type: str final_answer: str # Custom Tools @tool def multiply_tool(a: float, b: float) -> float: """Multiply two numbers together""" return a * b @tool def add_tool(a: float, b: float) -> float: """Add two numbers together""" return a + b @tool def subtract_tool(a: float, b: float) -> float: """Subtract two numbers""" return a - b @tool def divide_tool(a: float, b: float) -> float: """Divide two numbers""" if b == 0: raise ValueError("Cannot divide by zero.") return a / b @tool def tavily_search_tool(query: str) -> str: """Search the web using Tavily for current information""" try: tavily_limiter.wait_if_needed() response = tavily_client.search( query=query, max_results=3, search_depth="basic", include_answer=False ) # Format results results = [] for result in response.get('results', []): results.append(f"Title: {result.get('title', '')}\nContent: {result.get('content', '')}") return "\n\n---\n\n".join(results) except Exception as e: return f"Tavily search failed: {str(e)}" @tool def wiki_search_tool(query: str) -> str: """Search Wikipedia for encyclopedic information""" try: time.sleep(random.uniform(1, 3)) from langchain_community.document_loaders import WikipediaLoader loader = WikipediaLoader(query=query, load_max_docs=1) data = loader.load() return "\n\n---\n\n".join([doc.page_content[:1000] for doc in data]) except Exception as e: return f"Wikipedia search failed: {str(e)}" # Define tools for each agent type math_tools = [multiply_tool, add_tool, subtract_tool, divide_tool] research_tools = [tavily_search_tool, wiki_search_tool] coordinator_tools = [tavily_search_tool, wiki_search_tool] # Node functions def router_node(state: AgentState) -> AgentState: """Route queries to appropriate agent type""" query = state["query"].lower() if any(word in query for word in ['calculate', 'math', 'multiply', 'add', 'subtract', 'divide', 'compute']): agent_type = "math" elif any(word in query for word in ['code', 'program', 'python', 'javascript', 'function', 'algorithm']): agent_type = "code" elif any(word in query for word in ['search', 'find', 'research', 'what is', 'who is', 'when', 'where']): agent_type = "research" else: agent_type = "coordinator" return {**state, "agent_type": agent_type} def math_agent_node(state: AgentState) -> AgentState: """Mathematical specialist agent using NVIDIA Mixtral""" nvidia_limiter.wait_if_needed() system_message = SystemMessage(content="""You are a mathematical specialist with access to calculation tools. Use the appropriate math tools for calculations. Show your work step by step. Always provide precise numerical answers. Finish with: FINAL ANSWER: [numerical result]""") # Create math agent with NVIDIA's best reasoning model math_agent = create_react_agent(nvidia_math_llm, math_tools) # Process query messages = [system_message, HumanMessage(content=state["query"])] config = {"configurable": {"thread_id": "math_thread"}} try: result = math_agent.invoke({"messages": messages}, config) final_message = result["messages"][-1].content return { **state, "messages": state["messages"] + [AIMessage(content=final_message)], "final_answer": final_message } except Exception as e: error_msg = f"Math agent error: {str(e)}" return { **state, "messages": state["messages"] + [AIMessage(content=error_msg)], "final_answer": error_msg } def code_agent_node(state: AgentState) -> AgentState: """Code generation specialist agent using NVIDIA CodeLlama""" nvidia_limiter.wait_if_needed() system_message = SystemMessage(content="""You are an expert coding AI specialist. Generate clean, efficient, and well-documented code. Explain your code solutions clearly. Always provide working code examples. Finish with: FINAL ANSWER: [your code solution]""") # Create code agent with NVIDIA's best code model code_agent = create_react_agent(nvidia_code_llm, []) # Process query messages = [system_message, HumanMessage(content=state["query"])] config = {"configurable": {"thread_id": "code_thread"}} try: result = code_agent.invoke({"messages": messages}, config) final_message = result["messages"][-1].content return { **state, "messages": state["messages"] + [AIMessage(content=final_message)], "final_answer": final_message } except Exception as e: error_msg = f"Code agent error: {str(e)}" return { **state, "messages": state["messages"] + [AIMessage(content=error_msg)], "final_answer": error_msg } def research_agent_node(state: AgentState) -> AgentState: """Research specialist agent using Gemini""" gemini_limiter.wait_if_needed() system_message = SystemMessage(content="""You are a research specialist with access to web search and Wikipedia. Use appropriate search tools to gather comprehensive information. Always cite sources and provide well-researched answers. Synthesize information from multiple sources when possible. Finish with: FINAL ANSWER: [your researched answer]""") # Create research agent research_agent = create_react_agent(gemini_llm, research_tools) # Process query messages = [system_message, HumanMessage(content=state["query"])] config = {"configurable": {"thread_id": "research_thread"}} try: result = research_agent.invoke({"messages": messages}, config) final_message = result["messages"][-1].content return { **state, "messages": state["messages"] + [AIMessage(content=final_message)], "final_answer": final_message } except Exception as e: error_msg = f"Research agent error: {str(e)}" return { **state, "messages": state["messages"] + [AIMessage(content=error_msg)], "final_answer": error_msg } def coordinator_agent_node(state: AgentState) -> AgentState: """Coordinator agent using NVIDIA Llama3""" nvidia_limiter.wait_if_needed() system_message = SystemMessage(content="""You are the main coordinator agent. Analyze queries and provide comprehensive responses. Use search tools for factual information when needed. Always finish with: FINAL ANSWER: [your final answer]""") # Create coordinator agent with NVIDIA's best general model coordinator_agent = create_react_agent(nvidia_general_llm, coordinator_tools) # Process query messages = [system_message, HumanMessage(content=state["query"])] config = {"configurable": {"thread_id": "coordinator_thread"}} try: result = coordinator_agent.invoke({"messages": messages}, config) final_message = result["messages"][-1].content return { **state, "messages": state["messages"] + [AIMessage(content=final_message)], "final_answer": final_message } except Exception as e: error_msg = f"Coordinator agent error: {str(e)}" return { **state, "messages": state["messages"] + [AIMessage(content=error_msg)], "final_answer": error_msg } # Conditional routing function def route_agent(state: AgentState) -> str: """Route to appropriate agent based on agent_type""" agent_type = state.get("agent_type", "coordinator") if agent_type == "math": return "math_agent" elif agent_type == "code": return "code_agent" elif agent_type == "research": return "research_agent" else: return "coordinator_agent" # LangGraph Multi-Agent System class LangGraphMultiAgentSystem: def __init__(self): self.request_count = 0 self.last_request_time = time.time() self.graph = self._create_graph() def _create_graph(self) -> StateGraph: """Create the LangGraph workflow""" workflow = StateGraph(AgentState) # Add nodes workflow.add_node("router", router_node) workflow.add_node("math_agent", math_agent_node) workflow.add_node("code_agent", code_agent_node) workflow.add_node("research_agent", research_agent_node) workflow.add_node("coordinator_agent", coordinator_agent_node) # Add edges workflow.set_entry_point("router") workflow.add_conditional_edges( "router", route_agent, { "math_agent": "math_agent", "code_agent": "code_agent", "research_agent": "research_agent", "coordinator_agent": "coordinator_agent" } ) # All agents end the workflow workflow.add_edge("math_agent", END) workflow.add_edge("code_agent", END) workflow.add_edge("research_agent", END) workflow.add_edge("coordinator_agent", END) # Compile the graph memory = MemorySaver() return workflow.compile(checkpointer=memory) def process_query(self, query: str) -> str: """Process query using LangGraph multi-agent system""" # Global rate limiting (SILENT) current_time = time.time() if current_time - self.last_request_time > 3600: self.request_count = 0 self.last_request_time = current_time self.request_count += 1 # Add delay between requests (SILENT) if self.request_count > 1: time.sleep(random.uniform(3, 10)) # Initial state initial_state = { "messages": [HumanMessage(content=query)], "query": query, "agent_type": "", "final_answer": "" } # Configuration for the graph config = {"configurable": {"thread_id": f"thread_{self.request_count}"}} try: # Run the graph final_state = self.graph.invoke(initial_state, config) return final_state.get("final_answer", "No response generated") except Exception as e: return f"Error: {str(e)}" # Main functions def main(query: str) -> str: """Main function using LangGraph multi-agent system""" langgraph_system = LangGraphMultiAgentSystem() return langgraph_system.process_query(query) def get_final_answer(query: str) -> str: """Extract only the FINAL ANSWER from the response""" full_response = main(query) if "FINAL ANSWER:" in full_response: final_answer = full_response.split("FINAL ANSWER:")[-1].strip() return final_answer else: return full_response.strip() if __name__ == "__main__": # Test the LangGraph system - CLEAN OUTPUT ONLY result = get_final_answer("What are the names of the US presidents who were assassinated?") print(result)