import os, time, random
from dotenv import load_dotenv
from typing import List, Dict, Any, TypedDict, Annotated
import operator

# Load environment variables
load_dotenv()

# LangGraph imports
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver

# LangChain imports
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.rate_limiters import InMemoryRateLimiter

# Tavily import
from tavily import TavilyClient

# Advanced Rate Limiter (SILENT)
class AdvancedRateLimiter:
    def __init__(self, requests_per_minute: int):
        self.requests_per_minute = requests_per_minute
        self.request_times = []
        
    def wait_if_needed(self):
        current_time = time.time()
        # Clean old requests (older than 1 minute)
        self.request_times = [t for t in self.request_times if current_time - t < 60]
        
        # Check if we need to wait
        if len(self.request_times) >= self.requests_per_minute:
            wait_time = 60 - (current_time - self.request_times[0]) + random.uniform(2, 8)
            time.sleep(wait_time)
            current_time = time.time()  # re-read the clock so the recorded time reflects the actual request
        
        # Record this request
        self.request_times.append(current_time)
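
# Hypothetical usage sketch: with a cap of 2 requests/minute, the third call
# inside any rolling 60-second window blocks until the oldest request ages out.
#   demo_limiter = AdvancedRateLimiter(requests_per_minute=2)
#   for _ in range(3):
#       demo_limiter.wait_if_needed()  # third iteration sleeps ~60 seconds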

# Initialize rate limiters for free tiers
groq_limiter = AdvancedRateLimiter(requests_per_minute=30)
gemini_limiter = AdvancedRateLimiter(requests_per_minute=2)
nvidia_limiter = AdvancedRateLimiter(requests_per_minute=5)  # NVIDIA free tier
tavily_limiter = AdvancedRateLimiter(requests_per_minute=50)

# Initialize LangChain rate limiters for NVIDIA
nvidia_rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.083,  # 5 requests per minute
    check_every_n_seconds=0.1,
    max_bucket_size=5
)

# Initialize LLMs with best free models
groq_llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    api_key=os.getenv("GROQ_API_KEY"),
    temperature=0
)

gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-thinking-exp",
    api_key=os.getenv("GOOGLE_API_KEY"),
    temperature=0
)

# Best NVIDIA models based on search results
nvidia_general_llm = ChatNVIDIA(
    model="meta/llama3-70b-instruct",  # Best general model from NVIDIA
    api_key=os.getenv("NVIDIA_API_KEY"),
    temperature=0,
    max_tokens=4000,
    rate_limiter=nvidia_rate_limiter
)

nvidia_code_llm = ChatNVIDIA(
    model="meta/codellama-70b",  # Best code generation model from NVIDIA
    api_key=os.getenv("NVIDIA_API_KEY"),
    temperature=0,
    max_tokens=4000,
    rate_limiter=nvidia_rate_limiter
)

nvidia_math_llm = ChatNVIDIA(
    model="mistralai/mixtral-8x22b-instruct-v0.1",  # Best reasoning model from NVIDIA
    api_key=os.getenv("NVIDIA_API_KEY"),
    temperature=0,
    max_tokens=4000,
    rate_limiter=nvidia_rate_limiter
)

# Initialize Tavily client
tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

# Define State
class AgentState(TypedDict):
    messages: Annotated[List[HumanMessage | AIMessage], operator.add]
    query: str
    agent_type: str
    final_answer: str
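
# Note on the reducer: Annotated[..., operator.add] tells LangGraph to *append*
# whatever a node returns under "messages" to the existing history, so nodes
# below return only the new messages they produce, never the accumulated list.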

# Custom Tools
@tool
def multiply_tool(a: float, b: float) -> float:
    """Multiply two numbers together"""
    return a * b

@tool
def add_tool(a: float, b: float) -> float:
    """Add two numbers together"""
    return a + b

@tool
def subtract_tool(a: float, b: float) -> float:
    """Subtract two numbers"""
    return a - b

@tool
def divide_tool(a: float, b: float) -> float:
    """Divide two numbers"""
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
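
# @tool-decorated functions are normally called by the agent, but they can be
# exercised directly for a quick sanity check, e.g.:
#   multiply_tool.invoke({"a": 3, "b": 4})  # -> 12.0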

@tool
def tavily_search_tool(query: str) -> str:
    """Search the web using Tavily for current information"""
    try:
        tavily_limiter.wait_if_needed()
        response = tavily_client.search(
            query=query,
            max_results=3,
            search_depth="basic",
            include_answer=False
        )
        
        # Format results
        results = []
        for result in response.get('results', []):
            results.append(f"Title: {result.get('title', '')}\nContent: {result.get('content', '')}")
        
        return "\n\n---\n\n".join(results)
        
    except Exception as e:
        return f"Tavily search failed: {str(e)}"

@tool
def wiki_search_tool(query: str) -> str:
    """Search Wikipedia for encyclopedic information"""
    try:
        time.sleep(random.uniform(1, 3))
        from langchain_community.document_loaders import WikipediaLoader
        loader = WikipediaLoader(query=query, load_max_docs=1)
        data = loader.load()
        return "\n\n---\n\n".join([doc.page_content[:1000] for doc in data])
    except Exception as e:
        return f"Wikipedia search failed: {str(e)}"

# Define tools for each agent type
math_tools = [multiply_tool, add_tool, subtract_tool, divide_tool]
research_tools = [tavily_search_tool, wiki_search_tool]
coordinator_tools = [tavily_search_tool, wiki_search_tool]

# Node functions
def router_node(state: AgentState) -> AgentState:
    """Route queries to appropriate agent type"""
    query = state["query"].lower()
    
    if any(word in query for word in ['calculate', 'math', 'multiply', 'add', 'subtract', 'divide', 'compute']):
        agent_type = "math"
    elif any(word in query for word in ['code', 'program', 'python', 'javascript', 'function', 'algorithm']):
        agent_type = "code"
    elif any(word in query for word in ['search', 'find', 'research', 'what is', 'who is', 'when', 'where']):
        agent_type = "research"
    else:
        agent_type = "coordinator"
    
    return {"agent_type": agent_type}  # return only the updated key; spreading **state would duplicate messages via the reducer
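
# Example routing for a few hypothetical queries:
#   "Calculate 15 * 23"                        -> "math"
#   "Write a python function to sort a list"   -> "code"
#   "Who is the current UN Secretary-General?" -> "research"
#   "Summarize the plot of Hamlet"             -> "coordinator"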

def math_agent_node(state: AgentState) -> AgentState:
    """Mathematical specialist agent using NVIDIA Mixtral"""
    nvidia_limiter.wait_if_needed()
    
    system_message = SystemMessage(content="""You are a mathematical specialist with access to calculation tools.
    Use the appropriate math tools for calculations.
    Show your work step by step.
    Always provide precise numerical answers.
    Finish with: FINAL ANSWER: [numerical result]""")
    
    # Create math agent with NVIDIA's best reasoning model
    math_agent = create_react_agent(nvidia_math_llm, math_tools)
    
    # Process query
    messages = [system_message, HumanMessage(content=state["query"])]
    config = {"configurable": {"thread_id": "math_thread"}}
    
    try:
        result = math_agent.invoke({"messages": messages}, config)
        final_message = result["messages"][-1].content
        
        # Return only the new message; the operator.add reducer appends it to the history
        return {
            "messages": [AIMessage(content=final_message)],
            "final_answer": final_message
        }
    except Exception as e:
        error_msg = f"Math agent error: {str(e)}"
        return {
            "messages": [AIMessage(content=error_msg)],
            "final_answer": error_msg
        }

def code_agent_node(state: AgentState) -> AgentState:
    """Code generation specialist agent using NVIDIA CodeLlama"""
    nvidia_limiter.wait_if_needed()
    
    system_message = SystemMessage(content="""You are an expert coding AI specialist.
    Generate clean, efficient, and well-documented code.
    Explain your code solutions clearly.
    Always provide working code examples.
    Finish with: FINAL ANSWER: [your code solution]""")
    
    # Create code agent with NVIDIA's best code model (no tools: pure code generation)
    code_agent = create_react_agent(nvidia_code_llm, [])
    
    # Process query
    messages = [system_message, HumanMessage(content=state["query"])]
    config = {"configurable": {"thread_id": "code_thread"}}
    
    try:
        result = code_agent.invoke({"messages": messages}, config)
        final_message = result["messages"][-1].content
        
        return {
            "messages": [AIMessage(content=final_message)],
            "final_answer": final_message
        }
    except Exception as e:
        error_msg = f"Code agent error: {str(e)}"
        return {
            "messages": [AIMessage(content=error_msg)],
            "final_answer": error_msg
        }

def research_agent_node(state: AgentState) -> AgentState:
    """Research specialist agent using Gemini"""
    gemini_limiter.wait_if_needed()
    
    system_message = SystemMessage(content="""You are a research specialist with access to web search and Wikipedia.
    Use appropriate search tools to gather comprehensive information.
    Always cite sources and provide well-researched answers.
    Synthesize information from multiple sources when possible.
    Finish with: FINAL ANSWER: [your researched answer]""")
    
    # Create research agent
    research_agent = create_react_agent(gemini_llm, research_tools)
    
    # Process query
    messages = [system_message, HumanMessage(content=state["query"])]
    config = {"configurable": {"thread_id": "research_thread"}}
    
    try:
        result = research_agent.invoke({"messages": messages}, config)
        final_message = result["messages"][-1].content
        
        return {
            "messages": [AIMessage(content=final_message)],
            "final_answer": final_message
        }
    except Exception as e:
        error_msg = f"Research agent error: {str(e)}"
        return {
            "messages": [AIMessage(content=error_msg)],
            "final_answer": error_msg
        }

def coordinator_agent_node(state: AgentState) -> AgentState:
    """Coordinator agent using NVIDIA Llama3"""
    nvidia_limiter.wait_if_needed()
    
    system_message = SystemMessage(content="""You are the main coordinator agent.
    Analyze queries and provide comprehensive responses.
    Use search tools for factual information when needed.
    Always finish with: FINAL ANSWER: [your final answer]""")
    
    # Create coordinator agent with NVIDIA's best general model
    coordinator_agent = create_react_agent(nvidia_general_llm, coordinator_tools)
    
    # Process query
    messages = [system_message, HumanMessage(content=state["query"])]
    config = {"configurable": {"thread_id": "coordinator_thread"}}
    
    try:
        result = coordinator_agent.invoke({"messages": messages}, config)
        final_message = result["messages"][-1].content
        
        return {
            "messages": [AIMessage(content=final_message)],
            "final_answer": final_message
        }
    except Exception as e:
        error_msg = f"Coordinator agent error: {str(e)}"
        return {
            "messages": [AIMessage(content=error_msg)],
            "final_answer": error_msg
        }

# Conditional routing function
def route_agent(state: AgentState) -> str:
    """Route to appropriate agent based on agent_type"""
    agent_type = state.get("agent_type", "coordinator")
    
    if agent_type == "math":
        return "math_agent"
    elif agent_type == "code":
        return "code_agent"
    elif agent_type == "research":
        return "research_agent"
    else:
        return "coordinator_agent"

# LangGraph Multi-Agent System
class LangGraphMultiAgentSystem:
    def __init__(self):
        self.request_count = 0
        self.last_request_time = time.time()
        self.graph = self._create_graph()
    
    def _create_graph(self) -> StateGraph:
        """Create the LangGraph workflow"""
        workflow = StateGraph(AgentState)
        
        # Add nodes
        workflow.add_node("router", router_node)
        workflow.add_node("math_agent", math_agent_node)
        workflow.add_node("code_agent", code_agent_node)
        workflow.add_node("research_agent", research_agent_node)
        workflow.add_node("coordinator_agent", coordinator_agent_node)
        
        # Add edges
        workflow.set_entry_point("router")
        workflow.add_conditional_edges(
            "router",
            route_agent,
            {
                "math_agent": "math_agent",
                "code_agent": "code_agent",
                "research_agent": "research_agent",
                "coordinator_agent": "coordinator_agent"
            }
        )
        
        # All agents end the workflow
        workflow.add_edge("math_agent", END)
        workflow.add_edge("code_agent", END)
        workflow.add_edge("research_agent", END)
        workflow.add_edge("coordinator_agent", END)
        
        # Compile the graph
        memory = MemorySaver()
        return workflow.compile(checkpointer=memory)
    
    def process_query(self, query: str) -> str:
        """Process query using LangGraph multi-agent system"""
        # Global rate limiting (SILENT)
        current_time = time.time()
        if current_time - self.last_request_time > 3600:
            self.request_count = 0
            self.last_request_time = current_time
        
        self.request_count += 1
        
        # Add delay between requests (SILENT)
        if self.request_count > 1:
            time.sleep(random.uniform(3, 10))
        
        # Initial state
        initial_state = {
            "messages": [HumanMessage(content=query)],
            "query": query,
            "agent_type": "",
            "final_answer": ""
        }
        
        # Configuration for the graph
        config = {"configurable": {"thread_id": f"thread_{self.request_count}"}}
        
        try:
            # Run the graph
            final_state = self.graph.invoke(initial_state, config)
            return final_state.get("final_answer", "No response generated")
            
        except Exception as e:
            return f"Error: {str(e)}"

# Main functions
def main(query: str) -> str:
    """Main function using LangGraph multi-agent system"""
    langgraph_system = LangGraphMultiAgentSystem()
    return langgraph_system.process_query(query)

def get_final_answer(query: str) -> str:
    """Extract only the FINAL ANSWER from the response"""
    full_response = main(query)
    
    if "FINAL ANSWER:" in full_response:
        final_answer = full_response.split("FINAL ANSWER:")[-1].strip()
        return final_answer
    else:
        return full_response.strip()

if __name__ == "__main__":
    # Test the LangGraph system - CLEAN OUTPUT ONLY
    result = get_final_answer("What are the names of the US presidents who were assassinated?")
    print(result)