josondev committed on
Commit
b1b6e20
·
verified ·
1 Parent(s): cd32eb4

Update veryfinal.py

Browse files
Files changed (1) hide show
  1. veryfinal.py +227 -343
veryfinal.py CHANGED
@@ -1,26 +1,30 @@
 
1
  import os, time, random
2
  from dotenv import load_dotenv
3
  from typing import List, Dict, Any, TypedDict, Annotated
4
  import operator
5
 
6
- # Load environment variables
7
- load_dotenv()
8
-
9
  # LangGraph imports
10
- from langgraph.graph import StateGraph, END
11
- from langgraph.prebuilt import create_react_agent
 
12
  from langgraph.checkpoint.memory import MemorySaver
13
 
14
  # LangChain imports
15
- from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
16
  from langchain_core.tools import tool
17
  from langchain_groq import ChatGroq
18
  from langchain_google_genai import ChatGoogleGenerativeAI
19
  from langchain_nvidia_ai_endpoints import ChatNVIDIA
20
- from langchain_core.rate_limiters import InMemoryRateLimiter
 
 
 
 
 
 
21
 
22
- # Tavily import
23
- from tavily import TavilyClient
24
 
25
  # Advanced Rate Limiter (SILENT)
26
  class AdvancedRateLimiter:
@@ -41,385 +45,265 @@ class AdvancedRateLimiter:
41
  # Record this request
42
  self.request_times.append(current_time)
43
 
44
- # Initialize rate limiters for free tiers
45
  groq_limiter = AdvancedRateLimiter(requests_per_minute=30)
46
  gemini_limiter = AdvancedRateLimiter(requests_per_minute=2)
47
- nvidia_limiter = AdvancedRateLimiter(requests_per_minute=5) # NVIDIA free tier
48
- tavily_limiter = AdvancedRateLimiter(requests_per_minute=50)
49
-
50
- # Initialize LangChain rate limiters for NVIDIA
51
- nvidia_rate_limiter = InMemoryRateLimiter(
52
- requests_per_second=0.083, # 5 requests per minute
53
- check_every_n_seconds=0.1,
54
- max_bucket_size=5
55
- )
56
-
57
- # Initialize LLMs with best free models
58
- groq_llm = ChatGroq(
59
- model="llama-3.3-70b-versatile",
60
- api_key=os.getenv("GROQ_API_KEY"),
61
- temperature=0
62
- )
63
-
64
- gemini_llm = ChatGoogleGenerativeAI(
65
- model="gemini-2.0-flash-thinking-exp",
66
- api_key=os.getenv("GOOGLE_API_KEY"),
67
- temperature=0
68
- )
69
-
70
- # Best NVIDIA models based on search results
71
- nvidia_general_llm = ChatNVIDIA(
72
- model="meta/llama3-70b-instruct", # Best general model from NVIDIA
73
- api_key=os.getenv("NVIDIA_API_KEY"),
74
- temperature=0,
75
- max_tokens=4000,
76
- rate_limiter=nvidia_rate_limiter
77
- )
78
-
79
- nvidia_code_llm = ChatNVIDIA(
80
- model="meta/codellama-70b", # Best code generation model from NVIDIA
81
- api_key=os.getenv("NVIDIA_API_KEY"),
82
- temperature=0,
83
- max_tokens=4000,
84
- rate_limiter=nvidia_rate_limiter
85
- )
86
-
87
- nvidia_math_llm = ChatNVIDIA(
88
- model="mistralai/mixtral-8x22b-instruct-v0.1", # Best reasoning model from NVIDIA
89
- api_key=os.getenv("NVIDIA_API_KEY"),
90
- temperature=0,
91
- max_tokens=4000,
92
- rate_limiter=nvidia_rate_limiter
93
- )
94
-
95
- # Initialize Tavily client
96
- tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
97
-
98
- # Define State
99
- class AgentState(TypedDict):
100
- messages: Annotated[List[HumanMessage | AIMessage], operator.add]
101
- query: str
102
- agent_type: str
103
- final_answer: str
104
 
105
  # Custom Tools
106
  @tool
107
- def multiply_tool(a: float, b: float) -> float:
108
- """Multiply two numbers together"""
 
 
 
 
109
  return a * b
110
 
111
  @tool
112
- def add_tool(a: float, b: float) -> float:
113
- """Add two numbers together"""
 
 
 
 
 
114
  return a + b
115
 
116
  @tool
117
- def subtract_tool(a: float, b: float) -> float:
118
- """Subtract two numbers"""
 
 
 
 
 
119
  return a - b
120
 
121
  @tool
122
- def divide_tool(a: float, b: float) -> float:
123
- """Divide two numbers"""
 
 
 
 
 
124
  if b == 0:
125
  raise ValueError("Cannot divide by zero.")
126
  return a / b
127
 
128
  @tool
129
- def tavily_search_tool(query: str) -> str:
130
- """Search the web using Tavily for current information"""
131
- try:
132
- tavily_limiter.wait_if_needed()
133
- response = tavily_client.search(
134
- query=query,
135
- max_results=3,
136
- search_depth="basic",
137
- include_answer=False
138
- )
139
-
140
- # Format results
141
- results = []
142
- for result in response.get('results', []):
143
- results.append(f"Title: {result.get('title', '')}\nContent: {result.get('content', '')}")
144
-
145
- return "\n\n---\n\n".join(results)
146
-
147
- except Exception as e:
148
- return f"Tavily search failed: {str(e)}"
149
 
150
  @tool
151
- def wiki_search_tool(query: str) -> str:
152
- """Search Wikipedia for encyclopedic information"""
 
 
 
153
  try:
154
  time.sleep(random.uniform(1, 3))
155
- from langchain_community.document_loaders import WikipediaLoader
156
- loader = WikipediaLoader(query=query, load_max_docs=1)
157
- data = loader.load()
158
- return "\n\n---\n\n".join([doc.page_content[:1000] for doc in data])
 
 
 
159
  except Exception as e:
160
  return f"Wikipedia search failed: {str(e)}"
161
 
162
- # Define tools for each agent type
163
- math_tools = [multiply_tool, add_tool, subtract_tool, divide_tool]
164
- research_tools = [tavily_search_tool, wiki_search_tool]
165
- coordinator_tools = [tavily_search_tool, wiki_search_tool]
166
-
167
- # Node functions
168
- def router_node(state: AgentState) -> AgentState:
169
- """Route queries to appropriate agent type"""
170
- query = state["query"].lower()
171
-
172
- if any(word in query for word in ['calculate', 'math', 'multiply', 'add', 'subtract', 'divide', 'compute']):
173
- agent_type = "math"
174
- elif any(word in query for word in ['code', 'program', 'python', 'javascript', 'function', 'algorithm']):
175
- agent_type = "code"
176
- elif any(word in query for word in ['search', 'find', 'research', 'what is', 'who is', 'when', 'where']):
177
- agent_type = "research"
178
- else:
179
- agent_type = "coordinator"
180
-
181
- return {**state, "agent_type": agent_type}
182
-
183
- def math_agent_node(state: AgentState) -> AgentState:
184
- """Mathematical specialist agent using NVIDIA Mixtral"""
185
- nvidia_limiter.wait_if_needed()
186
-
187
- system_message = SystemMessage(content="""You are a mathematical specialist with access to calculation tools.
188
- Use the appropriate math tools for calculations.
189
- Show your work step by step.
190
- Always provide precise numerical answers.
191
- Finish with: FINAL ANSWER: [numerical result]""")
192
-
193
- # Create math agent with NVIDIA's best reasoning model
194
- math_agent = create_react_agent(nvidia_math_llm, math_tools)
195
-
196
- # Process query
197
- messages = [system_message, HumanMessage(content=state["query"])]
198
- config = {"configurable": {"thread_id": "math_thread"}}
199
 
 
 
200
  try:
201
- result = math_agent.invoke({"messages": messages}, config)
202
- final_message = result["messages"][-1].content
203
-
204
- return {
205
- **state,
206
- "messages": state["messages"] + [AIMessage(content=final_message)],
207
- "final_answer": final_message
208
- }
209
  except Exception as e:
210
- error_msg = f"Math agent error: {str(e)}"
211
- return {
212
- **state,
213
- "messages": state["messages"] + [AIMessage(content=error_msg)],
214
- "final_answer": error_msg
215
- }
216
 
217
- def code_agent_node(state: AgentState) -> AgentState:
218
- """Code generation specialist agent using NVIDIA CodeLlama"""
219
- nvidia_limiter.wait_if_needed()
220
-
221
- system_message = SystemMessage(content="""You are an expert coding AI specialist.
222
- Generate clean, efficient, and well-documented code.
223
- Explain your code solutions clearly.
224
- Always provide working code examples.
225
- Finish with: FINAL ANSWER: [your code solution]""")
226
-
227
- # Create code agent with NVIDIA's best code model
228
- code_agent = create_react_agent(nvidia_code_llm, [])
229
-
230
- # Process query
231
- messages = [system_message, HumanMessage(content=state["query"])]
232
- config = {"configurable": {"thread_id": "code_thread"}}
233
 
 
 
234
  try:
235
- result = code_agent.invoke({"messages": messages}, config)
236
- final_message = result["messages"][-1].content
237
-
238
- return {
239
- **state,
240
- "messages": state["messages"] + [AIMessage(content=final_message)],
241
- "final_answer": final_message
242
- }
243
  except Exception as e:
244
- error_msg = f"Code agent error: {str(e)}"
245
- return {
246
- **state,
247
- "messages": state["messages"] + [AIMessage(content=error_msg)],
248
- "final_answer": error_msg
249
- }
250
 
251
- def research_agent_node(state: AgentState) -> AgentState:
252
- """Research specialist agent using Gemini"""
253
- gemini_limiter.wait_if_needed()
254
-
255
- system_message = SystemMessage(content="""You are a research specialist with access to web search and Wikipedia.
256
- Use appropriate search tools to gather comprehensive information.
257
- Always cite sources and provide well-researched answers.
258
- Synthesize information from multiple sources when possible.
259
- Finish with: FINAL ANSWER: [your researched answer]""")
260
-
261
- # Create research agent
262
- research_agent = create_react_agent(gemini_llm, research_tools)
263
-
264
- # Process query
265
- messages = [system_message, HumanMessage(content=state["query"])]
266
- config = {"configurable": {"thread_id": "research_thread"}}
267
-
268
  try:
269
- result = research_agent.invoke({"messages": messages}, config)
270
- final_message = result["messages"][-1].content
271
-
272
- return {
273
- **state,
274
- "messages": state["messages"] + [AIMessage(content=final_message)],
275
- "final_answer": final_message
 
 
 
 
 
 
 
276
  }
277
- except Exception as e:
278
- error_msg = f"Research agent error: {str(e)}"
279
- return {
280
- **state,
281
- "messages": state["messages"] + [AIMessage(content=error_msg)],
282
- "final_answer": error_msg
283
- }
284
-
285
- def coordinator_agent_node(state: AgentState) -> AgentState:
286
- """Coordinator agent using NVIDIA Llama3"""
287
- nvidia_limiter.wait_if_needed()
288
-
289
- system_message = SystemMessage(content="""You are the main coordinator agent.
290
- Analyze queries and provide comprehensive responses.
291
- Use search tools for factual information when needed.
292
- Always finish with: FINAL ANSWER: [your final answer]""")
293
-
294
- # Create coordinator agent with NVIDIA's best general model
295
- coordinator_agent = create_react_agent(nvidia_general_llm, coordinator_tools)
296
-
297
- # Process query
298
- messages = [system_message, HumanMessage(content=state["query"])]
299
- config = {"configurable": {"thread_id": "coordinator_thread"}}
300
-
301
- try:
302
- result = coordinator_agent.invoke({"messages": messages}, config)
303
- final_message = result["messages"][-1].content
304
 
305
- return {
306
- **state,
307
- "messages": state["messages"] + [AIMessage(content=final_message)],
308
- "final_answer": final_message
309
- }
 
 
 
 
 
 
 
 
 
 
 
310
  except Exception as e:
311
- error_msg = f"Coordinator agent error: {str(e)}"
312
- return {
313
- **state,
314
- "messages": state["messages"] + [AIMessage(content=error_msg)],
315
- "final_answer": error_msg
316
- }
 
 
 
 
 
 
 
 
 
 
317
 
318
- # Conditional routing function
319
- def route_agent(state: AgentState) -> str:
320
- """Route to appropriate agent based on agent_type"""
321
- agent_type = state.get("agent_type", "coordinator")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
 
323
- if agent_type == "math":
324
- return "math_agent"
325
- elif agent_type == "code":
326
- return "code_agent"
327
- elif agent_type == "research":
328
- return "research_agent"
 
329
  else:
330
- return "coordinator_agent"
331
-
332
- # LangGraph Multi-Agent System
333
- class LangGraphMultiAgentSystem:
334
- def __init__(self):
335
- self.request_count = 0
336
- self.last_request_time = time.time()
337
- self.graph = self._create_graph()
338
 
339
- def _create_graph(self) -> StateGraph:
340
- """Create the LangGraph workflow"""
341
- workflow = StateGraph(AgentState)
342
-
343
- # Add nodes
344
- workflow.add_node("router", router_node)
345
- workflow.add_node("math_agent", math_agent_node)
346
- workflow.add_node("code_agent", code_agent_node)
347
- workflow.add_node("research_agent", research_agent_node)
348
- workflow.add_node("coordinator_agent", coordinator_agent_node)
349
-
350
- # Add edges
351
- workflow.set_entry_point("router")
352
- workflow.add_conditional_edges(
353
- "router",
354
- route_agent,
355
- {
356
- "math_agent": "math_agent",
357
- "code_agent": "code_agent",
358
- "research_agent": "research_agent",
359
- "coordinator_agent": "coordinator_agent"
360
- }
361
- )
362
-
363
- # All agents end the workflow
364
- workflow.add_edge("math_agent", END)
365
- workflow.add_edge("code_agent", END)
366
- workflow.add_edge("research_agent", END)
367
- workflow.add_edge("coordinator_agent", END)
368
-
369
- # Compile the graph
370
- memory = MemorySaver()
371
- return workflow.compile(checkpointer=memory)
372
 
373
- def process_query(self, query: str) -> str:
374
- """Process query using LangGraph multi-agent system"""
375
- # Global rate limiting (SILENT)
376
- current_time = time.time()
377
- if current_time - self.last_request_time > 3600:
378
- self.request_count = 0
379
- self.last_request_time = current_time
380
-
381
- self.request_count += 1
382
-
383
- # Add delay between requests (SILENT)
384
- if self.request_count > 1:
385
- time.sleep(random.uniform(3, 10))
386
-
387
- # Initial state
388
- initial_state = {
389
- "messages": [HumanMessage(content=query)],
390
- "query": query,
391
- "agent_type": "",
392
- "final_answer": ""
393
- }
394
-
395
- # Configuration for the graph
396
- config = {"configurable": {"thread_id": f"thread_{self.request_count}"}}
397
 
398
- try:
399
- # Run the graph
400
- final_state = self.graph.invoke(initial_state, config)
401
- return final_state.get("final_answer", "No response generated")
402
-
403
- except Exception as e:
404
- return f"Error: {str(e)}"
405
 
406
- # Main functions
407
- def main(query: str) -> str:
408
- """Main function using LangGraph multi-agent system"""
409
- langgraph_system = LangGraphMultiAgentSystem()
410
- return langgraph_system.process_query(query)
 
 
 
 
411
 
412
- def get_final_answer(query: str) -> str:
413
- """Extract only the FINAL ANSWER from the response"""
414
- full_response = main(query)
415
-
416
- if "FINAL ANSWER:" in full_response:
417
- final_answer = full_response.split("FINAL ANSWER:")[-1].strip()
418
- return final_answer
419
- else:
420
- return full_response.strip()
421
 
 
422
  if __name__ == "__main__":
423
- # Test the LangGraph system - CLEAN OUTPUT ONLY
424
- result = get_final_answer("What are the names of the US presidents who were assassinated?")
425
- print(result)
 
 
 
 
 
 
 
1
+ """LangGraph Agent with FAISS Vector Store and Custom Tools"""
2
  import os, time, random
3
  from dotenv import load_dotenv
4
  from typing import List, Dict, Any, TypedDict, Annotated
5
  import operator
6
 
 
 
 
7
  # LangGraph imports
8
+ from langgraph.graph import START, StateGraph, MessagesState
9
+ from langgraph.prebuilt import tools_condition
10
+ from langgraph.prebuilt import ToolNode
11
  from langgraph.checkpoint.memory import MemorySaver
12
 
13
  # LangChain imports
14
+ from langchain_core.messages import SystemMessage, HumanMessage
15
  from langchain_core.tools import tool
16
  from langchain_groq import ChatGroq
17
  from langchain_google_genai import ChatGoogleGenerativeAI
18
  from langchain_nvidia_ai_endpoints import ChatNVIDIA
19
+ from langchain_community.tools.tavily_search import TavilySearchResults
20
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
21
+ from langchain_community.vectorstores import FAISS
22
+ from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
23
+ from langchain.tools.retriever import create_retriever_tool
24
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
25
+ from langchain_community.document_loaders import JSONLoader
26
 
27
+ load_dotenv()
 
28
 
29
  # Advanced Rate Limiter (SILENT)
30
  class AdvancedRateLimiter:
 
45
  # Record this request
46
  self.request_times.append(current_time)
47
 
48
+ # Initialize rate limiters
49
  groq_limiter = AdvancedRateLimiter(requests_per_minute=30)
50
  gemini_limiter = AdvancedRateLimiter(requests_per_minute=2)
51
+ nvidia_limiter = AdvancedRateLimiter(requests_per_minute=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Custom Tools
54
  @tool
55
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.
    Args:
        a: first int
        b: second int
    """
    # Docstring wording is left untouched: the @tool decorator above
    # presumably publishes it as the tool description (confirm).
    product = operator.mul(a, b)
    return product
62
 
63
  @tool
64
def add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    # Docstring wording is left untouched: the @tool decorator above
    # presumably publishes it as the tool description (confirm).
    total = operator.add(a, b)
    return total
72
 
73
  @tool
74
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.

    Args:
        a: first int
        b: second int
    """
    # Docstring wording is left untouched: the @tool decorator above
    # presumably publishes it as the tool description (confirm).
    difference = operator.sub(a, b)
    return difference
82
 
83
  @tool
84
def divide(a: int, b: int) -> float:
    """Divide two numbers.

    Args:
        a: first int
        b: second int
    """
    # Happy path first; a zero divisor falls through to the explicit error.
    if b != 0:
        return operator.truediv(a, b)
    raise ValueError("Cannot divide by zero.")
94
 
95
  @tool
96
def modulus(a: int, b: int) -> int:
    """Get the modulus of two numbers.

    Args:
        a: first int
        b: second int

    Raises:
        ValueError: if b is zero.
    """
    # Mirror divide(): reject a zero divisor with a clear ValueError instead
    # of letting a raw ZeroDivisionError escape from the tool.
    if b == 0:
        raise ValueError("Cannot take modulus by zero.")
    return a % b
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  @tool
106
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    try:
        # Randomized pause before the lookup.
        pause_seconds = random.uniform(1, 3)
        time.sleep(pause_seconds)

        pages = WikipediaLoader(query=query, load_max_docs=2).load()

        # Render each page as a pseudo-XML <Document> snippet.
        rendered = []
        for page in pages:
            source = page.metadata["source"]
            page_ref = page.metadata.get("page", "")
            rendered.append(
                f'<Document source="{source}" page="{page_ref}"/>\n{page.page_content}\n</Document>'
            )
        return "\n\n---\n\n".join(rendered)
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Wikipedia search failed: {str(e)}"
122
 
123
+ @tool
124
def web_search(query: str) -> str:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        The hits rendered as <Document> snippets joined by "---" separators,
        or a "Web search failed: ..." message when the lookup fails.
    """
    try:
        # Randomized pause before calling the search API.
        time.sleep(random.uniform(2, 5))

        # invoke() takes the tool input as its first positional argument;
        # passing it as `query=` leaves that argument missing and raises
        # TypeError on every call.
        search_docs = TavilySearchResults(max_results=3).invoke({"query": query})

        # NOTE(review): some versions of the Tavily tool appear to return an
        # error string instead of a list of hits -- surface it directly
        # rather than failing on `.get` below. Confirm against the installed
        # langchain_community version.
        if isinstance(search_docs, str):
            return f"Web search failed: {search_docs}"

        formatted_search_docs = "\n\n---\n\n".join(
            [
                f'<Document source="{doc.get("url", "")}" />\n{doc.get("content", "")}\n</Document>'
                for doc in search_docs
            ])
        return formatted_search_docs
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Web search failed: {str(e)}"
 
 
 
 
 
140
 
141
+ @tool
142
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        The hits rendered as <Document> snippets (content truncated to 1000
        characters each) joined by "---" separators, or an
        "ArXiv search failed: ..." message when the lookup fails.
    """
    try:
        # Randomized pause before calling the loader.
        time.sleep(random.uniform(1, 4))
        search_docs = ArxivLoader(query=query, load_max_docs=3).load()

        rendered = []
        for doc in search_docs:
            meta = doc.metadata
            # A hard `meta["source"]` lookup turns one missing key into a
            # failure of the whole search. NOTE(review): ArxivLoader metadata
            # seems to expose fields such as "Title"/"entry_id" rather than
            # "source" -- confirm against the installed version.
            source = meta.get("source") or meta.get("entry_id") or meta.get("Title", "")
            page = meta.get("page", "")
            rendered.append(
                f'<Document source="{source}" page="{page}"/>\n{doc.page_content[:1000]}\n</Document>'
            )
        return "\n\n---\n\n".join(rendered)
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"ArXiv search failed: {str(e)}"
 
 
 
 
 
158
 
159
# Load and process JSONL data for FAISS vector store
def setup_faiss_vector_store(file_path: str = "metadata.jsonl"):
    """Setup FAISS vector database from JSONL metadata.

    Args:
        file_path: Path to the JSON-lines file to index. Defaults to
            "metadata.jsonl", the location that was previously hard-coded.

    Returns:
        The FAISS vector store built from the chunked records, or None when
        any step (loading, splitting, embedding, indexing) fails; the failure
        is printed rather than raised so the module can still load.
    """
    try:
        # jq projection applied to every JSONL record by the loader.
        jq_schema = """
        {
            page_content: .Question,
            metadata: {
                task_id: .task_id,
                Level: .Level,
                Final_answer: ."Final answer",
                file_name: .file_name,
                Steps: .["Annotator Metadata"].Steps,
                Number_of_steps: .["Annotator Metadata"]["Number of steps"],
                How_long: .["Annotator Metadata"]["How long did this take?"],
                Tools: .["Annotator Metadata"].Tools,
                Number_of_tools: .["Annotator Metadata"]["Number of tools"]
            }
        }
        """

        # Load documents
        json_loader = JSONLoader(
            file_path=file_path,
            jq_schema=jq_schema,
            json_lines=True,
            text_content=False,
        )
        json_docs = json_loader.load()

        # Split documents
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=200)
        json_chunks = text_splitter.split_documents(json_docs)

        # Create FAISS vector store
        embeddings = NVIDIAEmbeddings(
            model="nvidia/nv-embedqa-e5-v5",
            api_key=os.getenv("NVIDIA_API_KEY"),
        )
        vector_store = FAISS.from_documents(json_chunks, embeddings)

        return vector_store
    except Exception as e:
        # Deliberate best-effort: callers treat None as "no retriever".
        print(f"FAISS vector store setup failed: {e}")
        return None
199
+
200
# Built-in prompt used when system_prompt.txt is not present
_FALLBACK_SYSTEM_PROMPT = """You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
Your answer should only start with "FINAL ANSWER: ", then follows with the answer."""

# Prefer the prompt file on disk; fall back to the built-in text if missing
try:
    prompt_file = open("system_prompt.txt", "r", encoding="utf-8")
except FileNotFoundError:
    system_prompt = _FALLBACK_SYSTEM_PROMPT
else:
    with prompt_file:
        system_prompt = prompt_file.read()

# System message shared by the graph nodes
sys_msg = SystemMessage(content=system_prompt)
213
 
214
# Build the vector store once at import time; the retriever tool only exists
# when the store could be created.
vector_store = setup_faiss_vector_store()
retriever_tool = None
if vector_store:
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},
    )
    retriever_tool = create_retriever_tool(
        retriever=retriever,
        name="Question_Search",
        description="A tool to retrieve similar questions from a vector store.",
    )

# Tool registry: arithmetic helpers first, then the search tools, then the
# optional retriever tool.
_math_tools = [multiply, add, subtract, divide, modulus]
_search_tools = [wiki_search, web_search, arvix_search]
all_tools = _math_tools + _search_tools
all_tools += [retriever_tool] if retriever_tool else []
240
+
241
# Build graph function
def build_graph(provider: str = "groq"):
    """Build the LangGraph with rate limiting.

    The graph runs START -> retriever -> assistant, then loops
    assistant <-> tools for as long as the assistant requests tool calls.

    Args:
        provider: Which chat model backend to use: "google", "groq" or
            "nvidia".

    Returns:
        The compiled graph, checkpointed with an in-memory saver.

    Raises:
        ValueError: if provider is not one of the supported names.
    """
    # Initialize LLMs with best free models
    if provider == "google":
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-thinking-exp", temperature=0)
    elif provider == "groq":
        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
    elif provider == "nvidia":
        llm = ChatNVIDIA(model="meta/llama-3.1-70b-instruct", temperature=0)
    else:
        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'nvidia'.")

    # Provider was validated above, so this lookup cannot miss. Resolving the
    # limiter once avoids re-running the provider if/elif chain on every turn.
    limiter = {
        "groq": groq_limiter,
        "google": gemini_limiter,
        "nvidia": nvidia_limiter,
    }[provider]

    # Bind tools to LLM
    llm_with_tools = llm.bind_tools(all_tools)

    # Node functions
    def assistant(state: MessagesState):
        """Assistant node with rate limiting."""
        limiter.wait_if_needed()

        # The retriever node re-emits the messages already in state along
        # with the system message; the state's message reducer presumably
        # merges by id, which leaves the system message AFTER the user's
        # question (verify against the langgraph version in use). Send
        # system messages first so the model sees the instructions before
        # the conversation; the stored state itself is left unchanged.
        history = state["messages"]
        system_first = [m for m in history if isinstance(m, SystemMessage)]
        conversation = [m for m in history if not isinstance(m, SystemMessage)]
        return {"messages": [llm_with_tools.invoke(system_first + conversation)]}

    def retriever_node(state: MessagesState):
        """Retriever node: add the system prompt and, when available, one
        similar reference question from the vector store."""
        if vector_store and state["messages"]:
            try:
                similar_questions = vector_store.similarity_search(
                    state["messages"][-1].content, k=1
                )
                if similar_questions:
                    example_msg = HumanMessage(
                        content=f"Here I provide a similar question and answer for reference: \n\n{similar_questions[0].page_content}",
                    )
                    return {"messages": [sys_msg] + state["messages"] + [example_msg]}
            except Exception as e:
                # Retrieval is optional context; carry on without it.
                print(f"Retriever error: {e}")

        return {"messages": [sys_msg] + state["messages"]}

    # Build graph
    builder = StateGraph(MessagesState)
    builder.add_node("retriever", retriever_node)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(all_tools))
    builder.add_edge(START, "retriever")
    builder.add_edge("retriever", "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")

    # Compile graph with memory
    memory = MemorySaver()
    return builder.compile(checkpointer=memory)
 
 
 
 
 
 
298
 
299
# Smoke test: run one question through the Groq-backed graph and print
# every message of the resulting conversation.
if __name__ == "__main__":
    demo_graph = build_graph(provider="groq")
    run_config = {"configurable": {"thread_id": "test_thread"}}
    initial_state = {
        "messages": [
            HumanMessage(content="What are the names of the US presidents who were assassinated?")
        ]
    }
    final_state = demo_graph.invoke(initial_state, run_config)
    for message in final_state["messages"]:
        message.pretty_print()