josondev committed on
Commit
7c04f3e
·
verified ·
1 Parent(s): 6a304d6

Update veryfinal.py

Browse files
Files changed (1) hide show
  1. veryfinal.py +188 -78
veryfinal.py CHANGED
@@ -1,4 +1,4 @@
1
- import os, json
2
  from dotenv import load_dotenv
3
 
4
  # Load environment variables
@@ -7,6 +7,8 @@ load_dotenv()
7
  # Imports
8
  from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
9
  from langchain_groq import ChatGroq
 
 
10
  from langchain_community.tools.tavily_search import TavilySearchResults
11
  from langchain_community.document_loaders import WikipediaLoader
12
  from langchain_community.document_loaders import ArxivLoader
@@ -19,6 +21,26 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
19
  from langchain_community.document_loaders import JSONLoader
20
  from langgraph.prebuilt import create_react_agent
21
  from langgraph.checkpoint.memory import MemorySaver
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Define all tools
24
  @tool
@@ -78,14 +100,17 @@ def wiki_search(query: str) -> str:
78
  args:
79
  query: the query to search for
80
  """
81
- loader = WikipediaLoader(query=query, load_max_docs=1)
82
- data = loader.load()
83
- formatted_search_docs = "\n\n---\n\n".join(
84
- [
85
- f'\n{doc.page_content}\n'
86
- for doc in data
87
- ])
88
- return formatted_search_docs
 
 
 
89
 
90
  @tool
91
  def web_search(query: str) -> str:
@@ -94,13 +119,18 @@ def web_search(query: str) -> str:
94
  Args:
95
  query: The search query.
96
  """
97
- search_docs = TavilySearchResults(max_results=3).invoke(query=query)
98
- formatted_search_docs = "\n\n---\n\n".join(
99
- [
100
- f'\n{doc.get("content", "")}\n'
101
- for doc in search_docs
102
- ])
103
- return formatted_search_docs
 
 
 
 
 
104
 
105
  @tool
106
  def arxiv_search(query: str) -> str:
@@ -109,13 +139,16 @@ def arxiv_search(query: str) -> str:
109
  Args:
110
  query: The search query.
111
  """
112
- search_docs = ArxivLoader(query=query, load_max_docs=3).load()
113
- formatted_search_docs = "\n\n---\n\n".join(
114
- [
115
- f'\n{doc.page_content[:1000]}\n'
116
- for doc in search_docs
117
- ])
118
- return formatted_search_docs
 
 
 
119
 
120
  # Load and process your JSONL data
121
  jq_schema = """
@@ -146,8 +179,57 @@ json_chunks = text_splitter.split_documents(json_docs)
146
  # Create vector database
147
  database = FAISS.from_documents(json_chunks, NVIDIAEmbeddings())
148
 
149
- # Initialize LLM
150
- llm = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct", temperature=0,api_key=os.getenv("GROQ_API_KEY"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
  # Create retriever and retriever tool
153
  retriever = database.as_retriever(search_type="similarity", search_kwargs={"k": 3})
@@ -181,62 +263,90 @@ agent_executor = create_react_agent(
181
  checkpointer=memory
182
  )
183
 
184
- # Function to run the agent
185
- def run_agent(query, thread_id="conversation_1"):
186
- """Run the agent with a query"""
187
- config = {"configurable": {"thread_id": thread_id}}
188
 
189
- system_msg = SystemMessage(content='''You are a helpful assistant tasked with answering questions using a set of tools.
190
- Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
191
- FINAL ANSWER: [YOUR FINAL ANSWER].
192
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
193
- Your answer should only start with "FINAL ANSWER: ", then follows with the answer.''')
194
-
195
- user_msg = HumanMessage(content=query)
196
-
197
- print(f"User: {query}")
198
- print("\nAgent:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
- for step in agent_executor.stream(
201
- {"messages": [system_msg, user_msg]},
202
- config,
203
- stream_mode="values"
204
- ):
205
- step["messages"][-1].pretty_print()
206
 
207
- # Function to run agent with error handling
208
- def robust_agent_run(query, thread_id="robust_conversation"):
209
- """Run agent with error handling"""
210
- config = {"configurable": {"thread_id": thread_id}}
211
-
212
- try:
213
- system_msg = SystemMessage(content='''You are a helpful assistant tasked with answering questions using a set of tools.
214
- Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
215
- FINAL ANSWER: [YOUR FINAL ANSWER].
216
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
217
- Your answer should only start with "FINAL ANSWER: ", then follows with the answer.''')
218
-
219
- user_msg = HumanMessage(content=query)
220
- result = []
221
-
222
- for step in agent_executor.stream(
223
- {"messages": [system_msg, user_msg]},
224
- config,
225
- stream_mode="values"
226
- ):
227
- result = step["messages"]
228
-
229
- return result[-1].content if result else "No response generated"
230
-
231
- except Exception as e:
232
- return f"Error occurred: {str(e)}"
233
 
234
- # Main function
235
  def main(query: str) -> str:
236
- """Main function to run the agent"""
237
- return(robust_agent_run(query))
238
-
239
-
240
 
241
- # Or use the interactive version
242
- # run_agent("What is 25 * 4 + 10?")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json, time, random
2
  from dotenv import load_dotenv
3
 
4
  # Load environment variables
 
7
  # Imports
8
  from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
9
  from langchain_groq import ChatGroq
10
+ from langchain_google_genai import ChatGoogleGenerativeAI
11
+ from langchain_nvidia_ai_endpoints import ChatNVIDIA
12
  from langchain_community.tools.tavily_search import TavilySearchResults
13
  from langchain_community.document_loaders import WikipediaLoader
14
  from langchain_community.document_loaders import ArxivLoader
 
21
  from langchain_community.document_loaders import JSONLoader
22
  from langgraph.prebuilt import create_react_agent
23
  from langgraph.checkpoint.memory import MemorySaver
24
+ from langchain_core.rate_limiters import InMemoryRateLimiter
25
+
26
# Client-side rate limiters, one per LLM provider.
# Every limiter uses the same polling interval and bucket size; only the
# sustained request rate differs between providers.
_LIMITER_DEFAULTS = {"check_every_n_seconds": 0.1, "max_bucket_size": 10}

groq_rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.5,  # 30 requests per minute
    **_LIMITER_DEFAULTS,
)

google_rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.33,  # roughly 20 requests per minute
    **_LIMITER_DEFAULTS,
)

nvidia_rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.25,  # 15 requests per minute
    **_LIMITER_DEFAULTS,
)
44
 
45
  # Define all tools
46
  @tool
 
100
  args:
101
  query: the query to search for
102
  """
103
+ try:
104
+ loader = WikipediaLoader(query=query, load_max_docs=1)
105
+ data = loader.load()
106
+ formatted_search_docs = "\n\n---\n\n".join(
107
+ [
108
+ f'\n{doc.page_content}\n'
109
+ for doc in data
110
+ ])
111
+ return formatted_search_docs
112
+ except Exception as e:
113
+ return f"Wikipedia search failed: {str(e)}"
114
 
115
  @tool
116
  def web_search(query: str) -> str:
 
119
  Args:
120
  query: The search query.
121
  """
122
+ try:
123
+ # Add delay to prevent rate limiting
124
+ time.sleep(random.uniform(1, 3))
125
+ search_docs = TavilySearchResults(max_results=3).invoke(query=query)
126
+ formatted_search_docs = "\n\n---\n\n".join(
127
+ [
128
+ f'\n{doc.get("content", "")}\n'
129
+ for doc in search_docs
130
+ ])
131
+ return formatted_search_docs
132
+ except Exception as e:
133
+ return f"Web search failed: {str(e)}"
134
 
135
  @tool
136
  def arxiv_search(query: str) -> str:
 
139
  Args:
140
  query: The search query.
141
  """
142
+ try:
143
+ search_docs = ArxivLoader(query=query, load_max_docs=3).load()
144
+ formatted_search_docs = "\n\n---\n\n".join(
145
+ [
146
+ f'\n{doc.page_content[:1000]}\n'
147
+ for doc in search_docs
148
+ ])
149
+ return formatted_search_docs
150
+ except Exception as e:
151
+ return f"ArXiv search failed: {str(e)}"
152
 
153
  # Load and process your JSONL data
154
  jq_schema = """
 
179
  # Create vector database
180
  database = FAISS.from_documents(json_chunks, NVIDIAEmbeddings())
181
 
182
# Initialize LLMs with rate limiting
def create_rate_limited_llm(provider="groq"):
    """Build the chat model for ``provider``, attached to that provider's rate limiter.

    Args:
        provider: Name of the backend to build: "groq", "google" or "nvidia".

    Returns:
        A ``ChatGroq``, ``ChatGoogleGenerativeAI`` or ``ChatNVIDIA`` instance
        configured with temperature 0, the API key read from the matching
        environment variable, the provider's module-level rate limiter and
        ``max_retries=2``.

    Raises:
        ValueError: If ``provider`` is not one of the supported names.
    """
    if provider == "groq":
        return ChatGroq(
            model="llama-3.3-70b-versatile",
            temperature=0,
            api_key=os.getenv("GROQ_API_KEY"),
            rate_limiter=groq_rate_limiter,
            max_retries=2,
            request_timeout=60,
        )
    if provider == "google":
        return ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-exp",
            temperature=0,
            api_key=os.getenv("GOOGLE_API_KEY"),
            rate_limiter=google_rate_limiter,
            max_retries=2,
            request_timeout=60,
        )
    if provider == "nvidia":
        return ChatNVIDIA(
            model="meta/llama-3.1-405b-instruct",
            temperature=0,
            api_key=os.getenv("NVIDIA_API_KEY"),
            rate_limiter=nvidia_rate_limiter,
            max_retries=2,
        )

    # Fail loudly here: silently returning None would only surface later as
    # an obscure error when the result is used as a model.
    raise ValueError(
        f"Unknown LLM provider {provider!r}; expected 'groq', 'google' or 'nvidia'"
    )
212
+
213
# Create the provider fallback chain
def create_llm_with_smart_fallbacks():
    """Return the Groq model wrapped with Google and NVIDIA fallbacks.

    The models are built in priority order (Groq, then Google, then NVIDIA),
    each through ``create_rate_limited_llm`` so it carries its own rate
    limiter. The first is the primary; the remaining two are handed to
    ``with_fallbacks`` in that order.
    """
    provider_order = ("groq", "google", "nvidia")
    primary_llm, *fallback_llms = (
        create_rate_limited_llm(name) for name in provider_order
    )
    return primary_llm.with_fallbacks(fallback_llms)


# Initialize LLM with smart fallbacks
llm = create_llm_with_smart_fallbacks()
233
 
234
  # Create retriever and retriever tool
235
  retriever = database.as_retriever(search_type="similarity", search_kwargs={"k": 3})
 
263
  checkpointer=memory
264
  )
265
 
266
# System prompt sent with every attempt (answer-format instructions).
_SYSTEM_PROMPT = '''You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
Your answer should only start with "FINAL ANSWER: ", then follows with the answer.'''

# Lower-case substrings of an exception message that mark it as retryable.
_RATE_LIMIT_MARKERS = ('rate limit', 'too many requests', '429', 'quota exceeded')
_API_ERROR_MARKERS = ('api', 'connection', 'timeout', 'service unavailable')


# Enhanced robust agent run with exponential backoff
def robust_agent_run(query, thread_id="robust_conversation", max_retries=3):
    """Run the agent on ``query``, retrying retryable failures with backoff.

    Args:
        query: The user question, sent as the human message.
        thread_id: Base checkpointer thread id; the attempt index is appended
            so every attempt runs on its own thread.
        max_retries: Maximum number of attempts.

    Returns:
        The content of the last message produced by the agent, or a string
        describing the failure. Exceptions raised while running the agent are
        reported in the returned string rather than propagated.
    """
    for attempt in range(max_retries):
        try:
            config = {"configurable": {"thread_id": f"{thread_id}_{attempt}"}}

            system_msg = SystemMessage(content=_SYSTEM_PROMPT)
            user_msg = HumanMessage(content=query)
            result = []

            print(f"Attempt {attempt + 1}: Processing query...")

            # With stream_mode="values" each step carries the message list so
            # far; keep only the latest one.
            for step in agent_executor.stream(
                {"messages": [system_msg, user_msg]},
                config,
                stream_mode="values",
            ):
                result = step["messages"]

            final_response = result[-1].content if result else "No response generated"
            print(f"Query processed successfully on attempt {attempt + 1}")
            return final_response

        except Exception as e:
            error_msg = str(e).lower()
            is_last_attempt = attempt == max_retries - 1

            # Rate limit errors
            if any(marker in error_msg for marker in _RATE_LIMIT_MARKERS):
                # Give up before sleeping: there is no further attempt to wait for.
                if is_last_attempt:
                    return f"Rate limit exceeded after {max_retries} attempts: {str(e)}"

                wait_time = (2 ** attempt) + random.uniform(1, 3)  # Exponential backoff with jitter
                print(f"Rate limit hit on attempt {attempt + 1}. Waiting {wait_time:.2f} seconds...")
                time.sleep(wait_time)

            # Other API errors
            elif any(marker in error_msg for marker in _API_ERROR_MARKERS):
                if is_last_attempt:
                    return f"API error after {max_retries} attempts: {str(e)}"

                wait_time = (2 ** attempt) + random.uniform(0.5, 1.5)
                print(f"API error on attempt {attempt + 1}. Retrying in {wait_time:.2f} seconds...")
                time.sleep(wait_time)

            else:
                # Non-recoverable error
                return f"Error occurred: {str(e)}"

    return "Maximum retries exceeded"
 
 
 
 
 
324
 
325
# Main function with request tracking
request_count = 0
last_request_time = time.time()


def main(query: str) -> str:
    """Answer ``query`` with the agent, counting and pacing requests.

    The module-level counter is restarted once more than 60 seconds have
    passed since ``last_request_time``. Every request after the first in the
    current window is preceded by a random 2-5 second pause before the query
    is handed to ``robust_agent_run``.
    """
    global request_count, last_request_time

    now = time.time()
    window_expired = (now - last_request_time) > 60

    # Start a fresh one-minute window, or keep counting in the current one.
    if window_expired:
        last_request_time = now
    request_count = 1 if window_expired else request_count + 1

    print(f"Processing request #{request_count}")

    # Space out follow-up requests to avoid overwhelming the APIs.
    is_followup = request_count > 1
    if is_followup:
        time.sleep(random.uniform(2, 5))

    return robust_agent_run(query)


if __name__ == "__main__":
    # Test the agent
    result = main("What are the names of the US presidents who were assassinated?")
    print(result)