josondev committed on
Commit
0f81d99
·
verified ·
1 Parent(s): 0e78db3

Update veryfinal.py

Browse files
Files changed (1) hide show
  1. veryfinal.py +234 -331
veryfinal.py CHANGED
@@ -1,373 +1,276 @@
1
- import os, json, time, random
2
  from dotenv import load_dotenv
 
3
 
4
  # Load environment variables
5
  load_dotenv()
6
 
7
- # Imports
8
- from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
9
- from langchain_groq import ChatGroq
10
- from langchain_nvidia_ai_endpoints import ChatNVIDIA
11
- from langchain_community.tools.tavily_search import TavilySearchResults
12
- from langchain_community.document_loaders import WikipediaLoader
13
- from langchain_community.document_loaders import ArxivLoader
14
- from langchain_community.vectorstores import FAISS
15
- from langchain_core.messages import SystemMessage, HumanMessage
16
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
17
- from langchain_core.tools import tool
18
- from langchain.tools.retriever import create_retriever_tool
19
- from langchain_text_splitters import RecursiveCharacterTextSplitter
20
- from langchain_community.document_loaders import JSONLoader
21
- from langgraph.prebuilt import create_react_agent
22
- from langgraph.checkpoint.memory import MemorySaver
23
- from langchain_core.rate_limiters import InMemoryRateLimiter
24
-
25
- # Rate limiters for different providers
26
- groq_rate_limiter = InMemoryRateLimiter(
27
- requests_per_second=0.5, # 30 requests per minute
28
- check_every_n_seconds=0.1,
29
- max_bucket_size=10
30
- )
31
-
32
- google_rate_limiter = InMemoryRateLimiter(
33
- requests_per_second=0.33, # 20 requests per minute
34
- check_every_n_seconds=0.1,
35
- max_bucket_size=10
36
- )
37
-
38
- nvidia_rate_limiter = InMemoryRateLimiter(
39
- requests_per_second=0.25, # 15 requests per minute
40
- check_every_n_seconds=0.1,
41
- max_bucket_size=10
42
- )
43
 
44
- # Initialize individual LLMs
45
- groq_llm = ChatGroq(
46
- model="llama-3.3-70b-versatile",
47
- temperature=0,
48
- api_key=os.getenv("GROQ_API_KEY"),
49
- rate_limiter=groq_rate_limiter,
50
- max_retries=2,
51
- request_timeout=60
52
- )
53
-
54
- nvidia_llm = ChatNVIDIA(
55
- model="meta/llama-3.1-405b-instruct",
56
- temperature=0,
57
- api_key=os.getenv("NVIDIA_API_KEY"),
58
- rate_limiter=nvidia_rate_limiter,
59
- max_retries=2
60
- )
61
-
62
- # Create LLM tools that can be selected by the agent
63
- @tool
64
- def groq_reasoning_tool(query: str) -> str:
65
- """Use Groq's Llama model for fast reasoning, mathematical calculations, and logical problems.
66
- Best for: Math problems, logical reasoning, quick calculations, code generation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- Args:
69
- query: The question or problem to solve
70
- """
71
- try:
72
- time.sleep(random.uniform(1, 2)) # Rate limiting
73
- response = groq_llm.invoke([HumanMessage(content=query)])
74
- return f"Groq Response: {response.content}"
75
- except Exception as e:
76
- return f"Groq tool failed: {str(e)}"
77
-
78
-
79
- @tool
80
- def nvidia_specialist_tool(query: str) -> str:
81
- """Use NVIDIA's large model for specialized tasks, technical questions, and domain expertise.
82
- Best for: Technical questions, specialized domains, scientific problems, detailed analysis.
83
 
84
- Args:
85
- query: The specialized question or technical problem
86
- """
87
- try:
88
- time.sleep(random.uniform(2, 4)) # Rate limiting
89
- response = nvidia_llm.invoke([HumanMessage(content=query)])
90
- return f"NVIDIA Response: {response.content}"
91
- except Exception as e:
92
- return f"NVIDIA tool failed: {str(e)}"
93
 
94
- # Define calculation tools
95
- @tool
96
- def multiply(a: int | float, b: int | float) -> int | float:
97
- """Multiply two numbers.
98
- Args:
99
- a: first int | float
100
- b: second int | float
101
- """
102
  return a * b
103
 
104
- @tool
105
- def add(a: int | float, b: int | float) -> int | float:
106
- """Add two numbers.
107
-
108
- Args:
109
- a: first int | float
110
- b: second int | float
111
- """
112
  return a + b
113
 
114
- @tool
115
- def subtract(a: int | float , b: int | float) -> int | float:
116
- """Subtract two numbers.
117
-
118
- Args:
119
- a: first int | float
120
- b: second int | float
121
- """
122
  return a - b
123
 
124
- @tool
125
- def divide(a: int | float, b: int | float) -> int | float:
126
- """Divide two numbers.
127
-
128
- Args:
129
- a: first int | float
130
- b: second int | float
131
- """
132
  if b == 0:
133
  raise ValueError("Cannot divide by zero.")
134
  return a / b
135
 
136
- @tool
137
- def modulus(a: int | float, b: int | float) -> int | float:
138
- """Get the modulus of two numbers.
139
-
140
- Args:
141
- a: first int | float
142
- b: second int | float
143
- """
144
- return a % b
145
 
146
- # Define search tools
147
- @tool
148
- def wiki_search(query: str) -> str:
149
- """Search the wikipedia for a query and return the first paragraph
150
- args:
151
- query: the query to search for
152
- """
153
  try:
 
154
  loader = WikipediaLoader(query=query, load_max_docs=1)
155
  data = loader.load()
156
- formatted_search_docs = "\n\n---\n\n".join(
157
- [
158
- f'\n{doc.page_content}\n'
159
- for doc in data
160
- ])
161
- return formatted_search_docs
162
  except Exception as e:
163
  return f"Wikipedia search failed: {str(e)}"
164
 
165
- @tool
166
- def web_search(query: str) -> str:
167
- """Search Tavily for a query and return maximum 3 results.
168
 
169
- Args:
170
- query: The search query.
171
- """
172
- try:
173
- time.sleep(random.uniform(1, 3))
174
- search_docs = TavilySearchResults(max_results=3).invoke(query=query)
175
- formatted_search_docs = "\n\n---\n\n".join(
176
- [
177
- f'\n{doc.get("content", "")}\n'
178
- for doc in search_docs
179
- ])
180
- return formatted_search_docs
181
- except Exception as e:
182
- return f"Web search failed: {str(e)}"
183
-
184
- @tool
185
- def arxiv_search(query: str) -> str:
186
- """Search Arxiv for a query and return maximum 3 result.
 
187
 
188
- Args:
189
- query: The search query.
190
- """
191
- try:
192
- search_docs = ArxivLoader(query=query, load_max_docs=3).load()
193
- formatted_search_docs = "\n\n---\n\n".join(
194
- [
195
- f'\n{doc.page_content[:1000]}\n'
196
- for doc in search_docs
197
- ])
198
- return formatted_search_docs
199
- except Exception as e:
200
- return f"ArXiv search failed: {str(e)}"
201
-
202
- # Load and process your JSONL data
203
- jq_schema = """
204
- {
205
- page_content: .Question,
206
- metadata: {
207
- task_id: .task_id,
208
- Level: .Level,
209
- Final_answer: ."Final answer",
210
- file_name: .file_name,
211
- Steps: .["Annotator Metadata"].Steps,
212
- Number_of_steps: .["Annotator Metadata"]["Number of steps"],
213
- How_long: .["Annotator Metadata"]["How long did this take?"],
214
- Tools: .["Annotator Metadata"].Tools,
215
- Number_of_tools: .["Annotator Metadata"]["Number of tools"]
216
- }
217
- }
218
- """
219
-
220
- # Load documents and create vector database
221
- json_loader = JSONLoader(file_path="metadata.jsonl", jq_schema=jq_schema, json_lines=True, text_content=False)
222
- json_docs = json_loader.load()
223
-
224
- # Split documents
225
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=200)
226
- json_chunks = text_splitter.split_documents(json_docs)
227
-
228
- # Create vector database
229
- database = FAISS.from_documents(json_chunks, NVIDIAEmbeddings())
230
-
231
- # Create retriever and retriever tool
232
- retriever = database.as_retriever(search_type="similarity", search_kwargs={"k": 3})
233
-
234
- retriever_tool = create_retriever_tool(
235
- retriever=retriever,
236
- name="question_search",
237
- description="Search for similar questions and their solutions from the knowledge base."
238
- )
239
-
240
- # Combine all tools including LLM tools
241
- tools = [
242
- # Math tools
243
- multiply,
244
- add,
245
- subtract,
246
- divide,
247
- modulus,
248
 
249
- # Search tools
250
- wiki_search,
251
- web_search,
252
- arxiv_search,
253
- retriever_tool,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
- # LLM tools - agent can choose which LLM to use
256
- groq_reasoning_tool,
257
- nvidia_specialist_tool
258
- ]
259
-
260
- # Use a lightweight coordinator LLM (Groq for speed)
261
- coordinator_llm = ChatGroq(
262
- model="llama-3.3-70b-versatile",
263
- temperature=0,
264
- api_key=os.getenv("GROQ_API_KEY"),
265
- rate_limiter=groq_rate_limiter
266
- )
267
-
268
- # Create memory for conversation
269
- memory = MemorySaver()
270
-
271
- # Create the agent with coordinator LLM
272
- agent_executor = create_react_agent(
273
- model=coordinator_llm,
274
- tools=tools,
275
- checkpointer=memory
276
- )
277
-
278
- # Enhanced robust agent run
279
- def robust_agent_run(query, thread_id="robust_conversation", max_retries=3):
280
- """Run agent with error handling, rate limiting, and LLM tool selection"""
281
 
282
- for attempt in range(max_retries):
283
- try:
284
- config = {"configurable": {"thread_id": f"{thread_id}_{attempt}"}}
285
-
286
- system_msg = SystemMessage(content='''You are a helpful assistant with access to multiple specialized LLM tools and other utilities.
287
-
288
- AVAILABLE LLM TOOLS:
289
- - groq_reasoning_tool: Fast reasoning, math, calculations, code (use for quick logical problems)
290
- - google_analysis_tool: Complex analysis, creative tasks, detailed explanations (use for comprehensive analysis)
291
- - nvidia_specialist_tool: Technical questions, specialized domains, scientific problems (use for expert-level tasks)
292
-
293
- TOOL SELECTION STRATEGY:
294
- - For math/calculations: Use basic math tools (add, multiply, etc.) OR groq_reasoning_tool for complex math
295
- - For factual questions: Use web_search, wiki_search, or arxiv_search first
296
- - For analysis/reasoning: Choose the most appropriate LLM tool based on complexity
297
- - For technical/scientific: Use nvidia_specialist_tool
298
- - For creative/comprehensive: Use google_analysis_tool
299
- - For quick logical problems: Use groq_reasoning_tool
300
-
301
- Always finish with: FINAL ANSWER: [YOUR FINAL ANSWER]
302
- Your answer should be a number OR few words OR comma separated list as appropriate.''')
303
-
304
- user_msg = HumanMessage(content=query)
305
- result = []
306
-
307
- print(f"Attempt {attempt + 1}: Processing query with multi-LLM agent...")
308
-
309
- for step in agent_executor.stream(
310
- {"messages": [system_msg, user_msg]},
311
- config,
312
- stream_mode="values"
313
- ):
314
- result = step["messages"]
315
 
316
- final_response = result[-1].content if result else "No response generated"
317
- print(f"Query processed successfully on attempt {attempt + 1}")
318
- return final_response
319
-
320
- except Exception as e:
321
- error_msg = str(e).lower()
322
-
323
- if any(keyword in error_msg for keyword in ['rate limit', 'too many requests', '429', 'quota exceeded']):
324
- wait_time = (2 ** attempt) + random.uniform(1, 3)
325
- print(f"Rate limit hit on attempt {attempt + 1}. Waiting {wait_time:.2f} seconds...")
326
- time.sleep(wait_time)
327
 
328
- if attempt == max_retries - 1:
329
- return f"Rate limit exceeded after {max_retries} attempts: {str(e)}"
330
- continue
331
 
332
- elif any(keyword in error_msg for keyword in ['api', 'connection', 'timeout', 'service unavailable']):
333
- wait_time = (2 ** attempt) + random.uniform(0.5, 1.5)
334
- print(f"API error on attempt {attempt + 1}. Retrying in {wait_time:.2f} seconds...")
335
- time.sleep(wait_time)
336
 
337
- if attempt == max_retries - 1:
338
- return f"API error after {max_retries} attempts: {str(e)}"
339
- continue
 
340
 
341
- else:
342
- return f"Error occurred: {str(e)}"
343
-
344
- return "Maximum retries exceeded"
345
-
346
- # Main function with request tracking
347
- request_count = 0
348
- last_request_time = time.time()
 
 
 
 
 
 
 
 
 
349
 
350
  def main(query: str) -> str:
351
- """Main function to run the multi-LLM agent"""
352
- global request_count, last_request_time
353
-
354
- current_time = time.time()
355
-
356
- # Reset counter every minute
357
- if current_time - last_request_time > 60:
358
- request_count = 0
359
- last_request_time = current_time
360
-
361
- request_count += 1
362
- print(f"Processing request #{request_count} with multi-LLM agent")
363
-
364
- # Add delay between requests
365
- if request_count > 1:
366
- time.sleep(random.uniform(2, 5))
367
 
368
- return robust_agent_run(query)
 
 
 
 
 
 
 
 
 
369
 
370
  if __name__ == "__main__":
371
- # Test the multi-LLM agent
372
- result = main("What are the names of the US presidents who were assassinated?")
373
  print(result)
 
1
+ import os, json, time, random, asyncio
2
  from dotenv import load_dotenv
3
+ from typing import Optional, Dict, Any
4
 
5
  # Load environment variables
6
  load_dotenv()
7
 
8
+ # Agno imports (corrected based on search results)
9
+ from agno.agent import Agent
10
+ from agno.models.groq import Groq
11
+ from agno.models.google import Gemini
12
+ from agno.tools.duckduckgo import DuckDuckGoTools
13
+ from agno.tools.yfinance import YFinanceTools
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Additional imports for custom tools
16
+ from langchain_community.tools.tavily_search import TavilySearchResults
17
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
18
+
19
+ # Advanced Rate Limiter with exponential backoff (SILENT)
20
class AdvancedRateLimiter:
    """Sliding one-minute limiter for request count and (optionally) tokens.

    Each call to ``wait_if_needed`` sleeps (silently) when the request or
    token budget of the last 60 seconds is exhausted, applies an extra
    exponential backoff after consecutive failures, and then records the
    request. Callers report outcomes via ``record_success`` /
    ``record_failure``.

    Args:
        requests_per_minute: Maximum number of requests per 60-second window.
        tokens_per_minute: Optional token budget per 60-second window; when
            falsy, token accounting is disabled.
    """

    _WINDOW_SECONDS = 60

    def __init__(self, requests_per_minute: int, tokens_per_minute: Optional[int] = None):
        self.requests_per_minute = requests_per_minute
        self.tokens_per_minute = tokens_per_minute
        self.request_times = []  # timestamps of requests inside the window
        self.token_usage = []  # (timestamp, tokens) pairs inside the window
        self.consecutive_failures = 0

    def _prune(self, now: float) -> None:
        """Drop request/token records older than the window."""
        window = self._WINDOW_SECONDS
        self.request_times = [t for t in self.request_times if now - t < window]
        self.token_usage = [
            (t, tokens) for t, tokens in self.token_usage if now - t < window
        ]

    async def wait_if_needed(self, estimated_tokens: int = 1000):
        """Sleep until a new request fits the budgets, then record it.

        Args:
            estimated_tokens: Tokens the upcoming request is expected to use.
        """
        now = time.time()
        self._prune(now)

        # Request budget: wait for the oldest request to leave the window.
        # The emptiness guard avoids an IndexError when the limit is <= 0.
        if self.request_times and len(self.request_times) >= self.requests_per_minute:
            wait_time = (
                self._WINDOW_SECONDS
                - (now - self.request_times[0])
                + random.uniform(2, 8)
            )
            await asyncio.sleep(wait_time)
            now = time.time()
            self._prune(now)

        # Token budget: only wait when there is recorded usage that can
        # expire. With an empty window, waiting cannot free any budget (and
        # indexing token_usage[0] would raise IndexError).
        if self.tokens_per_minute and self.token_usage:
            total_tokens = sum(tokens for _, tokens in self.token_usage)
            if total_tokens + estimated_tokens > self.tokens_per_minute:
                wait_time = (
                    self._WINDOW_SECONDS
                    - (now - self.token_usage[0][0])
                    + random.uniform(3, 10)
                )
                await asyncio.sleep(wait_time)

        # Exponential backoff (capped at 120s plus jitter) after failures.
        if self.consecutive_failures > 0:
            backoff_time = min(2 ** self.consecutive_failures, 120) + random.uniform(2, 6)
            await asyncio.sleep(backoff_time)

        # Record with the time the request actually proceeds, not the time
        # captured before sleeping, so the window reflects real send times.
        now = time.time()
        self._prune(now)
        self.request_times.append(now)
        if self.tokens_per_minute:
            self.token_usage.append((now, estimated_tokens))

    def record_success(self):
        """Reset the failure streak after a successful request."""
        self.consecutive_failures = 0

    def record_failure(self):
        """Extend the failure streak, lengthening the next backoff."""
        self.consecutive_failures += 1
 
 
 
 
 
 
 
62
 
63
+ # Initialize rate limiters for free tiers
64
# Per-provider limiters, sized by the request/token values given here.
groq_limiter = AdvancedRateLimiter(
    requests_per_minute=30,
    tokens_per_minute=6000,
)
gemini_limiter = AdvancedRateLimiter(
    requests_per_minute=2,
    tokens_per_minute=32000,
)
66
+
67
+ # Custom tool functions with rate limiting (SILENT)
68
def multiply_tool(a: float, b: float) -> float:
    """Return the product of ``a`` and ``b``."""
    product = a * b
    return product
71
 
72
def add_tool(a: float, b: float) -> float:
    """Return the sum of ``a`` and ``b``."""
    total = a + b
    return total
75
 
76
def subtract_tool(a: float, b: float) -> float:
    """Return ``a`` minus ``b``."""
    difference = a - b
    return difference
79
 
80
def divide_tool(a: float, b: float) -> float:
    """Return ``a`` divided by ``b``.

    Raises:
        ValueError: If ``b`` equals zero.
    """
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")
85
 
86
async def web_search_tool(query: str) -> str:
    """Search the web with Tavily (max 2 results) after a random delay.

    Args:
        query: The search query.

    Returns:
        The ``content`` fields of the results joined by ``---`` separators,
        or a ``"Web search failed: ..."`` message if anything goes wrong.
    """
    try:
        # Crude client-side throttle before hitting the API.
        await asyncio.sleep(random.uniform(2, 5))
        # The tool's invoke() takes its input as the first positional
        # argument; calling invoke(query=query) omits it and raises
        # TypeError, which made every search report a failure.
        search_docs = TavilySearchResults(max_results=2).invoke({"query": query})
        # NOTE(review): some versions appear to return an error string
        # instead of a list of result dicts — surface it rather than
        # iterating over its characters.
        if isinstance(search_docs, str):
            return f"Web search failed: {search_docs}"
        return "\n\n---\n\n".join(
            doc.get("content", "") for doc in search_docs if isinstance(doc, dict)
        )
    except Exception as e:
        return f"Web search failed: {str(e)}"
 
94
 
95
async def wiki_search_tool(query: str) -> str:
    """Look up ``query`` on Wikipedia after a short random delay.

    Returns the first 1000 characters of each loaded page (at most one page
    is requested), joined by ``---`` separators, or a
    ``"Wikipedia search failed: ..."`` message on any error.
    """
    try:
        pause = random.uniform(1, 3)
        await asyncio.sleep(pause)
        pages = WikipediaLoader(query=query, load_max_docs=1).load()
        snippets = [page.page_content[:1000] for page in pages]
        return "\n\n---\n\n".join(snippets)
    except Exception as exc:
        return f"Wikipedia search failed: {str(exc)}"
104
 
105
+ # Create specialized Agno agents (SILENT)
106
def create_agno_agents():
    """Create the specialized Agno agents.

    Returns:
        A dict mapping ``"math"``, ``"research"`` and ``"coordinator"`` to
        their configured ``Agent`` instances. API keys are read from the
        ``GROQ_API_KEY`` and ``GOOGLE_API_KEY`` environment variables.
    """
    math_tools = [multiply_tool, add_tool, subtract_tool, divide_tool]

    # Math specialist agent (using Groq for speed)
    math_agent = Agent(
        name="Math Specialist",
        model=Groq(
            id="llama-3.3-70b-versatile",
            api_key=os.getenv("GROQ_API_KEY"),
            temperature=0,
        ),
        tools=list(math_tools),
        instructions=[
            "You are a mathematical specialist with access to calculation tools.",
            "Use the appropriate math tools for calculations.",
            "Show your work step by step.",
            "Always provide precise numerical answers.",
            "Finish with: FINAL ANSWER: [numerical result]",
        ],
        show_tool_calls=False,  # SILENT
        markdown=False,
    )

    # Research specialist agent (using Gemini for capability)
    research_agent = Agent(
        name="Research Specialist",
        model=Gemini(
            id="gemini-2.0-flash-thinking-exp",
            api_key=os.getenv("GOOGLE_API_KEY"),
            temperature=0,
        ),
        tools=[DuckDuckGoTools(), web_search_tool, wiki_search_tool],
        instructions=[
            "You are a research specialist with access to multiple search tools.",
            "Use appropriate search tools to gather comprehensive information.",
            "Always cite sources and provide well-researched answers.",
            "Synthesize information from multiple sources when possible.",
            "Finish with: FINAL ANSWER: [your researched answer]",
        ],
        show_tool_calls=False,  # SILENT
        markdown=False,
    )

    # Coordinator agent (using Groq for fast coordination)
    coordinator_agent = Agent(
        name="Coordinator",
        model=Groq(
            id="llama-3.3-70b-versatile",
            api_key=os.getenv("GROQ_API_KEY"),
            temperature=0,
        ),
        # The instructions below tell the coordinator to route math to
        # calculation tools, so it must actually be given those tools in
        # addition to the search tools.
        tools=[DuckDuckGoTools(), web_search_tool, wiki_search_tool, *math_tools],
        instructions=[
            "You are the main coordinator agent.",
            "Analyze queries and provide comprehensive responses.",
            "Use search tools for factual information when needed.",
            "Route complex math to calculation tools.",
            "Always finish with: FINAL ANSWER: [your final answer]",
        ],
        show_tool_calls=False,  # SILENT
        markdown=False,
    )

    return {
        "math": math_agent,
        "research": research_agent,
        "coordinator": coordinator_agent,
    }
174
+
175
+ # Main Agno multi-agent system (SILENT)
176
class AgnoMultiAgentSystem:
    """Route queries to Agno agents with pacing, retries and a fallback.

    Queries are routed by keyword to the "math", "research" or "coordinator"
    agent. Failures are retried with exponential backoff plus jitter; a
    non-transient error on the final attempt falls back to the coordinator.
    """

    # Lower-case substrings used for routing and error classification.
    _MATH_KEYWORDS = ('calculate', 'math', 'multiply', 'add', 'subtract', 'divide', 'compute')
    _RESEARCH_KEYWORDS = ('search', 'find', 'research', 'what is', 'who is', 'when', 'where')
    _RATE_LIMIT_MARKERS = ('rate limit', '429', 'quota', 'too many requests')
    _API_ERROR_MARKERS = ('api', 'connection', 'timeout', 'service unavailable')

    def __init__(self):
        self.agents = create_agno_agents()
        self.request_count = 0
        self.last_request_time = time.time()

    def _select_agent_key(self, query: str) -> str:
        """Return the key of the agent whose keywords appear in ``query``."""
        lowered = query.lower()
        if any(word in lowered for word in self._MATH_KEYWORDS):
            return "math"
        if any(word in lowered for word in self._RESEARCH_KEYWORDS):
            return "research"
        return "coordinator"

    @staticmethod
    def _response_text(response):
        """Extract the text of an agent run result."""
        return response.content if hasattr(response, 'content') else str(response)

    async def process_query(self, query: str, max_retries: int = 5) -> str:
        """Process ``query`` with the matching agent, retrying on failure.

        Args:
            query: The user question.
            max_retries: Maximum number of attempts against the routed agent.

        Returns:
            The agent's response text, an ``"Error: ..."`` message if the
            fallback also fails, or a "Maximum retries exceeded" message.
        """
        # Reset the request counter once more than an hour has elapsed.
        current_time = time.time()
        if current_time - self.last_request_time > 3600:
            self.request_count = 0
            self.last_request_time = current_time

        self.request_count += 1

        # Space out every request after the first one.
        if self.request_count > 1:
            await asyncio.sleep(random.uniform(3, 10))

        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1
            try:
                agent = self.agents[self._select_agent_key(query)]
                response = agent.run(query, stream=False)
                return self._response_text(response)

            except Exception as e:
                error_msg = str(e).lower()

                if any(marker in error_msg for marker in self._RATE_LIMIT_MARKERS):
                    jitter = (15, 45)
                elif any(marker in error_msg for marker in self._API_ERROR_MARKERS):
                    jitter = (5, 15)
                elif is_last_attempt:
                    # Last resort: ask the coordinator directly. Convert the
                    # run result to text so callers always receive a str.
                    try:
                        fallback = self.agents["coordinator"].run(
                            f"Answer this as best you can: {query}", stream=False
                        )
                        return self._response_text(fallback)
                    except Exception:
                        return f"Error: {str(e)}"
                else:
                    jitter = (2, 8)

                # No point sleeping after the final attempt: nothing follows.
                if not is_last_attempt:
                    await asyncio.sleep((2 ** attempt) + random.uniform(*jitter))

        return "Maximum retries exceeded. Please try again later."
237
+
238
+ # SILENT main function
239
async def main_async(query: str) -> str:
    """Build a fresh multi-agent system and answer ``query`` with it."""
    system = AgnoMultiAgentSystem()
    answer = await system.process_query(query)
    return answer
243
 
244
def main(query: str) -> str:
    """Synchronously answer ``query`` using the Agno multi-agent system.

    Works both from plain scripts and from environments that already run an
    event loop in this thread (e.g. Jupyter notebooks).

    Args:
        query: The user question.

    Returns:
        The response produced by ``main_async``.
    """
    # Only the loop *detection* is guarded. Previously the whole run sat
    # inside ``except RuntimeError``, so a RuntimeError raised while
    # answering silently re-ran the entire query a second time.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: a plain asyncio.run is safe.
        return asyncio.run(main_async(query))

    # A loop is already running (e.g. Jupyter): allow re-entrant asyncio.run.
    import nest_asyncio
    nest_asyncio.apply()
    return asyncio.run(main_async(query))
257
+
258
def get_final_answer(query: str) -> str:
    """Run ``query`` and return only the text after the last "FINAL ANSWER:".

    When the marker is absent, the whole response is returned, stripped of
    surrounding whitespace.
    """
    marker = "FINAL ANSWER:"
    response = main(query)

    if marker not in response:
        return response.strip()

    # Keep only what follows the final occurrence of the marker.
    return response.rsplit(marker, 1)[-1].strip()
267
+
268
+ # For Jupyter notebooks - use this function directly
269
async def run_query(query: str) -> str:
    """Awaitable entry point: await this directly where a loop is running."""
    answer = await main_async(query)
    return answer
272
 
273
if __name__ == "__main__":
    # Smoke test of the Agno system: print only the extracted final answer.
    demo_question = "What are the names of the US presidents who were assassinated?"
    result = get_final_answer(demo_question)
    print(result)