import os
import json
import time
import random
import asyncio
from dotenv import load_dotenv
from typing import Optional, Dict, Any

# Load environment variables (kept ahead of the provider imports, as in the
# original statement order).
load_dotenv()

# Agno imports
from agno.agent import Agent
from agno.models.groq import Groq
from agno.models.google import Gemini
from agno.tools.duckduckgo import DuckDuckGoTools
from agno.tools.yfinance import YFinanceTools

# Additional imports for custom tools
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader

# Length of the sliding window used by AdvancedRateLimiter, in seconds.
_WINDOW_SECONDS = 60

# Substrings (matched against the lower-cased query) used for agent routing.
_MATH_KEYWORDS = ('calculate', 'math', 'multiply', 'add', 'subtract', 'divide', 'compute')
_RESEARCH_KEYWORDS = ('search', 'find', 'research', 'what is', 'who is', 'when', 'where')

# Substrings (matched against the lower-cased error text) used to pick a backoff.
_RATE_LIMIT_KEYWORDS = ('rate limit', '429', 'quota', 'too many requests')
_TRANSIENT_KEYWORDS = ('api', 'connection', 'timeout', 'service unavailable')


# Advanced Rate Limiter with exponential backoff (SILENT)
class AdvancedRateLimiter:
    """Sliding one-minute window limiter for request and token budgets.

    ``request_times`` holds the timestamps of recorded requests and
    ``token_usage`` holds ``(timestamp, tokens)`` pairs; entries older than
    the window are dropped on every call to ``wait_if_needed``.
    """

    def __init__(self, requests_per_minute: int, tokens_per_minute: Optional[int] = None):
        """Create a limiter.

        Args:
            requests_per_minute: Maximum number of requests per window.
            tokens_per_minute: Optional token budget per window; a falsy
                value disables token accounting.
        """
        self.requests_per_minute = requests_per_minute
        self.tokens_per_minute = tokens_per_minute
        self.request_times = []
        self.token_usage = []
        self.consecutive_failures = 0

    def _prune(self, now: float) -> None:
        """Drop entries that have fallen out of the sliding window."""
        self.request_times = [
            t for t in self.request_times if now - t < _WINDOW_SECONDS
        ]
        self.token_usage = [
            (t, tokens) for t, tokens in self.token_usage if now - t < _WINDOW_SECONDS
        ]

    async def wait_if_needed(self, estimated_tokens: int = 1000):
        """Sleep until another request fits the budgets, then record it.

        Args:
            estimated_tokens: Tokens the upcoming request is expected to use.
        """
        now = time.time()
        self._prune(now)

        # Request budget: wait until the oldest request leaves the window.
        # The emptiness check avoids an IndexError when the limit is <= 0.
        if self.request_times and len(self.request_times) >= self.requests_per_minute:
            wait_time = _WINDOW_SECONDS - (now - self.request_times[0]) + random.uniform(2, 8)
            await asyncio.sleep(wait_time)
            # Time has moved on while sleeping; refresh before further maths.
            now = time.time()
            self._prune(now)

        # Token budget: wait until the oldest usage record leaves the window.
        if self.tokens_per_minute:
            total_tokens = sum(tokens for _, tokens in self.token_usage)
            # If nothing is recorded there is nothing to wait for, even when
            # a single request exceeds the whole budget.
            if self.token_usage and total_tokens + estimated_tokens > self.tokens_per_minute:
                wait_time = _WINDOW_SECONDS - (now - self.token_usage[0][0]) + random.uniform(3, 10)
                await asyncio.sleep(wait_time)

        # Exponential backoff (capped at 120s) after consecutive failures.
        if self.consecutive_failures > 0:
            backoff_time = min(2 ** self.consecutive_failures, 120) + random.uniform(2, 6)
            await asyncio.sleep(backoff_time)

        # Record the request at the moment it is actually released, not at
        # the moment this coroutine was entered; otherwise the entry would
        # expire from the window too early.
        now = time.time()
        self._prune(now)
        self.request_times.append(now)
        if self.tokens_per_minute:
            self.token_usage.append((now, estimated_tokens))

    def record_success(self):
        """Reset the failure streak after a successful request."""
        self.consecutive_failures = 0

    def record_failure(self):
        """Extend the failure streak, lengthening the next backoff."""
        self.consecutive_failures += 1


# Initialize rate limiters for free tiers
# NOTE(review): nothing in this part of the file consults these limiters;
# AgnoMultiAgentSystem.process_query applies its own delays and backoff.
# Confirm whether they are meant to be wired in.
groq_limiter = AdvancedRateLimiter(requests_per_minute=30, tokens_per_minute=6000)
gemini_limiter = AdvancedRateLimiter(requests_per_minute=2, tokens_per_minute=32000)


# Custom tool functions with rate limiting (SILENT)
def multiply_tool(a: float, b: float) -> float:
    """Multiply two numbers."""
    return a * b


def add_tool(a: float, b: float) -> float:
    """Add two numbers."""
    return a + b


def subtract_tool(a: float, b: float) -> float:
    """Subtract two numbers."""
    return a - b


def divide_tool(a: float, b: float) -> float:
    """Divide two numbers."""
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b


async def web_search_tool(query: str) -> str:
    """Search the web using Tavily with rate limiting."""
    try:
        await asyncio.sleep(random.uniform(2, 5))
        search = TavilySearchResults(max_results=2)
        # ``invoke`` takes the tool input as its first positional argument;
        # the blocking HTTP call is pushed to a worker thread so the event
        # loop stays responsive.
        loop = asyncio.get_running_loop()
        search_docs = await loop.run_in_executor(None, search.invoke, {"query": query})
        if isinstance(search_docs, str):
            # A plain string (rather than a list of result dicts) is passed
            # through unchanged instead of being iterated character-wise.
            return search_docs
        return "\n\n---\n\n".join(doc.get("content", "") for doc in search_docs)
    except Exception as e:
        return f"Web search failed: {str(e)}"


async def wiki_search_tool(query: str) -> str:
    """Search Wikipedia with rate limiting."""
    try:
        await asyncio.sleep(random.uniform(1, 3))
        loader = WikipediaLoader(query=query, load_max_docs=1)
        # Run the blocking load in a worker thread.
        loop = asyncio.get_running_loop()
        data = await loop.run_in_executor(None, loader.load)
        # Only the first 1000 characters of each page are returned.
        return "\n\n---\n\n".join(doc.page_content[:1000] for doc in data)
    except Exception as e:
        return f"Wikipedia search failed: {str(e)}"


# Create specialized Agno agents (SILENT)
def create_agno_agents():
    """Create specialized Agno agents with the best free models.

    Returns:
        A dict with the keys ``"math"``, ``"research"`` and ``"coordinator"``
        mapping to the corresponding ``Agent`` instances.
    """
    math_tools = [multiply_tool, add_tool, subtract_tool, divide_tool]

    # Math specialist agent (using Groq for speed)
    math_agent = Agent(
        name="Math Specialist",
        model=Groq(
            id="llama-3.3-70b-versatile",
            api_key=os.getenv("GROQ_API_KEY"),
            temperature=0
        ),
        tools=list(math_tools),
        instructions=[
            "You are a mathematical specialist with access to calculation tools.",
            "Use the appropriate math tools for calculations.",
            "Show your work step by step.",
            "Always provide precise numerical answers.",
            "Finish with: FINAL ANSWER: [numerical result]"
        ],
        show_tool_calls=False,  # SILENT
        markdown=False
    )

    # Research specialist agent (using Gemini for capability)
    research_agent = Agent(
        name="Research Specialist",
        model=Gemini(
            id="gemini-2.0-flash-thinking-exp",
            api_key=os.getenv("GOOGLE_API_KEY"),
            temperature=0
        ),
        tools=[DuckDuckGoTools(), web_search_tool, wiki_search_tool],
        instructions=[
            "You are a research specialist with access to multiple search tools.",
            "Use appropriate search tools to gather comprehensive information.",
            "Always cite sources and provide well-researched answers.",
            "Synthesize information from multiple sources when possible.",
            "Finish with: FINAL ANSWER: [your researched answer]"
        ],
        show_tool_calls=False,  # SILENT
        markdown=False
    )

    # Coordinator agent (using Groq for fast coordination)
    coordinator_agent = Agent(
        name="Coordinator",
        model=Groq(
            id="llama-3.3-70b-versatile",
            api_key=os.getenv("GROQ_API_KEY"),
            temperature=0
        ),
        # The instructions below tell the coordinator to route math to
        # calculation tools, so it is given them alongside the search tools.
        tools=[DuckDuckGoTools(), web_search_tool, wiki_search_tool, *math_tools],
        instructions=[
            "You are the main coordinator agent.",
            "Analyze queries and provide comprehensive responses.",
            "Use search tools for factual information when needed.",
            "Route complex math to calculation tools.",
            "Always finish with: FINAL ANSWER: [your final answer]"
        ],
        show_tool_calls=False,  # SILENT
        markdown=False
    )

    return {
        "math": math_agent,
        "research": research_agent,
        "coordinator": coordinator_agent
    }


# Main Agno multi-agent system (SILENT)
class AgnoMultiAgentSystem:
    """Agno multi-agent system with comprehensive rate limiting"""

    def __init__(self):
        self.agents = create_agno_agents()
        # Requests seen since ``last_request_time``; reset once an hour.
        self.request_count = 0
        self.last_request_time = time.time()

    @staticmethod
    def _select_agent_key(query: str) -> str:
        """Pick the agent key for a query by substring keyword matching."""
        lowered = query.lower()
        if any(word in lowered for word in _MATH_KEYWORDS):
            return "math"
        if any(word in lowered for word in _RESEARCH_KEYWORDS):
            return "research"
        return "coordinator"

    @staticmethod
    def _response_text(response) -> str:
        """Return the textual content of an agent response as a ``str``."""
        if hasattr(response, 'content'):
            content = response.content
            if content is None:
                return ""
            return content if isinstance(content, str) else str(content)
        return str(response)

    async def process_query(self, query: str, max_retries: int = 5) -> str:
        """Process query using Agno agents with advanced rate limiting (SILENT)

        Args:
            query: The user question; it also decides which agent is used.
            max_retries: Maximum number of attempts before giving up.

        Returns:
            The agent's answer text, an ``"Error: ..."`` string when the
            last-attempt fallback also fails, or a fixed "maximum retries"
            message when every attempt hit a retryable error.
        """
        # Global rate limiting (SILENT)
        current_time = time.time()
        if current_time - self.last_request_time > 3600:
            self.request_count = 0
            self.last_request_time = current_time

        self.request_count += 1

        # Add delay between requests (SILENT)
        if self.request_count > 1:
            await asyncio.sleep(random.uniform(3, 10))

        # Route to appropriate agent based on query type (SILENT)
        agent = self.agents[self._select_agent_key(query)]

        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1
            try:
                # Use the async API: this coroutine must not block the event
                # loop, and the search tools are ``async def`` functions.
                response = await agent.arun(query, stream=False)
                return self._response_text(response)
            except Exception as e:
                error_msg = str(e).lower()

                if any(keyword in error_msg for keyword in _RATE_LIMIT_KEYWORDS):
                    jitter = (15, 45)
                elif any(keyword in error_msg for keyword in _TRANSIENT_KEYWORDS):
                    jitter = (5, 15)
                elif is_last_attempt:
                    # Unclassified error on the final attempt: make one
                    # best-effort try with the coordinator.
                    try:
                        fallback = await self.agents["coordinator"].arun(
                            f"Answer this as best you can: {query}", stream=False
                        )
                        return self._response_text(fallback)
                    except Exception:
                        return f"Error: {str(e)}"
                else:
                    jitter = (2, 8)

                # No point sleeping when there is no further attempt.
                if not is_last_attempt:
                    await asyncio.sleep((2 ** attempt) + random.uniform(*jitter))

        return "Maximum retries exceeded. Please try again later."
# SILENT main function
async def main_async(query: str) -> str:
    """Async main function compatible with Jupyter notebooks (SILENT)

    Builds a fresh ``AgnoMultiAgentSystem`` and returns the result of its
    ``process_query`` for *query*.
    """
    agno_system = AgnoMultiAgentSystem()
    return await agno_system.process_query(query)


def main(query: str) -> str:
    """Main function using Agno multi-agent system (SILENT)

    Runs ``main_async`` to completion from synchronous code. When an event
    loop is already running in this thread (e.g. inside a Jupyter notebook),
    ``nest_asyncio`` is applied first so the nested ``asyncio.run`` call is
    permitted.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: the ordinary case.
        return asyncio.run(main_async(query))

    # A loop is already running (for Jupyter notebooks). Only the loop
    # detection above is guarded by ``except RuntimeError``, so a
    # RuntimeError raised while answering propagates instead of silently
    # triggering a second full run of the query.
    import nest_asyncio
    nest_asyncio.apply()
    return asyncio.run(main_async(query))


def extract_final_answer(response) -> str:
    """Return the text after the last ``FINAL ANSWER:`` marker, stripped.

    If the marker is absent the whole stripped text is returned. A
    non-string *response* (for example an agent response object exposing
    ``content``) is converted to text first instead of raising ``TypeError``.
    """
    if not isinstance(response, str):
        content = getattr(response, "content", None)
        response = str(response if content is None else content)

    marker = "FINAL ANSWER:"
    if marker in response:
        return response.split(marker)[-1].strip()
    return response.strip()


def get_final_answer(query: str) -> str:
    """Extract only the FINAL ANSWER from the response"""
    return extract_final_answer(main(query))


# For Jupyter notebooks - use this function directly
async def run_query(query: str) -> str:
    """Direct async function for Jupyter notebooks (SILENT)"""
    return await main_async(query)


if __name__ == "__main__":
    # Test the Agno system - CLEAN OUTPUT ONLY
    result = get_final_answer("What are the names of the US presidents who were assassinated?")
    print(result)