import os, json, time, random, asyncio
from dotenv import load_dotenv
from typing import Optional, Dict, Any

# Load environment variables
load_dotenv()

# Agno imports
from agno.agent import Agent
from agno.models.groq import Groq
from agno.models.google import Gemini
from agno.tools.duckduckgo import DuckDuckGoTools
from agno.tools.yfinance import YFinanceTools

# Additional imports for custom tools
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader

# Advanced Rate Limiter with exponential backoff (SILENT)
class AdvancedRateLimiter:
    """Sliding-window rate limiter with jittered waits and failure backoff.

    Keeps the timestamps of recent requests and, optionally, their estimated
    token counts over the trailing 60 seconds. ``wait_if_needed`` sleeps
    (without printing anything) before a request that would exceed either
    budget, and adds an exponential backoff after recorded failures.

    Args:
        requests_per_minute: Maximum number of requests per 60-second window.
        tokens_per_minute: Maximum estimated tokens per 60-second window.
            ``None`` (or 0) disables token accounting.
    """

    def __init__(self, requests_per_minute: int, tokens_per_minute: Optional[int] = None):
        self.requests_per_minute = requests_per_minute
        self.tokens_per_minute = tokens_per_minute
        self.request_times = []  # timestamps of requests inside the window
        self.token_usage = []  # (timestamp, estimated_tokens) pairs inside the window
        self.consecutive_failures = 0

    def _prune(self) -> float:
        """Drop entries older than 60 seconds and return the current time."""
        now = time.time()
        self.request_times = [t for t in self.request_times if now - t < 60]
        self.token_usage = [(t, tokens) for t, tokens in self.token_usage if now - t < 60]
        return now

    async def wait_if_needed(self, estimated_tokens: int = 1000):
        """Sleep as long as needed, then record the upcoming request.

        Args:
            estimated_tokens: Token estimate charged against the token budget
                for this request (ignored when token accounting is disabled).
        """
        now = self._prune()

        # Request budget: wait until the oldest request leaves the window,
        # plus jitter. The emptiness guard avoids indexing an empty list when
        # requests_per_minute is 0.
        if self.request_times and len(self.request_times) >= self.requests_per_minute:
            wait_time = 60 - (now - self.request_times[0]) + random.uniform(2, 8)
            await asyncio.sleep(wait_time)
            now = self._prune()

        # Token budget: only wait when there is earlier usage that can expire.
        # With an empty window a single oversized estimate cannot be helped by
        # waiting, so the request proceeds instead of raising IndexError.
        if self.tokens_per_minute and self.token_usage:
            total_tokens = sum(tokens for _, tokens in self.token_usage)
            if total_tokens + estimated_tokens > self.tokens_per_minute:
                wait_time = 60 - (now - self.token_usage[0][0]) + random.uniform(3, 10)
                await asyncio.sleep(wait_time)
                now = self._prune()

        # Exponential backoff (capped at 120 s, plus jitter) after failures.
        if self.consecutive_failures > 0:
            backoff_time = min(2 ** self.consecutive_failures, 120) + random.uniform(2, 6)
            await asyncio.sleep(backoff_time)
            now = self._prune()

        # Record the request at the time it is actually released, not at the
        # time this method was entered, so the window stays accurate after a
        # sleep.
        self.request_times.append(now)
        if self.tokens_per_minute:
            self.token_usage.append((now, estimated_tokens))

    def record_success(self):
        """Reset the failure counter, which disables the backoff sleep."""
        self.consecutive_failures = 0

    def record_failure(self):
        """Count one more consecutive failure, lengthening the next backoff."""
        self.consecutive_failures += 1

# Module-level limiters, created once at import time.
# NOTE(review): the budgets are presumably the providers' free-tier quotas
# (per the original comment) - verify against the current limits. Neither
# limiter is referenced elsewhere in the visible part of this file.
groq_limiter = AdvancedRateLimiter(
    requests_per_minute=30,
    tokens_per_minute=6000,
)
gemini_limiter = AdvancedRateLimiter(
    requests_per_minute=2,
    tokens_per_minute=32000,
)

# Custom tool functions: plain arithmetic helpers, followed by search tools that sleep briefly before each call (SILENT)
def multiply_tool(a: float, b: float) -> float:
    """Return the product of two numbers.

    Args:
        a: First factor.
        b: Second factor.
    """
    product = a * b
    return product

def add_tool(a: float, b: float) -> float:
    """Return the sum of two numbers.

    Args:
        a: First addend.
        b: Second addend.
    """
    total = a + b
    return total

def subtract_tool(a: float, b: float) -> float:
    """Return the difference ``a - b``.

    Args:
        a: Value to subtract from.
        b: Value to subtract.
    """
    difference = a - b
    return difference

def divide_tool(a: float, b: float) -> float:
    """Return the quotient ``a / b``.

    Args:
        a: Dividend.
        b: Divisor.

    Raises:
        ValueError: If ``b`` equals zero.
    """
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")

async def web_search_tool(query: str) -> str:
    """Search the web with Tavily and return the result snippets.

    Sleeps a random 2-5 seconds first as a crude rate limit, then requests at
    most two results and joins their ``content`` fields with a ``---``
    separator line. Never raises: failures are reported in the returned text.

    Args:
        query: Search query to send to Tavily.

    Returns:
        The joined result snippets, or a ``"Web search failed: ..."`` message.
    """
    try:
        await asyncio.sleep(random.uniform(2, 5))
        # LangChain tools take their input as the first positional argument of
        # invoke(); passing ``query=`` as a keyword leaves that argument
        # missing and raises TypeError on every call.
        search_docs = TavilySearchResults(max_results=2).invoke({"query": query})
        if isinstance(search_docs, str):
            # NOTE(review): some versions of the tool appear to return a plain
            # string (e.g. an error description) instead of a list of dicts -
            # pass it through rather than failing on ``.get``.
            return search_docs
        return "\n\n---\n\n".join(doc.get("content", "") for doc in search_docs)
    except Exception as e:
        return f"Web search failed: {str(e)}"

async def wiki_search_tool(query: str) -> str:
    """Look up a query on Wikipedia and return a short excerpt.

    Sleeps a random 1-3 seconds first, loads at most one document, and
    returns the first 1000 characters of each loaded page joined with a
    ``---`` separator line. Never raises: failures are reported in the
    returned text.

    Args:
        query: Search query passed to the Wikipedia loader.

    Returns:
        The page excerpt(s), or a ``"Wikipedia search failed: ..."`` message.
    """
    try:
        pause = random.uniform(1, 3)
        await asyncio.sleep(pause)
        pages = WikipediaLoader(query=query, load_max_docs=1).load()
        excerpts = [page.page_content[:1000] for page in pages]
        return "\n\n---\n\n".join(excerpts)
    except Exception as exc:
        return f"Wikipedia search failed: {str(exc)}"

# Create specialized Agno agents (SILENT)
def create_agno_agents() -> Dict[str, Any]:
    """Build the three Agno agents used by the multi-agent system.

    Returns:
        A dict with the keys ``"math"``, ``"research"`` and ``"coordinator"``,
        each mapped to a configured ``Agent``. All agents run with
        temperature 0, hidden tool calls and markdown disabled, and are
        instructed to end their reply with a ``FINAL ANSWER:`` line.
    """

    def groq_model():
        # Both Groq-backed agents use the same model configuration; a fresh
        # instance is built for each agent, as before.
        return Groq(
            id="llama-3.3-70b-versatile",
            api_key=os.getenv("GROQ_API_KEY"),
            temperature=0
        )

    math_tools = [multiply_tool, add_tool, subtract_tool, divide_tool]

    # Math specialist agent (Groq-backed)
    math_agent = Agent(
        name="Math Specialist",
        model=groq_model(),
        tools=list(math_tools),
        instructions=[
            "You are a mathematical specialist with access to calculation tools.",
            "Use the appropriate math tools for calculations.",
            "Show your work step by step.",
            "Always provide precise numerical answers.",
            "Finish with: FINAL ANSWER: [numerical result]"
        ],
        show_tool_calls=False,  # SILENT
        markdown=False
    )

    # Research specialist agent (Gemini-backed)
    research_agent = Agent(
        name="Research Specialist",
        model=Gemini(
            id="gemini-2.0-flash-thinking-exp",
            api_key=os.getenv("GOOGLE_API_KEY"),
            temperature=0
        ),
        tools=[DuckDuckGoTools(), web_search_tool, wiki_search_tool],
        instructions=[
            "You are a research specialist with access to multiple search tools.",
            "Use appropriate search tools to gather comprehensive information.",
            "Always cite sources and provide well-researched answers.",
            "Synthesize information from multiple sources when possible.",
            "Finish with: FINAL ANSWER: [your researched answer]"
        ],
        show_tool_calls=False,  # SILENT
        markdown=False
    )

    # Coordinator agent (Groq-backed). Its instructions tell it to route math
    # to calculation tools, so it must actually be given them; previously it
    # only had the search tools.
    coordinator_agent = Agent(
        name="Coordinator",
        model=groq_model(),
        tools=[DuckDuckGoTools(), web_search_tool, wiki_search_tool, *math_tools],
        instructions=[
            "You are the main coordinator agent.",
            "Analyze queries and provide comprehensive responses.",
            "Use search tools for factual information when needed.",
            "Route complex math to calculation tools.",
            "Always finish with: FINAL ANSWER: [your final answer]"
        ],
        show_tool_calls=False,  # SILENT
        markdown=False
    )

    return {
        "math": math_agent,
        "research": research_agent,
        "coordinator": coordinator_agent
    }

# Main Agno multi-agent system (SILENT)
class AgnoMultiAgentSystem:
    """Route queries to one of three Agno agents, with retries and backoff.

    The instance counts requests (the counter resets once more than an hour
    has passed since ``last_request_time``) and pauses a random 3-10 seconds
    before every request after the first one.
    """

    # Routing keywords are matched as substrings of the lower-cased query, so
    # e.g. "add" also matches "address".
    _MATH_KEYWORDS = ('calculate', 'math', 'multiply', 'add', 'subtract', 'divide', 'compute')
    _RESEARCH_KEYWORDS = ('search', 'find', 'research', 'what is', 'who is', 'when', 'where')

    # Substrings of the lower-cased error message used to classify failures.
    _RATE_LIMIT_MARKERS = ('rate limit', '429', 'quota', 'too many requests')
    _TRANSIENT_MARKERS = ('api', 'connection', 'timeout', 'service unavailable')

    def __init__(self):
        self.agents = create_agno_agents()
        self.request_count = 0
        self.last_request_time = time.time()

    @staticmethod
    def _response_text(response: Any) -> str:
        """Return an agent response as plain text (its ``content`` if present)."""
        content = getattr(response, "content", response)
        return "" if content is None else str(content)

    def _route(self, query: str) -> str:
        """Return the key of the agent that should handle ``query``."""
        lowered = query.lower()
        if any(word in lowered for word in self._MATH_KEYWORDS):
            return "math"
        if any(word in lowered for word in self._RESEARCH_KEYWORDS):
            return "research"
        return "coordinator"

    async def process_query(self, query: str, max_retries: int = 5) -> str:
        """Answer ``query`` with the matching agent, retrying on failure (SILENT).

        Args:
            query: The user question.
            max_retries: Maximum number of attempts with the routed agent.

        Returns:
            The agent's reply as a string. If the last attempt fails with an
            unclassified error, the coordinator is asked once as a fallback
            and ``"Error: ..."`` is returned if that fails too. If the
            attempts run out on rate-limit or transient errors, a fixed
            "Maximum retries exceeded" message is returned.
        """
        # Global request counter; resets after more than an hour.
        current_time = time.time()
        if current_time - self.last_request_time > 3600:
            self.request_count = 0
            self.last_request_time = current_time

        self.request_count += 1

        # Space out every request after the first one.
        if self.request_count > 1:
            await asyncio.sleep(random.uniform(3, 10))

        agent = self.agents[self._route(query)]

        for attempt in range(max_retries):
            try:
                # Use the async entry point: the synchronous run() would block
                # the event loop and cannot await the async search tools.
                response = await agent.arun(query, stream=False)
                return self._response_text(response)

            except Exception as e:
                error_msg = str(e).lower()
                is_last_attempt = attempt == max_retries - 1

                if any(keyword in error_msg for keyword in self._RATE_LIMIT_MARKERS):
                    jitter = (15, 45)
                elif any(keyword in error_msg for keyword in self._TRANSIENT_MARKERS):
                    jitter = (5, 15)
                elif is_last_attempt:
                    try:
                        fallback = await self.agents["coordinator"].arun(
                            f"Answer this as best you can: {query}", stream=False
                        )
                        # Convert to text so the declared ``str`` return type holds.
                        return self._response_text(fallback)
                    except Exception:
                        # Report the original failure, not the fallback's.
                        return f"Error: {str(e)}"
                else:
                    jitter = (2, 8)

                # Back off before the next attempt; sleeping after the final
                # attempt would only delay the failure message.
                if not is_last_attempt:
                    await asyncio.sleep((2 ** attempt) + random.uniform(*jitter))

        return "Maximum retries exceeded. Please try again later."

# SILENT main function
async def main_async(query: str) -> str:
    """Answer ``query`` with a freshly built multi-agent system (SILENT).

    A new ``AgnoMultiAgentSystem`` is created on every call. This coroutine
    can be awaited directly from code that already runs an event loop.
    """
    system = AgnoMultiAgentSystem()
    answer = await system.process_query(query)
    return answer

def main(query: str) -> str:
    """Run the multi-agent system synchronously and return its reply (SILENT).

    Works both from plain scripts and from environments that already run an
    event loop (e.g. Jupyter notebooks); the latter requires ``nest_asyncio``.

    Args:
        query: The user question.

    Returns:
        The full agent response text.
    """
    # Probe for a running loop in a narrow try block. Wrapping the actual run
    # in ``except RuntimeError`` would re-execute the whole query whenever the
    # query itself raised a RuntimeError.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: ordinary script usage.
        return asyncio.run(main_async(query))

    # A loop is already running (e.g. Jupyter): patch asyncio so that
    # asyncio.run() may be called re-entrantly.
    import nest_asyncio
    nest_asyncio.apply()
    return asyncio.run(main_async(query))

def get_final_answer(query: str) -> str:
    """Run ``query`` and return only the text after the last ``FINAL ANSWER:``.

    Args:
        query: The user question.

    Returns:
        The stripped text following the last ``FINAL ANSWER:`` marker, or the
        whole stripped response when the marker is absent.
    """
    full_response = main(query)

    # main() can hand back a non-string (a raw agent response object from the
    # fallback path, or None content); normalise it so the marker search below
    # cannot raise TypeError.
    if not isinstance(full_response, str):
        content = getattr(full_response, "content", full_response)
        full_response = "" if content is None else str(content)

    marker = "FINAL ANSWER:"
    if marker in full_response:
        return full_response.split(marker)[-1].strip()
    return full_response.strip()

# For Jupyter notebooks - use this function directly
async def run_query(query: str) -> str:
    """Await-able entry point for Jupyter notebooks (SILENT).

    Delegates to ``main_async`` and returns its full response text.
    """
    response = await main_async(query)
    return response

if __name__ == "__main__":
    # Smoke test of the Agno system: print only the extracted final answer.
    question = "What are the names of the US presidents who were assassinated?"
    print(get_final_answer(question))