Final_Assignment_Template

Sleeping

App Files Files Community

josondev committed on Jun 27

Commit

0f81d99

verified ·

1 Parent(s): 0e78db3

Update veryfinal.py

Browse files

Files changed (1) hide show

veryfinal.py +234 -331

veryfinal.py CHANGED Viewed

@@ -1,373 +1,276 @@
-import os, json, time, random
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
-# Imports
-from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
-from langchain_groq import ChatGroq
-from langchain_nvidia_ai_endpoints import ChatNVIDIA
-from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_community.document_loaders import WikipediaLoader
-from langchain_community.document_loaders import ArxivLoader
-from langchain_community.vectorstores import FAISS
-from langchain_core.messages import SystemMessage, HumanMessage
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain_core.tools import tool
-from langchain.tools.retriever import create_retriever_tool
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_community.document_loaders import JSONLoader
-from langgraph.prebuilt import create_react_agent
-from langgraph.checkpoint.memory import MemorySaver
-from langchain_core.rate_limiters import InMemoryRateLimiter
-# Rate limiters for different providers
-groq_rate_limiter = InMemoryRateLimiter(
-    requests_per_second=0.5,  # 30 requests per minute
-    check_every_n_seconds=0.1,
-    max_bucket_size=10
-)
-google_rate_limiter = InMemoryRateLimiter(
-    requests_per_second=0.33,  # 20 requests per minute
-    check_every_n_seconds=0.1,
-    max_bucket_size=10
-)
-nvidia_rate_limiter = InMemoryRateLimiter(
-    requests_per_second=0.25,  # 15 requests per minute
-    check_every_n_seconds=0.1,
-    max_bucket_size=10
-)
-# Initialize individual LLMs
-groq_llm = ChatGroq(
-    model="llama-3.3-70b-versatile",
-    temperature=0,
-    api_key=os.getenv("GROQ_API_KEY"),
-    rate_limiter=groq_rate_limiter,
-    max_retries=2,
-    request_timeout=60
-)
-nvidia_llm = ChatNVIDIA(
-    model="meta/llama-3.1-405b-instruct",
-    temperature=0,
-    api_key=os.getenv("NVIDIA_API_KEY"),
-    rate_limiter=nvidia_rate_limiter,
-    max_retries=2
-)
-# Create LLM tools that can be selected by the agent
-@tool
-def groq_reasoning_tool(query: str) -> str:
-    """Use Groq's Llama model for fast reasoning, mathematical calculations, and logical problems.
-    Best for: Math problems, logical reasoning, quick calculations, code generation.
-    Args:
-        query: The question or problem to solve
-    """
-    try:
-        time.sleep(random.uniform(1, 2))  # Rate limiting
-        response = groq_llm.invoke([HumanMessage(content=query)])
-        return f"Groq Response: {response.content}"
-    except Exception as e:
-        return f"Groq tool failed: {str(e)}"
-@tool
-def nvidia_specialist_tool(query: str) -> str:
-    """Use NVIDIA's large model for specialized tasks, technical questions, and domain expertise.
-    Best for: Technical questions, specialized domains, scientific problems, detailed analysis.
-    Args:
-        query: The specialized question or technical problem
-    """
-    try:
-        time.sleep(random.uniform(2, 4))  # Rate limiting
-        response = nvidia_llm.invoke([HumanMessage(content=query)])
-        return f"NVIDIA Response: {response.content}"
-    except Exception as e:
-        return f"NVIDIA tool failed: {str(e)}"
-# Define calculation tools
-@tool
-def multiply(a: int | float, b: int | float) -> int | float:
-    """Multiply two numbers.
-    Args:
-        a: first int | float
-        b: second int | float
-    """
     return a * b
-@tool
-def add(a: int | float, b: int | float) -> int | float:
-    """Add two numbers.
-    Args:
-        a: first int | float
-        b: second int | float
-    """
     return a + b
-@tool
-def subtract(a: int | float , b: int | float) -> int | float:
-    """Subtract two numbers.
-    Args:
-        a: first int | float
-        b: second int | float
-    """
     return a - b
-@tool
-def divide(a: int | float, b: int | float) -> int | float:
-    """Divide two numbers.
-    Args:
-        a: first int | float
-        b: second int | float
-    """
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
-@tool
-def modulus(a: int | float, b: int | float) -> int | float:
-    """Get the modulus of two numbers.
-    Args:
-        a: first int | float
-        b: second int | float
-    """
-    return a % b
-# Define search tools
-@tool
-def wiki_search(query: str) -> str:
-    """Search the wikipedia for a query and return the first paragraph
-    args:
-        query: the query to search for
-    """
     try:
         loader = WikipediaLoader(query=query, load_max_docs=1)
         data = loader.load()
-        formatted_search_docs = "\n\n---\n\n".join(
-            [
-                f'\n{doc.page_content}\n'
-                for doc in data
-            ])
-        return formatted_search_docs
     except Exception as e:
         return f"Wikipedia search failed: {str(e)}"
-@tool
-def web_search(query: str) -> str:
-    """Search Tavily for a query and return maximum 3 results.
-    Args:
-        query: The search query.
-    """
-    try:
-        time.sleep(random.uniform(1, 3))
-        search_docs = TavilySearchResults(max_results=3).invoke(query=query)
-        formatted_search_docs = "\n\n---\n\n".join(
-            [
-                f'\n{doc.get("content", "")}\n'
-                for doc in search_docs
-            ])
-        return formatted_search_docs
-    except Exception as e:
-        return f"Web search failed: {str(e)}"
-@tool
-def arxiv_search(query: str) -> str:
-    """Search Arxiv for a query and return maximum 3 result.
-    Args:
-        query: The search query.
-    """
-    try:
-        search_docs = ArxivLoader(query=query, load_max_docs=3).load()
-        formatted_search_docs = "\n\n---\n\n".join(
-            [
-                f'\n{doc.page_content[:1000]}\n'
-                for doc in search_docs
-            ])
-        return formatted_search_docs
-    except Exception as e:
-        return f"ArXiv search failed: {str(e)}"
-# Load and process your JSONL data
-jq_schema = """
-{
-  page_content: .Question,
-  metadata: {
-    task_id: .task_id,
-    Level: .Level,
-    Final_answer: ."Final answer",
-    file_name: .file_name,
-    Steps: .["Annotator Metadata"].Steps,
-    Number_of_steps: .["Annotator Metadata"]["Number of steps"],
-    How_long: .["Annotator Metadata"]["How long did this take?"],
-    Tools: .["Annotator Metadata"].Tools,
-    Number_of_tools: .["Annotator Metadata"]["Number of tools"]
-  }
-}
-"""
-# Load documents and create vector database
-json_loader = JSONLoader(file_path="metadata.jsonl", jq_schema=jq_schema, json_lines=True, text_content=False)
-json_docs = json_loader.load()
-# Split documents
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=200)
-json_chunks = text_splitter.split_documents(json_docs)
-# Create vector database
-database = FAISS.from_documents(json_chunks, NVIDIAEmbeddings())
-# Create retriever and retriever tool
-retriever = database.as_retriever(search_type="similarity", search_kwargs={"k": 3})
-retriever_tool = create_retriever_tool(
-    retriever=retriever,
-    name="question_search",
-    description="Search for similar questions and their solutions from the knowledge base."
-)
-# Combine all tools including LLM tools
-tools = [
-    # Math tools
-    multiply,
-    add,
-    subtract,
-    divide,
-    modulus,
-    # Search tools
-    wiki_search,
-    web_search,
-    arxiv_search,
-    retriever_tool,
-    # LLM tools - agent can choose which LLM to use
-    groq_reasoning_tool,
-    nvidia_specialist_tool
-]
-# Use a lightweight coordinator LLM (Groq for speed)
-coordinator_llm = ChatGroq(
-    model="llama-3.3-70b-versatile",
-    temperature=0,
-    api_key=os.getenv("GROQ_API_KEY"),
-    rate_limiter=groq_rate_limiter
-)
-# Create memory for conversation
-memory = MemorySaver()
-# Create the agent with coordinator LLM
-agent_executor = create_react_agent(
-    model=coordinator_llm,
-    tools=tools,
-    checkpointer=memory
-)
-# Enhanced robust agent run
-def robust_agent_run(query, thread_id="robust_conversation", max_retries=3):
-    """Run agent with error handling, rate limiting, and LLM tool selection"""
-    for attempt in range(max_retries):
-        try:
-            config = {"configurable": {"thread_id": f"{thread_id}_{attempt}"}}
-            system_msg = SystemMessage(content='''You are a helpful assistant with access to multiple specialized LLM tools and other utilities.
-AVAILABLE LLM TOOLS:
-- groq_reasoning_tool: Fast reasoning, math, calculations, code (use for quick logical problems)
-- google_analysis_tool: Complex analysis, creative tasks, detailed explanations (use for comprehensive analysis)
-- nvidia_specialist_tool: Technical questions, specialized domains, scientific problems (use for expert-level tasks)
-TOOL SELECTION STRATEGY:
-- For math/calculations: Use basic math tools (add, multiply, etc.) OR groq_reasoning_tool for complex math
-- For factual questions: Use web_search, wiki_search, or arxiv_search first
-- For analysis/reasoning: Choose the most appropriate LLM tool based on complexity
-- For technical/scientific: Use nvidia_specialist_tool
-- For creative/comprehensive: Use google_analysis_tool
-- For quick logical problems: Use groq_reasoning_tool
-Always finish with: FINAL ANSWER: [YOUR FINAL ANSWER]
-Your answer should be a number OR few words OR comma separated list as appropriate.''')
-            user_msg = HumanMessage(content=query)
-            result = []
-            print(f"Attempt {attempt + 1}: Processing query with multi-LLM agent...")
-            for step in agent_executor.stream(
-                {"messages": [system_msg, user_msg]},
-                config,
-                stream_mode="values"
-            ):
-                result = step["messages"]
-            final_response = result[-1].content if result else "No response generated"
-            print(f"Query processed successfully on attempt {attempt + 1}")
-            return final_response
-        except Exception as e:
-            error_msg = str(e).lower()
-            if any(keyword in error_msg for keyword in ['rate limit', 'too many requests', '429', 'quota exceeded']):
-                wait_time = (2 ** attempt) + random.uniform(1, 3)
-                print(f"Rate limit hit on attempt {attempt + 1}. Waiting {wait_time:.2f} seconds...")
-                time.sleep(wait_time)
-                if attempt == max_retries - 1:
-                    return f"Rate limit exceeded after {max_retries} attempts: {str(e)}"
-                continue
-            elif any(keyword in error_msg for keyword in ['api', 'connection', 'timeout', 'service unavailable']):
-                wait_time = (2 ** attempt) + random.uniform(0.5, 1.5)
-                print(f"API error on attempt {attempt + 1}. Retrying in {wait_time:.2f} seconds...")
-                time.sleep(wait_time)
-                if attempt == max_retries - 1:
-                    return f"API error after {max_retries} attempts: {str(e)}"
-                continue
-            else:
-                return f"Error occurred: {str(e)}"
-    return "Maximum retries exceeded"
-# Main function with request tracking
-request_count = 0
-last_request_time = time.time()
 def main(query: str) -> str:
-    """Main function to run the multi-LLM agent"""
-    global request_count, last_request_time
-    current_time = time.time()
-    # Reset counter every minute
-    if current_time - last_request_time > 60:
-        request_count = 0
-        last_request_time = current_time
-    request_count += 1
-    print(f"Processing request #{request_count} with multi-LLM agent")
-    # Add delay between requests
-    if request_count > 1:
-        time.sleep(random.uniform(2, 5))
-    return robust_agent_run(query)
 if __name__ == "__main__":
-    # Test the multi-LLM agent
-    result = main("What are the names of the US presidents who were assassinated?")
     print(result)

+import os, json, time, random, asyncio
 from dotenv import load_dotenv
+from typing import Optional, Dict, Any
 # Load environment variables
 load_dotenv()
+# Agno imports (corrected based on search results)
+from agno.agent import Agent
+from agno.models.groq import Groq
+from agno.models.google import Gemini
+from agno.tools.duckduckgo import DuckDuckGoTools
+from agno.tools.yfinance import YFinanceTools
+# Additional imports for custom tools
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
+# Advanced Rate Limiter with exponential backoff (SILENT)
+class AdvancedRateLimiter:
+    """Sliding-window limiter for requests and, optionally, tokens per minute.
+    Call ``await wait_if_needed()`` before each API request, then call
+    ``record_success()`` or ``record_failure()`` so that consecutive failures
+    add an exponential backoff to the next wait. All waits are silent.
+    Args:
+        requests_per_minute: maximum number of requests per 60-second window.
+        tokens_per_minute: maximum estimated tokens per 60-second window;
+            ``None`` (or 0) disables token limiting.
+    """
+    WINDOW_SECONDS = 60
+    def __init__(self, requests_per_minute: int, tokens_per_minute: Optional[int] = None):
+        self.requests_per_minute = requests_per_minute
+        self.tokens_per_minute = tokens_per_minute
+        self.request_times = []  # timestamps of requests inside the window
+        self.token_usage = []  # (timestamp, estimated_tokens) pairs inside the window
+        self.consecutive_failures = 0
+    def _prune(self, now: float) -> None:
+        """Drop request and token records older than the window."""
+        window = self.WINDOW_SECONDS
+        self.request_times = [t for t in self.request_times if now - t < window]
+        self.token_usage = [(t, tokens) for t, tokens in self.token_usage if now - t < window]
+    async def wait_if_needed(self, estimated_tokens: int = 1000):
+        """Sleep as long as needed to respect the limits, then record the request.
+        Args:
+            estimated_tokens: token cost charged against ``tokens_per_minute``.
+        """
+        now = time.time()
+        self._prune(now)
+        # Request budget: wait until the oldest request leaves the window (plus jitter).
+        if self.request_times and len(self.request_times) >= self.requests_per_minute:
+            wait_time = self.WINDOW_SECONDS - (now - self.request_times[0]) + random.uniform(2, 8)
+            await asyncio.sleep(wait_time)
+            now = time.time()
+            self._prune(now)
+        # Token budget: with an empty history there is nothing to wait for, even if
+        # this single request is larger than the whole budget.
+        if self.tokens_per_minute and self.token_usage:
+            total_tokens = sum(tokens for _, tokens in self.token_usage)
+            if total_tokens + estimated_tokens > self.tokens_per_minute:
+                wait_time = self.WINDOW_SECONDS - (now - self.token_usage[0][0]) + random.uniform(3, 10)
+                await asyncio.sleep(wait_time)
+        # Exponential backoff (capped at 120 s before jitter) after consecutive failures.
+        if self.consecutive_failures > 0:
+            backoff_time = min(2 ** self.consecutive_failures, 120) + random.uniform(2, 6)
+            await asyncio.sleep(backoff_time)
+        # Stamp the request with the time it is actually released, not the time
+        # this call started, so the window reflects when the API is really hit.
+        released_at = time.time()
+        self.request_times.append(released_at)
+        if self.tokens_per_minute:
+            self.token_usage.append((released_at, estimated_tokens))
+    def record_success(self):
+        """Reset the failure streak after a successful request."""
+        self.consecutive_failures = 0
+    def record_failure(self):
+        """Extend the failure streak, lengthening the next backoff."""
+        self.consecutive_failures += 1
+# Initialize rate limiters for free tiers
+# Per-provider limiters sized for the free tiers (requests/minute, tokens/minute).
+# NOTE(review): neither limiter is referenced anywhere else in the visible file,
+# so no code path currently awaits wait_if_needed() on them - confirm whether
+# they were meant to be wired into the agents or tools.
+groq_limiter = AdvancedRateLimiter(requests_per_minute=30, tokens_per_minute=6000)
+gemini_limiter = AdvancedRateLimiter(requests_per_minute=2, tokens_per_minute=32000)
+# Custom tool functions with rate limiting (SILENT)
+def multiply_tool(a: float, b: float) -> float:
+    """Multiply two numbers.
+    Args:
+        a: first factor
+        b: second factor
+    Returns:
+        The product ``a * b``.
+    """
     return a * b
+def add_tool(a: float, b: float) -> float:
+    """Add two numbers.
+    Args:
+        a: first addend
+        b: second addend
+    Returns:
+        The sum ``a + b``.
+    """
     return a + b
+def subtract_tool(a: float, b: float) -> float:
+    """Subtract two numbers.
+    Args:
+        a: value to subtract from
+        b: value to subtract
+    Returns:
+        The difference ``a - b``.
+    """
     return a - b
+def divide_tool(a: float, b: float) -> float:
+    """Divide two numbers.
+    Args:
+        a: dividend
+        b: divisor
+    Returns:
+        The quotient ``a / b``.
+    Raises:
+        ValueError: if ``b`` is zero.
+    """
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
+async def web_search_tool(query: str) -> str:
+    """Search the web with Tavily and return the result snippets.
+    Sleeps a random 2-5 seconds before searching as a crude rate limit.
+    Args:
+        query: the search query.
+    Returns:
+        The ``content`` of up to two results joined by ``---`` separators, or a
+        string starting with "Web search failed:" if anything goes wrong.
+    """
+    try:
+        await asyncio.sleep(random.uniform(2, 5))
+        # The tool's invoke() takes its input positionally; passing query= as a
+        # keyword leaves the required input argument unset and raises TypeError.
+        search_docs = TavilySearchResults(max_results=2).invoke(query)
+        # NOTE(review): Tavily appears to return an error string rather than a
+        # list of result dicts when the search itself fails - confirm.
+        if isinstance(search_docs, str):
+            return f"Web search failed: {search_docs}"
+        return "\n\n---\n\n".join(doc.get("content", "") for doc in search_docs)
+    except Exception as e:
+        return f"Web search failed: {str(e)}"
+async def wiki_search_tool(query: str) -> str:
+    """Search Wikipedia and return the start of the best-matching article.
+    Sleeps a random 1-3 seconds before searching as a crude rate limit.
+    Args:
+        query: the search query.
+    Returns:
+        The first 1000 characters of each loaded page joined by ``---``
+        separators, or a string starting with "Wikipedia search failed:" if
+        anything raises.
+    """
     try:
+        await asyncio.sleep(random.uniform(1, 3))
+        # load_max_docs=1 asks the loader for a single document.
         loader = WikipediaLoader(query=query, load_max_docs=1)
         data = loader.load()
+        # Truncate each page to 1000 characters to keep the tool output small.
+        return "\n\n---\n\n".join([doc.page_content[:1000] for doc in data])
     except Exception as e:
         return f"Wikipedia search failed: {str(e)}"
+# Create specialized Agno agents (SILENT)
+def create_agno_agents():
+    """Create the three specialized Agno agents used by the system.
+    Returns:
+        A dict with keys ``"math"``, ``"research"`` and ``"coordinator"``
+        mapping to the corresponding ``Agent`` instances. API keys are read
+        from the ``GROQ_API_KEY`` and ``GOOGLE_API_KEY`` environment variables.
+    """
+    math_tools = [multiply_tool, add_tool, subtract_tool, divide_tool]
+    # Math specialist agent (using Groq for speed)
+    math_agent = Agent(
+        name="Math Specialist",
+        model=Groq(
+            id="llama-3.3-70b-versatile",
+            api_key=os.getenv("GROQ_API_KEY"),
+            temperature=0
+        ),
+        tools=list(math_tools),
+        instructions=[
+            "You are a mathematical specialist with access to calculation tools.",
+            "Use the appropriate math tools for calculations.",
+            "Show your work step by step.",
+            "Always provide precise numerical answers.",
+            "Finish with: FINAL ANSWER: [numerical result]"
+        ],
+        show_tool_calls=False,  # SILENT
+        markdown=False
+    )
+    # Research specialist agent (using Gemini for capability)
+    research_agent = Agent(
+        name="Research Specialist",
+        model=Gemini(
+            id="gemini-2.0-flash-thinking-exp",
+            api_key=os.getenv("GOOGLE_API_KEY"),
+            temperature=0
+        ),
+        tools=[DuckDuckGoTools(), web_search_tool, wiki_search_tool],
+        instructions=[
+            "You are a research specialist with access to multiple search tools.",
+            "Use appropriate search tools to gather comprehensive information.",
+            "Always cite sources and provide well-researched answers.",
+            "Synthesize information from multiple sources when possible.",
+            "Finish with: FINAL ANSWER: [your researched answer]"
+        ],
+        show_tool_calls=False,  # SILENT
+        markdown=False
+    )
+    # Coordinator agent (using Groq for fast coordination)
+    coordinator_agent = Agent(
+        name="Coordinator",
+        model=Groq(
+            id="llama-3.3-70b-versatile",
+            api_key=os.getenv("GROQ_API_KEY"),
+            temperature=0
+        ),
+        # The instructions below tell the coordinator to route math to
+        # calculation tools, so it needs those tools as well as the search ones.
+        tools=[DuckDuckGoTools(), web_search_tool, wiki_search_tool, *math_tools],
+        instructions=[
+            "You are the main coordinator agent.",
+            "Analyze queries and provide comprehensive responses.",
+            "Use search tools for factual information when needed.",
+            "Route complex math to calculation tools.",
+            "Always finish with: FINAL ANSWER: [your final answer]"
+        ],
+        show_tool_calls=False,  # SILENT
+        markdown=False
+    )
+    return {
+        "math": math_agent,
+        "research": research_agent,
+        "coordinator": coordinator_agent
+    }
+# Main Agno multi-agent system (SILENT)
+class AgnoMultiAgentSystem:
+    """Routes a query to one of the Agno agents and retries on failure.
+    Attributes:
+        agents: dict of agents keyed by "math", "research" and "coordinator".
+        request_count: queries handled since the last hourly reset.
+        last_request_time: timestamp of the last counter reset.
+    """
+    # Routing is by plain substring match on the lower-cased query.
+    # NOTE(review): substring matching also fires inside longer words
+    # (e.g. "add" in "address") - confirm that this is acceptable.
+    _MATH_KEYWORDS = ('calculate', 'math', 'multiply', 'add', 'subtract', 'divide', 'compute')
+    _RESEARCH_KEYWORDS = ('search', 'find', 'research', 'what is', 'who is', 'when', 'where')
+    _RATE_LIMIT_KEYWORDS = ('rate limit', '429', 'quota', 'too many requests')
+    _API_ERROR_KEYWORDS = ('api', 'connection', 'timeout', 'service unavailable')
+    def __init__(self):
+        self.agents = create_agno_agents()
+        self.request_count = 0
+        self.last_request_time = time.time()
+    def _select_agent(self, query: str):
+        """Pick the agent whose keyword list matches the query first."""
+        lowered = query.lower()
+        if any(word in lowered for word in self._MATH_KEYWORDS):
+            return self.agents["math"]
+        if any(word in lowered for word in self._RESEARCH_KEYWORDS):
+            return self.agents["research"]
+        return self.agents["coordinator"]
+    @staticmethod
+    def _response_text(response) -> str:
+        """Return an agent response as a plain string."""
+        content = getattr(response, 'content', None)
+        if content is None:
+            return str(response)
+        return content if isinstance(content, str) else str(content)
+    async def process_query(self, query: str, max_retries: int = 5) -> str:
+        """Answer ``query`` with the matching agent, retrying with backoff.
+        Args:
+            query: the user question.
+            max_retries: maximum number of attempts.
+        Returns:
+            The agent's answer text. If the last attempt fails with an error
+            that is neither a rate-limit nor an API error, the coordinator is
+            asked once more and its answer (or "Error: ...") is returned.
+            Otherwise a "Maximum retries exceeded" message is returned.
+        """
+        # Hourly reset of the request counter.
+        now = time.time()
+        if now - self.last_request_time > 3600:
+            self.request_count = 0
+            self.last_request_time = now
+        self.request_count += 1
+        # Space out consecutive requests handled by this instance.
+        if self.request_count > 1:
+            await asyncio.sleep(random.uniform(3, 10))
+        agent = self._select_agent(query)
+        for attempt in range(max_retries):
+            is_last_attempt = attempt == max_retries - 1
+            try:
+                return self._response_text(agent.run(query, stream=False))
+            except Exception as e:
+                error_msg = str(e).lower()
+                if any(keyword in error_msg for keyword in self._RATE_LIMIT_KEYWORDS):
+                    jitter = (15, 45)
+                elif any(keyword in error_msg for keyword in self._API_ERROR_KEYWORDS):
+                    jitter = (5, 15)
+                elif is_last_attempt:
+                    try:
+                        fallback = self.agents["coordinator"].run(
+                            f"Answer this as best you can: {query}", stream=False
+                        )
+                        # Convert to text so that callers always receive a str.
+                        return self._response_text(fallback)
+                    except Exception:
+                        return f"Error: {str(e)}"
+                else:
+                    jitter = (2, 8)
+                # Sleeping after the final attempt would only delay the error message.
+                if is_last_attempt:
+                    break
+                await asyncio.sleep((2 ** attempt) + random.uniform(*jitter))
+        return "Maximum retries exceeded. Please try again later."
+# SILENT main function
+async def main_async(query: str) -> str:
+    """Answer ``query`` with a freshly built ``AgnoMultiAgentSystem`` (SILENT).
+    Args:
+        query: the user question.
+    Returns:
+        Whatever ``process_query`` returns for the query.
+    """
+    # NOTE(review): a new system is built on every call, so its request_count
+    # starts at 0 each time and the inter-request delay in process_query
+    # (request_count > 1) never triggers through this entry point - confirm
+    # whether a shared instance was intended.
+    agno_system = AgnoMultiAgentSystem()
+    return await agno_system.process_query(query)
 def main(query: str) -> str:
+    """Synchronous entry point for the Agno multi-agent system (SILENT).
+    Args:
+        query: the user question.
+    Returns:
+        The result of ``main_async(query)``.
+    """
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        # No event loop is running in this thread: plain script usage.
+        # Only the loop probe is inside the try, so a RuntimeError raised by
+        # the pipeline itself propagates instead of re-running the whole query.
+        return asyncio.run(main_async(query))
+    # Already inside a running loop (e.g. a Jupyter notebook): patch asyncio so
+    # that a nested asyncio.run() is allowed.
+    import nest_asyncio
+    nest_asyncio.apply()
+    return asyncio.run(main_async(query))
+def get_final_answer(query: str) -> str:
+    """Run ``query`` and return only the text after the last "FINAL ANSWER:".
+    If the response contains no such marker, the whole response is returned.
+    Surrounding whitespace is stripped in both cases.
+    """
+    marker = "FINAL ANSWER:"
+    # rpartition splits on the last occurrence of the marker and yields the
+    # whole string as its tail when the marker is absent, covering both cases.
+    _, _, answer = main(query).rpartition(marker)
+    return answer.strip()
+# For Jupyter notebooks - use this function directly
+async def run_query(query: str) -> str:
+    """Answer ``query`` from code that is already running in an event loop (SILENT).
+    Thin awaitable wrapper around ``main_async``, intended for Jupyter
+    notebooks where the caller can simply ``await run_query(...)``.
+    Args:
+        query: the user question.
+    Returns:
+        The result of ``main_async(query)``.
+    """
+    return await main_async(query)
 if __name__ == "__main__":
+    # Smoke test of the Agno system: run one sample question and print only
+    # the extracted final answer.
+    result = get_final_answer("What are the names of the US presidents who were assassinated?")
     print(result)