Final_Assignment_Template

Running

App Files Community

josondev committed 1 day ago

Commit

ca98093

verified ·

1 Parent(s): 15b6891

Update veryfinal.py

Browse files

Files changed (1) hide show

veryfinal.py +269 -256

veryfinal.py CHANGED Viewed

@@ -1,326 +1,339 @@
 """
-Enhanced Multi-LLM Agent System - CORRECTED VERSION
-Fixes the issue where questions are returned as answers
 """
 import os
 import time
 import random
-import operator
-from typing import List, Dict, Any, TypedDict, Annotated
 from dotenv import load_dotenv
-from langchain_core.tools import tool
-from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_community.document_loaders import WikipediaLoader
-from langgraph.graph import StateGraph, END
 from langgraph.checkpoint.memory import MemorySaver
-from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 from langchain_groq import ChatGroq
 load_dotenv()
-# Enhanced system prompt for proper question-answering
-ENHANCED_SYSTEM_PROMPT = (
-    "You are a helpful assistant tasked with answering questions using available tools. "
-    "Follow these guidelines:\n"
-    "1. Read the question carefully and understand what is being asked\n"
-    "2. Use available tools when you need external information\n"
-    "3. Provide accurate, specific answers based on the information you find\n"
-    "4. For numbers: don't use commas or units unless specified\n"
-    "5. For strings: don't use articles or abbreviations, write digits in plain text\n"
-    "6. Always end with 'FINAL ANSWER: [YOUR ANSWER]' where [YOUR ANSWER] is concise\n"
-    "7. Never repeat the question as your answer\n"
-    "8. If you cannot find the answer, state 'Information not available'\n"
-)
-# ---- Tool Definitions ----
 @tool
 def multiply(a: int, b: int) -> int:
-    """Multiply two integers and return the product."""
     return a * b
 @tool
 def add(a: int, b: int) -> int:
-    """Add two integers and return the sum."""
     return a + b
 @tool
 def subtract(a: int, b: int) -> int:
-    """Subtract the second integer from the first and return the difference."""
     return a - b
 @tool
 def divide(a: int, b: int) -> float:
-    """Divide the first integer by the second and return the quotient."""
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
 @tool
 def modulus(a: int, b: int) -> int:
-    """Return the remainder when dividing the first integer by the second."""
     return a % b
 @tool
-def optimized_web_search(query: str) -> str:
-    """Perform web search using TavilySearchResults."""
     try:
-        time.sleep(random.uniform(0.7, 1.5))
         search_tool = TavilySearchResults(max_results=3)
-        docs = search_tool.invoke({"query": query})
-        return "\n\n---\n\n".join(
-            f"<Doc url='{d.get('url','')}'>{d.get('content','')[:800]}</Doc>"
-            for d in docs
-        )
     except Exception as e:
         return f"Web search failed: {e}"
 @tool
-def optimized_wiki_search(query: str) -> str:
-    """Perform Wikipedia search and return content."""
     try:
-        time.sleep(random.uniform(0.3, 1))
-        docs = WikipediaLoader(query=query, load_max_docs=2).load()
-        return "\n\n---\n\n".join(
-            f"<Doc src='{d.metadata.get('source','Wikipedia')}'>{d.page_content[:1000]}</Doc>"
-            for d in docs
-        )
     except Exception as e:
-        return f"Wikipedia search failed: {e}"
-# ---- Enhanced Agent State ----
-class EnhancedAgentState(TypedDict):
-    """State structure for the enhanced agent system."""
-    messages: Annotated[List[HumanMessage | AIMessage], operator.add]
-    query: str
-    agent_type: str
-    final_answer: str
-    perf: Dict[str, Any]
-    agno_resp: str
-# ---- Enhanced Multi-LLM System ----
-class HybridLangGraphMultiLLMSystem:
-    """Enhanced question-answering system with proper response handling."""
-    def __init__(self):
-        """Initialize the enhanced multi-LLM system."""
-        self.tools = [
-            multiply, add, subtract, divide, modulus,
-            optimized_web_search, optimized_wiki_search
-        ]
-        self.graph = self._build_graph()
-    def _llm(self, model_name: str) -> ChatGroq:
-        """Create a Groq LLM instance."""
-        return ChatGroq(
-            model=model_name,
-            temperature=0,
-            api_key=os.getenv("GROQ_API_KEY")
         )
-    def _build_graph(self) -> StateGraph:
-        """Build the LangGraph state machine with proper response handling."""
-        # Initialize LLMs
-        llama8_llm = self._llm("llama3-8b-8192")
-        llama70_llm = self._llm("llama3-70b-8192")
-        deepseek_llm = self._llm("deepseek-chat")
-        def router(st: EnhancedAgentState) -> EnhancedAgentState:
-            """Route queries to appropriate LLM based on content analysis."""
-            q = st["query"].lower()
-            # Enhanced routing logic
-            if any(keyword in q for keyword in ["calculate", "compute", "math", "multiply", "add", "subtract", "divide"]):
-                t = "llama70"  # Use more powerful model for calculations
-            elif any(keyword in q for keyword in ["search", "find", "lookup", "wikipedia", "information about"]):
-                t = "search_enhanced"  # Use search-enhanced processing
-            elif "deepseek" in q or any(keyword in q for keyword in ["analyze", "reasoning", "complex"]):
-                t = "deepseek"
-            elif "llama-8" in q:
-                t = "llama8"
-            elif len(q.split()) > 20:  # Complex queries
-                t = "llama70"
-            else:
-                t = "llama8"  # Default for simple queries
-            return {**st, "agent_type": t}
-        def llama8_node(st: EnhancedAgentState) -> EnhancedAgentState:
-            """Process query with Llama-3 8B model."""
-            t0 = time.time()
-            try:
-                # Create enhanced prompt with context
-                enhanced_query = f"""
-                Question: {st["query"]}
-                Please provide a direct, accurate answer to this question. Do not repeat the question.
-                """
-                sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
-                res = llama8_llm.invoke([sys, HumanMessage(content=enhanced_query)])
-                # Extract and clean the answer
-                answer = res.content.strip()
-                if "FINAL ANSWER:" in answer:
-                    answer = answer.split("FINAL ANSWER:")[-1].strip()
-                return {**st,
-                        "final_answer": answer,
-                        "perf": {"time": time.time() - t0, "prov": "Groq-Llama3-8B"}}
-            except Exception as e:
-                return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}
-        def llama70_node(st: EnhancedAgentState) -> EnhancedAgentState:
-            """Process query with Llama-3 70B model."""
-            t0 = time.time()
             try:
-                # Create enhanced prompt with context
-                enhanced_query = f"""
-                Question: {st["query"]}
-                Please provide a direct, accurate answer to this question. Do not repeat the question.
-                """
-                sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
-                res = llama70_llm.invoke([sys, HumanMessage(content=enhanced_query)])
-                # Extract and clean the answer
-                answer = res.content.strip()
-                if "FINAL ANSWER:" in answer:
-                    answer = answer.split("FINAL ANSWER:")[-1].strip()
-                return {**st,
-                        "final_answer": answer,
-                        "perf": {"time": time.time() - t0, "prov": "Groq-Llama3-70B"}}
             except Exception as e:
-                return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}
-        def deepseek_node(st: EnhancedAgentState) -> EnhancedAgentState:
-            """Process query with DeepSeek model."""
-            t0 = time.time()
-            try:
-                # Create enhanced prompt with context
-                enhanced_query = f"""
-                Question: {st["query"]}
-                Please provide a direct, accurate answer to this question. Do not repeat the question.
-                """
-                sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
-                res = deepseek_llm.invoke([sys, HumanMessage(content=enhanced_query)])
-                # Extract and clean the answer
-                answer = res.content.strip()
-                if "FINAL ANSWER:" in answer:
-                    answer = answer.split("FINAL ANSWER:")[-1].strip()
-                return {**st,
-                        "final_answer": answer,
-                        "perf": {"time": time.time() - t0, "prov": "Groq-DeepSeek"}}
-            except Exception as e:
-                return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}
-        def search_enhanced_node(st: EnhancedAgentState) -> EnhancedAgentState:
-            """Process query with search enhancement."""
-            t0 = time.time()
-            try:
-                # Determine search strategy
-                query = st["query"]
-                search_results = ""
-                if any(keyword in query.lower() for keyword in ["wikipedia", "wiki"]):
-                    search_results = optimized_wiki_search.invoke({"query": query})
                 else:
-                    search_results = optimized_web_search.invoke({"query": query})
-                # Create comprehensive prompt with search results
-                enhanced_query = f"""
-                Original Question: {query}
-                Search Results:
-                {search_results}
-                Based on the search results above, provide a direct answer to the original question.
-                Extract the specific information requested. Do not repeat the question.
-                """
-                sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
-                res = llama70_llm.invoke([sys, HumanMessage(content=enhanced_query)])
-                # Extract and clean the answer
-                answer = res.content.strip()
-                if "FINAL ANSWER:" in answer:
-                    answer = answer.split("FINAL ANSWER:")[-1].strip()
-                return {**st,
-                        "final_answer": answer,
-                        "perf": {"time": time.time() - t0, "prov": "Search-Enhanced-Llama70"}}
-            except Exception as e:
-                return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}
-        # Build graph
-        g = StateGraph(EnhancedAgentState)
-        g.add_node("router", router)
-        g.add_node("llama8", llama8_node)
-        g.add_node("llama70", llama70_node)
-        g.add_node("deepseek", deepseek_node)
-        g.add_node("search_enhanced", search_enhanced_node)
-        g.set_entry_point("router")
-        g.add_conditional_edges("router", lambda s: s["agent_type"], {
-            "llama8": "llama8",
-            "llama70": "llama70",
-            "deepseek": "deepseek",
-            "search_enhanced": "search_enhanced"
-        })
-        for node in ["llama8", "llama70", "deepseek", "search_enhanced"]:
-            g.add_edge(node, END)
-        return g.compile(checkpointer=MemorySaver())
-    def process_query(self, q: str) -> str:
-        """Process a query and return the final answer."""
-        state = {
-            "messages": [HumanMessage(content=q)],
-            "query": q,
-            "agent_type": "",
-            "final_answer": "",
-            "perf": {},
-            "agno_resp": ""
         }
-        cfg = {"configurable": {"thread_id": f"qa_{hash(q)}"}}
-        try:
-            out = self.graph.invoke(state, cfg)
-            answer = out.get("final_answer", "").strip()
-            # Ensure we don't return the question as the answer
-            if answer == q or answer.startswith(q):
-                return "Information not available"
-            return answer if answer else "No answer generated"
-        except Exception as e:
-            return f"Error processing query: {e}"
-def build_graph(provider: str | None = None) -> StateGraph:
-    """Build and return the graph for the enhanced agent system."""
-    return HybridLangGraphMultiLLMSystem().graph
-if __name__ == "__main__":
-    # Test the system
-    qa_system = HybridLangGraphMultiLLMSystem()
     test_questions = [
         "What is 25 multiplied by 17?",
-        "Who was the first president of the United States?",
-        "Find information about artificial intelligence on Wikipedia"
     ]
     for question in test_questions:
-        print(f"Question: {question}")
-        answer = qa_system.process_query(question)
-        print(f"Answer: {answer}")
-        print("-" * 50)

 """
+Enhanced LangGraph Agent with Multi-LLM Support and Proper Question Answering
+Combines your original LangGraph structure with enhanced response handling
 """
 import os
 import time
 import random
 from dotenv import load_dotenv
+from typing import List, Dict, Any, TypedDict, Annotated
+import operator
+from langgraph.graph import START, StateGraph, MessagesState, END
+from langgraph.prebuilt import tools_condition, ToolNode
 from langgraph.checkpoint.memory import MemorySaver
+from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_groq import ChatGroq
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
+from langchain_community.vectorstores import SupabaseVectorStore
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
+from langchain_core.tools import tool
+from langchain.tools.retriever import create_retriever_tool
+from supabase.client import Client, create_client
 load_dotenv()
+# Enhanced system prompt for better question answering
+ENHANCED_SYSTEM_PROMPT = """You are a helpful assistant tasked with answering questions using a set of tools.
+CRITICAL INSTRUCTIONS:
+1. Read the question carefully and understand what specific information is being asked
+2. Use the appropriate tools to find the exact information requested
+3. For factual questions, search for current and accurate information
+4. For calculations, use the math tools provided
+5. Always provide specific, direct answers - never repeat the question as your answer
+6. If you cannot find the information, state "Information not available"
+7. Format your final response as: FINAL ANSWER: [your specific answer]
+ANSWER FORMAT RULES:
+- For numbers: provide just the number without commas or units unless specified
+- For names/strings: provide the exact name or term without articles
+- For lists: provide comma-separated values
+- Be concise and specific in your final answer
+Remember: Your job is to ANSWER the question, not repeat it back."""
+# ---- Enhanced Tool Definitions ----
 @tool
 def multiply(a: int, b: int) -> int:
+    """Multiply two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
     return a * b
 @tool
 def add(a: int, b: int) -> int:
+    """Add two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
     return a + b
 @tool
 def subtract(a: int, b: int) -> int:
+    """Subtract two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
     return a - b
 @tool
 def divide(a: int, b: int) -> float:
+    """Divide two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
 @tool
 def modulus(a: int, b: int) -> int:
+    """Get the modulus of two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
     return a % b
 @tool
+def wiki_search(query: str) -> str:
+    """Search Wikipedia for a query and return maximum 2 results.
+    Args:
+        query: The search query.
+    """
     try:
+        time.sleep(random.uniform(0.5, 1.0))  # Rate limiting
+        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+        if not search_docs:
+            return "No Wikipedia results found"
+        formatted_search_docs = "\n\n---\n\n".join([
+            f'<Document source="{doc.metadata.get("source", "Wikipedia")}" title="{doc.metadata.get("title", "")}">\n{doc.page_content[:1500]}\n</Document>'
+            for doc in search_docs
+        ])
+        return formatted_search_docs
+    except Exception as e:
+        return f"Wikipedia search failed: {e}"
+@tool
+def web_search(query: str) -> str:
+    """Search Tavily for a query and return maximum 3 results.
+    Args:
+        query: The search query.
+    """
+    try:
+        time.sleep(random.uniform(0.7, 1.2))  # Rate limiting
         search_tool = TavilySearchResults(max_results=3)
+        search_docs = search_tool.invoke({"query": query})
+        if not search_docs:
+            return "No web search results found"
+        formatted_search_docs = "\n\n---\n\n".join([
+            f'<Document source="{doc.get("url", "")}">\n{doc.get("content", "")[:1200]}\n</Document>'
+            for doc in search_docs
+        ])
+        return formatted_search_docs
     except Exception as e:
         return f"Web search failed: {e}"
 @tool
+def arxiv_search(query: str) -> str:
+    """Search Arxiv for a query and return maximum 3 results.
+    Args:
+        query: The search query.
+    """
     try:
+        time.sleep(random.uniform(0.5, 1.0))  # Rate limiting
+        search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+        if not search_docs:
+            return "No ArXiv results found"
+        formatted_search_docs = "\n\n---\n\n".join([
+            f'<Document source="{doc.metadata.get("source", "ArXiv")}" title="{doc.metadata.get("title", "")}">\n{doc.page_content[:1000]}\n</Document>'
+            for doc in search_docs
+        ])
+        return formatted_search_docs
     except Exception as e:
+        return f"ArXiv search failed: {e}"
+# Initialize tools list
+tools = [
+    multiply, add, subtract, divide, modulus,
+    wiki_search, web_search, arxiv_search
+]
+# Enhanced State for better tracking
+class EnhancedState(MessagesState):
+    """Enhanced state with additional tracking"""
+    query: str = ""
+    tools_used: List[str] = []
+    search_results: str = ""
+def build_graph(provider: str = "groq"):
+    """Build the enhanced graph with proper error handling and response formatting"""
+    # Initialize LLM based on provider
+    if provider == "google":
+        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
+    elif provider == "groq":
+        llm = ChatGroq(model="llama3-70b-8192", temperature=0)  # Using more reliable model
+    elif provider == "huggingface":
+        llm = ChatHuggingFace(
+            llm=HuggingFaceEndpoint(
+                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
+                temperature=0,
+            ),
         )
+    else:
+        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
+    # Bind tools to LLM
+    llm_with_tools = llm.bind_tools(tools)
+    # Initialize vector store if available
+    vector_store = None
+    try:
+        if os.getenv("SUPABASE_URL") and os.getenv("SUPABASE_SERVICE_KEY"):
+            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+            supabase: Client = create_client(
+                os.environ.get("SUPABASE_URL"),
+                os.environ.get("SUPABASE_SERVICE_KEY")
+            )
+            vector_store = SupabaseVectorStore(
+                client=supabase,
+                embedding=embeddings,
+                table_name="documents",
+                query_name="match_documents_langchain",
+            )
+    except Exception as e:
+        print(f"Vector store initialization failed: {e}")
+    def retriever(state: MessagesState):
+        """Enhanced retriever node with fallback"""
+        messages = state["messages"]
+        query = messages[-1].content if messages else ""
+        # Try to get similar questions from vector store
+        similar_context = ""
+        if vector_store:
             try:
+                similar_questions = vector_store.similarity_search(query, k=1)
+                if similar_questions:
+                    similar_context = f"\n\nSimilar example for reference:\n{similar_questions[0].page_content}"
             except Exception as e:
+                print(f"Vector search failed: {e}")
+        # Enhanced system message with context
+        enhanced_prompt = ENHANCED_SYSTEM_PROMPT + similar_context
+        sys_msg = SystemMessage(content=enhanced_prompt)
+        return {"messages": [sys_msg] + messages}
+    def assistant(state: MessagesState):
+        """Enhanced assistant node with better response handling"""
+        try:
+            response = llm_with_tools.invoke(state["messages"])
+            # Ensure response is properly formatted
+            if hasattr(response, 'content'):
+                content = response.content
+                # Check if this is just repeating the question
+                original_query = state["messages"][-1].content if state["messages"] else ""
+                if content.strip() == original_query.strip():
+                    # Force a better response
+                    enhanced_messages = state["messages"] + [
+                        HumanMessage(content=f"Please provide a specific answer to this question, do not repeat the question: {original_query}")
+                    ]
+                    response = llm_with_tools.invoke(enhanced_messages)
+            return {"messages": [response]}
+        except Exception as e:
+            error_response = AIMessage(content=f"Error processing request: {e}")
+            return {"messages": [error_response]}
+    def format_final_answer(state: MessagesState):
+        """Format the final answer properly"""
+        messages = state["messages"]
+        if not messages:
+            return {"messages": [AIMessage(content="FINAL ANSWER: Information not available")]}
+        last_message = messages[-1]
+        if hasattr(last_message, 'content'):
+            content = last_message.content
+            # Ensure proper formatting
+            if "FINAL ANSWER:" not in content:
+                # Extract the key information and format it
+                if content.strip():
+                    formatted_content = f"FINAL ANSWER: {content.strip()}"
                 else:
+                    formatted_content = "FINAL ANSWER: Information not available"
+                formatted_message = AIMessage(content=formatted_content)
+                return {"messages": messages[:-1] + [formatted_message]}
+        return {"messages": messages}
+    # Build the graph
+    builder = StateGraph(MessagesState)
+    # Add nodes
+    builder.add_node("retriever", retriever)
+    builder.add_node("assistant", assistant)
+    builder.add_node("tools", ToolNode(tools))
+    builder.add_node("formatter", format_final_answer)
+    # Add edges
+    builder.add_edge(START, "retriever")
+    builder.add_edge("retriever", "assistant")
+    builder.add_conditional_edges(
+        "assistant",
+        tools_condition,
+        {
+            "tools": "tools",
+            "__end__": "formatter"
         }
+    )
+    builder.add_edge("tools", "assistant")
+    builder.add_edge("formatter", END)
+    # Compile graph with checkpointer
+    return builder.compile(checkpointer=MemorySaver())
+# Test function
+def test_agent():
+    """Test the agent with sample questions"""
+    graph = build_graph(provider="groq")
     test_questions = [
+        "How many studio albums were published by Mercedes Sosa between 2000 and 2009?",
         "What is 25 multiplied by 17?",
+        "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2004?"
     ]
     for question in test_questions:
+        print(f"\nQuestion: {question}")
+        print("-" * 60)
+        try:
+            messages = [HumanMessage(content=question)]
+            config = {"configurable": {"thread_id": f"test_{hash(question)}"}}
+            result = graph.invoke({"messages": messages}, config)
+            if result and "messages" in result:
+                final_message = result["messages"][-1]
+                if hasattr(final_message, 'content'):
+                    print(f"Answer: {final_message.content}")
+                else:
+                    print(f"Answer: {final_message}")
+            else:
+                print("Answer: No response generated")
+        except Exception as e:
+            print(f"Error: {e}")
+        print()
+if __name__ == "__main__":
+    # Run tests
+    test_agent()