Final_Assignment_Template

Runtime error

App Files Files Community

AdityaPandey committed on May 17

Commit

3a60aaa

verified ·

1 Parent(s): 4598d02

Upload 4 files

Browse files

Files changed (4) hide show

agent.py +67 -0
prompts.py +5 -0
retriever.py +44 -0
tools.py +112 -0

agent.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""LangGraph Agent"""
+import os
+from dotenv import load_dotenv
+from langgraph.graph import START, StateGraph, MessagesState
+from langgraph.prebuilt import tools_condition
+from langgraph.prebuilt import ToolNode
+from langchain_core.messages import SystemMessage, HumanMessage
+from prompts import SYS_PROMPT
+from tools import tools
+from retriever import vector_store
+from langchain_openai import ChatOpenAI
+# Load variables from a .env file into the environment; build_graph() reads OPENAI_API_KEY via os.getenv.
+load_dotenv()
+# System message
+# Wraps SYS_PROMPT (imported from prompts) so the graph nodes can reuse a single instance.
+sys_msg = SystemMessage(content=SYS_PROMPT)
+# Build graph function
+def build_graph():
+    """Build and compile the LangGraph agent.
+
+    The graph runs ``retriever`` once, then alternates between ``assistant``
+    and ``tools`` until the model stops requesting tool calls.
+
+    Returns:
+        The compiled graph. Invoke it with ``{"messages": [HumanMessage(...)]}``.
+    """
+    llm = ChatOpenAI(temperature=0.1, model="gpt-4o", openai_api_key=os.getenv("OPENAI_API_KEY"))
+    # Bind tools to LLM
+    llm_with_tools = llm.bind_tools(tools)
+    # Nodes
+    def assistant(state: MessagesState):
+        """Assistant node: call the tool-enabled LLM on the conversation so far."""
+        # MessagesState merges node output into the history with add_messages
+        # (replace by id, otherwise append), so a system message returned from
+        # a node ends up *after* the user's question. Prepending it at call
+        # time guarantees the system prompt is always the first message.
+        return {"messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]}
+    def retriever(state: MessagesState):
+        """Retriever node: append similar solved questions as a reference message."""
+        similar_question = vector_store.similarity_search(state["messages"][0].content, k=3)
+        if not similar_question:
+            # Nothing to reference; leave the history untouched.
+            return {"messages": []}
+        similar_question_content = "\n".join(
+            f"{idx}. {doc.page_content}" for idx, doc in enumerate(similar_question, start=1)
+        )
+        example_msg = HumanMessage(
+            content=f"Here I provide some similar questions and answer for reference in case you can't find answer from tool result: \n\n{similar_question_content}",
+        )
+        # Return only the new message; the reducer appends it to the history.
+        return {"messages": [example_msg]}
+    builder = StateGraph(MessagesState)
+    builder.add_node("retriever", retriever)
+    builder.add_node("assistant", assistant)
+    builder.add_node("tools", ToolNode(tools))
+    builder.add_edge(START, "retriever")
+    builder.add_edge("retriever", "assistant")
+    # tools_condition routes to "tools" when the last AI message requests a
+    # tool call, and ends the run otherwise.
+    builder.add_conditional_edges(
+        "assistant",
+        tools_condition,
+    )
+    builder.add_edge("tools", "assistant")
+    # Compile graph
+    return builder.compile()
+# Manual smoke test: run one sample question through the agent and print the transcript.
+if __name__ == "__main__":
+    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
+    graph = build_graph()
+    # The graph state is a dict whose "messages" entry holds the conversation.
+    messages = graph.invoke({"messages": [HumanMessage(content=question)]})
+    # The last message is the model's final reply.
+    answer = messages["messages"][-1].content
+    for m in messages["messages"]:
+        m.pretty_print()

prompts.py ADDED Viewed

	@@ -0,0 +1,5 @@

+# System prompt for the agent: asks for visible reasoning followed by a
+# "FINAL ANSWER: ..." line, and spells out the formatting rules for that answer.
+SYS_PROMPT = """You are a helpful assistant tasked with answering questions using a set of tools.
+Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+Your answer should only start with "FINAL ANSWER: ", then follows with the answer."""

retriever.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from langchain_chroma import Chroma
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from langchain_core.documents import Document
+import json
+from uuid import uuid4
+print("Loading embedding model...")
+embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+vector_store = Chroma(
+    collection_name="example_collection",
+    embedding_function=embeddings,
+    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
+)
+def _load_qa_documents(path):
+    """Read a JSONL file of solved questions into a list of Documents.
+
+    Each non-blank line must be a JSON object with the keys ``Question``,
+    ``Final answer`` and ``task_id``.
+
+    Args:
+        path: Location of the JSONL file.
+
+    Returns:
+        One Document per distinct ``task_id``, in first-seen order.
+    """
+    documents = {}
+    with open(path, 'r', encoding='utf-8') as jsonl_file:
+        for line in jsonl_file:
+            if not line.strip():
+                continue  # tolerate blank or trailing lines
+            sample = json.loads(line)
+            # Use the task id as the document id. The collection is persisted
+            # on disk, so a random id per import would store another copy of
+            # every question on each start-up and fill the top-k results with
+            # duplicates. A stable id lets re-adding overwrite instead.
+            doc_id = str(sample['task_id'])
+            documents[doc_id] = Document(
+                page_content=f"Question: {sample['Question']}\n\nFinal answer: {sample['Final answer']}",
+                metadata={
+                    "source": sample['task_id'],
+                },
+                id=doc_id,
+            )
+    return list(documents.values())
+# Load the metadata.jsonl file
+docs = _load_qa_documents('metadata.jsonl')
+# Add documents to the vector store
+print("Adding documents to the vector store...")
+if docs:
+    vector_store.add_documents(documents=docs)
+del docs

tools.py ADDED Viewed

	@@ -0,0 +1,112 @@

+from langchain_community.tools import DuckDuckGoSearchResults
+from langchain_community.document_loaders import WikipediaLoader
+from langchain_community.document_loaders import ArxivLoader
+from langchain_core.documents import Document
+# Separator inserted between the formatted results returned by the search tools below.
+SEP_CHAR = "\n\n---\n\n"
+def multiply(a: int, b: int) -> int:
+    """Multiply two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    # Docstring kept verbatim: it is used as the tool description when the
+    # function is bound to the LLM.
+    product = a * b
+    return product
+def add(a: int, b: int) -> int:
+    """Add two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    # Docstring kept verbatim: it is used as the tool description when the
+    # function is bound to the LLM.
+    total = a + b
+    return total
+def subtract(a: int, b: int) -> int:
+    """Subtract two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    # Docstring kept verbatim: it is used as the tool description when the
+    # function is bound to the LLM.
+    difference = a - b
+    return difference
+def divide(a: int, b: int) -> float:
+    """Divide two numbers.
+    Args:
+        a: first int
+        b: second int
+    Returns:
+        The quotient a / b as a float (true division, not floor division).
+    Raises:
+        ValueError: if b is zero.
+    """
+    if b == 0:
+        raise ValueError("Cannot divide by zero.")
+    # "/" always yields a float, hence the float return annotation.
+    return a / b
+def modulus(a: int, b: int) -> int:
+    """Get the modulus of two numbers.
+    Args:
+        a: first int
+        b: second int
+    Returns:
+        The remainder a % b (it takes the sign of b, as Python's % does).
+    Raises:
+        ValueError: if b is zero.
+    """
+    # Mirror divide(): report a zero divisor with an explicit message rather
+    # than letting a bare ZeroDivisionError surface.
+    if b == 0:
+        raise ValueError("Cannot take modulus by zero.")
+    return a % b
+def wiki_search(query: str) -> str:
+    """Search Wikipedia for a query and return maximum 2 results.
+    Args:
+        query: The search query.
+    Returns:
+        The matching pages as <Document> blocks joined by a separator line,
+        or an empty string when nothing matches.
+    """
+    search_docs: list[Document] = WikipediaLoader(query=query, load_max_docs=2).load()
+    # The opening tag must not be self-closing, because a closing </Document>
+    # follows the page text. .get() keeps one result without a "source" entry
+    # from failing the whole call.
+    formatted_search_docs = SEP_CHAR.join(
+        f'<Document source="{doc.metadata.get("source", "")}">\n{doc.page_content}\n</Document>'
+        for doc in search_docs
+    )
+    return formatted_search_docs
+def web_search(query: str) -> str:
+    """Search Web for a query and return maximum 3 results.
+    Args:
+        query: The search query.
+    Returns:
+        The hits as <Document> blocks (link, title, snippet) joined by a
+        separator line, or an empty string when nothing matches.
+    """
+    search_docs: list[dict] = DuckDuckGoSearchResults(num_results=3, output_format='list').invoke(input=query)
+    # The opening tag must not be self-closing, because a closing </Document>
+    # follows the snippet. .get() keeps one hit with a missing field from
+    # failing the whole call.
+    formatted_search_docs = SEP_CHAR.join(
+        f'<Document source="{doc.get("link", "")}" title="{doc.get("title", "")}">\n{doc.get("snippet", "")}\n</Document>'
+        for doc in search_docs
+    )
+    return formatted_search_docs
+def arvix_search(query: str) -> str:
+    """Search Arxiv for a query and return maximum 3 results.
+    Args:
+        query: The search query.
+    Returns:
+        The papers as <Document> blocks (title, authors, first 1000 characters
+        of text) joined by a separator line, or an empty string when nothing
+        matches.
+    """
+    # Cap the loader explicitly. Without load_max_docs it falls back to the
+    # library default rather than the 3 results promised above.
+    search_docs: list[Document] = ArxivLoader(query=query, load_max_docs=3).load()
+    # The opening tag must not be self-closing, because a closing </Document>
+    # follows the paper text.
+    formatted_search_docs = SEP_CHAR.join(
+        f'<Document title="{doc.metadata.get("Title", "")}" authors="{doc.metadata.get("Authors", "")}">\n{doc.page_content[:1000]}\n</Document>'
+        for doc in search_docs
+    )
+    return formatted_search_docs
+# Tool functions exposed to the agent. agent.py imports this list, binds it to
+# the LLM with bind_tools() and wraps it in a ToolNode.
+tools = [
+    multiply,
+    add,
+    subtract,
+    divide,
+    modulus,
+    wiki_search,
+    web_search,
+    arvix_search,
+]