Asaad Almutareb committed
Commit · 3dfd099 · 1 Parent(s): 8eb79b5

added an agent as a tool
Files changed:
- .devcontainer/Dockerfile +3 -1
- rag_app/agents/kb_retriever_agent.py +73 -0
- rag_app/agents/react_agent.py +5 -3
- rag_app/loading_data/load_S3_vector_stores.py +3 -36
- rag_app/structured_tools/agent_tools.py +0 -0
- rag_app/structured_tools/structured_tools.py +15 -3
- rag_app/templates/react_json_ger.py +47 -0
.devcontainer/Dockerfile
CHANGED
@@ -44,4 +44,6 @@ RUN echo "done 0" \
     && pyenv global ${PYTHON_VERSION} \
     && echo "done 3" \
     && curl -sSL https://install.python-poetry.org | python3 - \
-    && poetry config virtualenvs.in-project true
+    && poetry config virtualenvs.in-project true \
+    && echo "done 4" \
+    && pip install -r requirements.txt
rag_app/agents/kb_retriever_agent.py
ADDED
@@ -0,0 +1,73 @@
+# HF libraries
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain.agents import AgentExecutor
+from langchain.agents.format_scratchpad import format_log_to_str
+from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
+# Import things that are needed generically
+from langchain.tools.render import render_text_description
+import os
+from dotenv import load_dotenv
+from rag_app.structured_tools.structured_tools import (
+    google_search, knowledgeBase_search
+)
+
+from langchain.prompts import PromptTemplate
+from rag_app.templates.react_json_with_memory import template_system
+# from innovation_pathfinder_ai.utils import logger
+# from langchain.globals import set_llm_cache
+# from langchain.cache import SQLiteCache
+
+# set_llm_cache(SQLiteCache(database_path=".cache.db"))
+# logger = logger.get_console_logger("hf_mixtral_agent")
+
+config = load_dotenv(".env")
+HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+GOOGLE_CSE_ID = os.getenv('GOOGLE_CSE_ID')
+GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
+
+# Load the model from the Hugging Face Hub
+llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+                          temperature=0.1,
+                          max_new_tokens=1024,
+                          repetition_penalty=1.2,
+                          return_full_text=False
+)
+
+
+tools = [
+    knowledgeBase_search,
+    google_search,
+]
+
+prompt = PromptTemplate.from_template(
+    template=template_system
+)
+prompt = prompt.partial(
+    tools=render_text_description(tools),
+    tool_names=", ".join([t.name for t in tools]),
+)
+
+
+# define the agent
+chat_model_with_stop = llm.bind(stop=["\nObservation"])
+agent = (
+    {
+        "input": lambda x: x["input"],
+        "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
+        #"chat_history": lambda x: x["chat_history"],
+    }
+    | prompt
+    | chat_model_with_stop
+    | ReActJsonSingleInputOutputParser()
+)
+
+# instantiate AgentExecutor
+agent_executor = AgentExecutor(
+    agent=agent,
+    tools=tools,
+    verbose=True,
+    max_iterations=10, # cap number of iterations
+    #max_execution_time=60, # timeout at 60 sec
+    #return_intermediate_steps=True,
+    handle_parsing_errors=True,
+)
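A quick smoke test of the new retriever agent in isolation could look like the sketch below (illustrative, not part of the commit); it assumes .env provides HUGGINGFACEHUB_API_TOKEN plus the Google CSE credentials, and the question string is arbitrary. Note that the module imports template_system from react_json_with_memory rather than the react_json_ger template added in this commit; if that template references {chat_history}, the commented-out "chat_history" mapping above would need to be restored before the prompt can render.

# Illustrative smoke test, not part of the commit
from rag_app.agents.kb_retriever_agent import agent_executor

result = agent_executor.invoke({"input": "What does the household insurance cover?"})
print(result["output"])  # AgentExecutor.invoke returns a dict with an "output" key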
rag_app/agents/react_agent.py
CHANGED
@@ -8,7 +8,8 @@ from langchain.tools.render import render_text_description
 import os
 from dotenv import load_dotenv
 from rag_app.structured_tools.structured_tools import (
-    google_search, knowledgeBase_search
+    #google_search, knowledgeBase_search,
+    web_research
 )
 
 from langchain.prompts import PromptTemplate
@@ -39,8 +40,9 @@ llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
 
 
 tools = [
-    knowledgeBase_search,
-    google_search,
+    #knowledgeBase_search,
+    #google_search,
+    web_research
 ]
 
 prompt = PromptTemplate.from_template(
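With this swap, the outer ReAct agent's action space shrinks to a single delegating tool; the nested retriever agent is what the commit message calls an agent as a tool. A small sketch (illustrative, not in the commit) of what the outer agent now sees in its prompt:

# Illustrative: inspect the single tool exposed to the outer agent
from langchain.tools.render import render_text_description
from rag_app.structured_tools.structured_tools import web_research

print(web_research.name)                        # "web_research", fills {tool_names}
print(render_text_description([web_research]))  # "web_research: <docstring>", fills {tools}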
rag_app/loading_data/load_S3_vector_stores.py
CHANGED
@@ -10,7 +10,6 @@ from dotenv import load_dotenv
 import os
 import sys
 import logging
-from pathlib import Path
 
 # Load environment variables from a .env file
 config = load_dotenv(".env")
@@ -39,7 +38,6 @@ def get_faiss_vs():
 
     # Define the destination for the downloaded file
     VS_DESTINATION = FAISS_INDEX_PATH + ".zip"
-
    try:
         # Download the pre-prepared vectorized index from the S3 bucket
         print("Downloading the pre-prepared FAISS vectorized index from S3...")
@@ -49,36 +47,11 @@ def get_faiss_vs():
         with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
             zip_ref.extractall('./vectorstore/')
         print("Download and extraction completed.")
-        return FAISS.load_local(FAISS_INDEX_PATH, embeddings,allow_dangerous_deserialization=True)
+        return FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
 
     except Exception as e:
         print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
-        #
-
-
-def get_faiss_vs_from_s3(s3_loc:str,
-                         s3_vs_name:str,
-                         vs_dir:str='vectorstore') -> None:
-    """ Download the FAISS vector store from S3 bucket
-
-    Args:
-        s3_loc (str): Name of the S3 bucket
-        s3_vs_name (str): Name of the file to be downloaded
-        vs_dir (str): The name of the directory where the file is to be saved
-    """
-    # Initialize an S3 client with unsigned configuration for public access
-    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
-    # Destination folder
-    vs_dir_path = Path("..") / vs_dir
-    assert vs_dir_path.is_dir(), "Cannot find vs_dir folder"
-    try:
-        vs_destination = Path("..") / vs_dir / "faiss-insurance-agent-500.zip"
-        s3.download_file(s3_loc, s3_vs_name, vs_destination)
-        # Extract the downloaded zip file
-        with zipfile.ZipFile(file=vs_destination, mode='r') as zip_ref:
-            zip_ref.extractall(path=vs_dir_path.as_posix())
-    except Exception as e:
-        print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
+        #faissdb = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
 
 
 ## Chroma DB
@@ -97,10 +70,4 @@ def get_chroma_vs():
         chromadb = Chroma(persist_directory=CHROMA_DIRECTORY, embedding_function=embeddings)
         chromadb.get()
     except Exception as e:
-        print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    # get_faiss_vs_from_s3(s3_loc=S3_LOCATION, s3_vs_name=FAISS_VS_NAME)
-    pass
-
+        print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
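Since get_faiss_vs() returns the loaded FAISS store, a caller can go straight to retrieval. A sketch (illustrative, not part of the commit), assuming the S3 download succeeds and the module's embeddings are configured:

# Illustrative: consume the FAISS store returned by get_faiss_vs()
from rag_app.loading_data.load_S3_vector_stores import get_faiss_vs

faissdb = get_faiss_vs()  # downloads, unzips and loads the index
for doc in faissdb.similarity_search("household insurance coverage", k=3):
    print(doc.page_content[:200])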
rag_app/structured_tools/agent_tools.py
ADDED
File without changes
rag_app/structured_tools/structured_tools.py
CHANGED
@@ -17,6 +17,7 @@ from rag_app.utils.utils import (
 from rag_app.database.db_handler import (
     add_many
 )
+from rag_app.agents.kb_retriever_agent import agent_executor
 
 import os
 # from innovation_pathfinder_ai.utils import create_wikipedia_urls_from_text
@@ -71,7 +72,7 @@ def knowledgeBase_search(query:str) -> str:
         embedding_function=embedding_function,
     )
 
-    retriever = vector_db.as_retriever()
+    retriever = vector_db.as_retriever(search_kwargs={'k':1})
     # This is deprecated, changed to invoke
     # LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.
     docs = retriever.invoke(query)
@@ -83,7 +84,6 @@ def knowledgeBase_search(query:str) -> str:
 @tool
 def google_search(query: str) -> str:
     """Improve the results with a search of the insurance company's website. Formulate a new search query to improve the chances of success."""
-    global all_sources
 
     websearch = GoogleSearchAPIWrapper()
     search_results:dict = websearch.results(query, 3)
@@ -95,4 +95,16 @@ def google_search(query: str) -> str:
     else:
         cleaner_sources = search_results
 
-    return cleaner_sources.__str__()
+    return cleaner_sources.__str__()
+
+@tool
+def web_research(query: str) -> str:
+    """Improve the results with a search of the insurance company's website. Formulate a new search query to improve the chances of success."""
+
+    result = agent_executor.invoke(
+        {
+            "input": query
+        }
+    )
+    print(result)
+    return result.__str__()
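Because web_research is decorated with @tool, it is itself a LangChain tool and can be exercised on its own before wiring it into the outer agent. A sketch (illustrative, not part of the commit), assuming valid credentials in .env:

# Illustrative: call the delegating tool directly
from rag_app.structured_tools.structured_tools import web_research

answer = web_research.invoke("What does the household insurance cover?")  # runs the nested agent
print(answer)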
rag_app/templates/react_json_ger.py
ADDED
@@ -0,0 +1,47 @@
+template_system = """
+Answer the following questions as best you can. You have access to the following tools:
+
+<TOOLS>
+{tools}
+</TOOLS>
+
+The way you use the tools is by specifying a json blob.
+Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).
+
+The only values that should be in the "action" field are: {tool_names}
+
+The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:
+
+```
+{{
+    "action": $TOOL_NAME,
+    "action_input": $INPUT
+}}
+```
+
+ALWAYS use the following format:
+
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action:
+```
+$JSON_BLOB
+```
+Observation: the result of the action
+... (this Thought/Action/Observation can repeat N times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+
+Begin! Reminder to always use the exact characters `Final Answer` when responding.
+
+Previous conversation history:
+<CONVERSATION_HISTORY>
+{chat_history}
+</CONVERSATION_HISTORY>
+
+<NEW_INPUT>
+{input}
+</NEW_INPUT>
+
+{agent_scratchpad}
+"""
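The new template carries the same placeholders the retriever agent fills in kb_retriever_agent.py, so swapping it in is mechanical. A sketch (illustrative, not part of the commit) of wiring react_json_ger into an agent prompt:

# Illustrative: render the new template the same way kb_retriever_agent.py does
from langchain.prompts import PromptTemplate
from langchain.tools.render import render_text_description
from rag_app.structured_tools.structured_tools import google_search, knowledgeBase_search
from rag_app.templates.react_json_ger import template_system

tools = [knowledgeBase_search, google_search]
prompt = PromptTemplate.from_template(template_system).partial(
    tools=render_text_description(tools),
    tool_names=", ".join([t.name for t in tools]),
)
# {chat_history}, {input} and {agent_scratchpad} remain to be supplied at run time.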