srossitto79 committed
Commit 5fc56a3
2 Parent(s): 5ec7b76 ffc33c4

Merge branch 'main' of https://huggingface.co/spaces/srossitto79/AgentLlama007B

Files changed (5):
  1. RBotReloaded.py +40 -26
  2. README.md +18 -27
  3. agent_llama_ui.py +16 -6
  4. requirements.txt +6 -4
  5. start_agent.bat +1 -1
RBotReloaded.py CHANGED
@@ -15,7 +15,7 @@ import google_free_search
 from langchain.vectorstores import FAISS # For storing embeddings
 from langchain.chains import RetrievalQA, ConversationalRetrievalChain # Chains for QA
 from langchain.utilities import TextRequestsWrapper, WikipediaAPIWrapper # Tools
-from langchain.document_loaders import DirectoryLoader, PyMuPDFLoader, TextLoader, WebBaseLoader # Loaders
+from langchain.document_loaders import DirectoryLoader, PyMuPDFLoader, PyPDFLoader, TextLoader, WebBaseLoader # Loaders
 from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader # Load URLs
 from langchain.schema import AIMessage, HumanMessage, get_buffer_string # Chat history
 from langchain.text_splitter import RecursiveCharacterTextSplitter # Split text
@@ -70,11 +70,11 @@ def validate_and_fix_params(tool_name, params_list):
 def create_llm(model_id=f"{MODELS_DIR}/deepseek-coder-6.7b-instruct.Q5_K_M.gguf", load_4bit=False, load_8bit=False, ctx_len = 8192, temperature=0.5, top_p=0.95):
     if (model_id.startswith("http")):
         print(f"Creating TextGen LLM base_url:{model_id}")
-        return TextGen(model_url=model_id, callbacks=[StreamingStdOutCallbackHandler()])
+        return TextGen(model_url=model_id, seed=79, callbacks=[StreamingStdOutCallbackHandler()])
     if (os.path.exists(model_id)):
         try:
             print(f"Creating LlamaCpp LLM model_id:{model_id}")
-            return LlamaCpp(model_path=model_id, verbose=True, n_batch=521, alpha_value=1,rope_freq_base=10000,compress_pos_emb=ctx_len / 4096, n_ctx=ctx_len, load_in_4bit=load_4bit, load_in_8bit=load_8bit, temperature=temperature,top_p=top_p)
+            return LlamaCpp(model_path=model_id, verbose=True, n_batch=521, seed=79, alpha_value=1,rope_freq_base=10000,compress_pos_emb=ctx_len / 4096, n_ctx=ctx_len, load_in_4bit=load_4bit, load_in_8bit=load_8bit, temperature=temperature,top_p=top_p)
         except Exception as ex:
             try:
                 print(f"Creating CTransformers LLM model_id:{model_id}")
@@ -85,7 +85,7 @@ def create_llm(model_id=f"{MODELS_DIR}/deepseek-coder-6.7b-instruct.Q5_K_M.gguf"
                     "top_p":top_p,
                     "temperature":temperature
                 }
-                return CTransformers(model=model_id, model_type='llama', config=config)
+                return CTransformers(model=model_id, model_type='llama', seed=79, config=config)
 
             except Exception as ex:
                 print(f"Load Error {str(ex)}")
@@ -120,7 +120,7 @@ class StorageRetrievalLLM:
         # Load pages
         loader = DirectoryLoader(stored_pages_folder, glob="**/*.pdf", loader_cls=PyMuPDFLoader)
         documents = loader.load()
-
+
         # Split into chunks
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=EMBD_CHUNK_SIZE, chunk_overlap=100)
         documents = text_splitter.split_documents(documents)
@@ -179,12 +179,11 @@ class StorageRetrievalLLM:
 
         # Load file
         file_path = doc if os.path.exists(doc) else os.path.join("data", doc)
-        loader = DirectoryLoader(file_path, glob="**/*.pdf", loader_cls=PyMuPDFLoader)
-        documents = loader.load()
-
-        # Split and add
-        splitter = RecursiveCharacterTextSplitter()
-        documents = splitter.split_documents(documents)
+        self.stored_pages_folder
+        # loader = DirectoryLoader(file_path, glob="**/*.pdf", loader_cls=PyMuPDFLoader)
+        # documents = loader.load()
+        loader = PyPDFLoader(file_path)
+        documents = loader.load_and_split()
         self.vectorstore.add_documents(documents)
 
         # Update chain
@@ -200,12 +199,8 @@ class StorageRetrievalLLM:
     def addTextFileToMemory(self, file_path : str, summarize = True):
 
         # Load file
-        loader = TextLoader(path=file_path, loader_cls=PyMuPDFLoader)
-        documents = loader.load()
-
-        # Split and add
-        splitter = RecursiveCharacterTextSplitter()
-        documents = splitter.split_documents(documents)
+        loader = PyPDFLoader(file_path)
+        documents = loader.load_and_split()
         self.vectorstore.add_documents(documents)
 
         # Update chain
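Both ingestion methods now delegate chunking to `PyPDFLoader.load_and_split()` instead of running a separate `RecursiveCharacterTextSplitter` pass (note that in this commit `addTextFileToMemory` also routes plain text files through `PyPDFLoader`). A self-contained sketch of the load-split-index pattern, with placeholder file paths and the default sentence-transformers embeddings:

```python
# Sketch: load a PDF, let the loader split it, index the chunks in FAISS.
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings()  # default sentence-transformers model
docs = PyPDFLoader("knowledge_base/example.pdf").load_and_split()  # placeholder path

vectorstore = FAISS.from_documents(docs, embeddings)
# Later files are appended the same way StorageRetrievalLLM does it:
more = PyPDFLoader("knowledge_base/another.pdf").load_and_split()  # placeholder path
vectorstore.add_documents(more)
```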
@@ -278,12 +273,13 @@ AI: SearchAndReply("recent cryptocurrency news")
 USER: Can you calculate the factorial of 5?
 AI: Calculator("factorial(5)")
 
-### CURRENT CONVERSATION:
-SYS: Today is {str(datetime.now().date())},
-SYS: You are {AI_NAME} a smart and helpful AI assistant with access to external tools and knowledge.
+### REAL CONVERSATION:
+[INST]
+SYSTEM: You are {AI_NAME} a smart and helpful AI assistant with access to external tools and knowledge.
+Today is {str(datetime.now().date())}, please reply last user message directly or invoking a valid action from the following list:
+{self.tools_prompt()}
+[/INST]
 {formatted_history}
-SYS: Please reply next user message directly or invoking a valid action from the following list:
-{self.tools_prompt()}
 USER: {input}
 AI:
 """
@@ -321,8 +317,19 @@ AI:
             if f"{tool}:" in line.lower() or f"{tool}(" in line.lower():
                 action_name = tool
                 action_input = line[line.lower().find(tool)+len(tool):].strip().replace("query_params", "").strip().replace("()","")
-                print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
+                if (len(action_input) < 2):
+                    action_input = None
+                else:
+                    print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
+                if (action_name and action_input): break
+            elif action_name and not action_input:
+                action_input = line[line.find(":") + 1:].replace("\"","")
+                if (len(action_input) < 2):
+                    action_input = None
+                else:
+                    print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
         if (action_name and action_input): break
+
 
         # Call tool if found
         if action_name and action_input:
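The rewritten matcher above discards near-empty arguments (fewer than 2 characters) and adds a second chance for `Tool: argument` lines where the tool name matched but nothing usable followed it. A compact sketch of that two-step parse, assuming lower-case tool names:

```python
# Sketch: extract 'ToolName(arg)' or 'ToolName: arg' from a model reply,
# rejecting matches whose argument is effectively empty (< 2 chars).
def parse_action(line: str, tools: list[str]):
    lowered = line.lower()
    for tool in tools:
        if f"{tool}(" in lowered or f"{tool}:" in lowered:
            raw = line[lowered.find(tool) + len(tool):].strip()
            arg = raw.strip("():'\" ")
            if len(arg) >= 2:
                return tool, arg
            # Second chance: take everything after the first colon.
            arg = line.partition(":")[2].strip().strip('"')
            if len(arg) >= 2:
                return tool, arg
    return None, None

print(parse_action('AI: SearchAndReply("recent crypto news")', ["searchandreply"]))
# -> ('searchandreply', 'recent crypto news')
```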
@@ -342,8 +349,15 @@ AI:
 
             print(f"Action Output: {res}")
             observations.append(f"Action Output: {res}")
-            prompt = prompt + f"Action: {tool.name}({action_input})\nSYS: {res}\nAI:"
-            final_response = res # just in case it reaches max iterations
+            prompt = prompt + f"Action: {tool.name}({action_input})\SYSTEM:{res}"
+            if (i+1 == self.max_iterations):
+                print(f"MAX ITERATIONS REACHED. PLEASE PROVIDE A FINAL RESPONSE!")
+                prompt = prompt + "\nMAX ITERATIONS REACHED. PLEASE PROVIDE A FINAL RESPONSE!\nAI:"
+                output = str(self.llm(prompt,stop=["USER:","AI:","SYS:","SYSTEM:","[INST]","[/INST]"])).strip()
+                final_response = "\n*Reasoning: ".join(observations) + f"\n{output}" if len(observations) > 0 else f"\n{output}"
+                return { "output": final_response }
+            else:
+                prompt = prompt + "\nAI:"
         else:
             final_response = "\n*Reasoning: ".join(observations) + f"\n{output}" if len(observations) > 0 else f"\n{output}"
             print(f"Final Anser: {final_response}")
@@ -382,7 +396,7 @@ class SmartAgent:
         # Create agent
         self.smartAgent = self.create_smart_agent()
 
-        print("Smart Agent Initialized")
+        print(f"Smart Agent Initialized - CUDA Support:{torch.cuda.is_available()}")
 
     def reset_context(self):
         self.chat_history.clear()
README.md CHANGED
@@ -21,27 +21,26 @@ AgentLlama007B is a powerful Conversational AI Assistant designed for natural la
 
 - **Natural Language Conversations**: Engage in human-like conversations powered by local language models.
 - **Tool Integration**: Execute various tools, including image generation, web search, Wikipedia queries, and more, all within the conversation.
-- **Persistent Memory**: Contextual knowledge is stored in a vector database, providing continuity and enhancing the conversational experience.
+- **Knowledge Base Memory**: Document knowledge is stored in a vector database; you can add your own documents and texts there, which goes an extra mile in the conversational experience.
 - **Modular Architecture**: Easily extend AgentLlama007B with additional skills and tools to suit your specific needs.
 
 ## Getting Started
 
-To start using AgentLlama007B, follow these simple steps:
+To start using AgentLlama007B, follow these steps:
 
-Clone the repo and create a folder "models", than download the Models you need from hugging face and put them in the models folder.
-I use mistral-7b-instruct-v0.1.Q4_K_M.gguf for chat/instructions and dreamshaper_8 for images generation (:P you'll need dreamshaper_8.json and dreamshaper_8.safetensors)
+1. Clone the repository and create a folder named "models". Download the necessary models from Hugging Face and place them in the "models" folder. For chat/instructions, use "mistral-7b-instruct-v0.1.Q4_K_M.gguf", and for image generation, use "dreamshaper_8" (requires "dreamshaper_8.json" and "dreamshaper_8.safetensors").
 
-1. Install the required dependencies by running `pip install -r requirements.txt`.
+2. Install the required dependencies by running `pip install -r requirements.txt`.
 
-2. Run the main Streamlit app:
+3. Run the main Streamlit app:
 
 ```bash
 streamlit run agent_llama_ui.py
 ```
 
-3. Alternatively, you can integrate the agent into your Python code:
+Alternatively, you can integrate the agent into your Python code:
 
 ```python
 from agent_llama import SmartAgent
 
 agent = SmartAgent()
@@ -52,36 +51,28 @@ while True:
     print("Bot:", response)
 ```
 
-For more details on customization, model configuration, and tool parameters, refer to the code documentation.
+For more details on customization, model configuration, and tool parameters, refer to the code documentation and to the original model repositories.
 
 ## Implementation
 
 AgentLlama007B's core logic is encapsulated in the `RBotAgent` class, which manages the conversational flow and tool integration. The knowledge base tool, `StorageRetrievalLLM`, uses persistent memory with a FAISS index of document embeddings. Various tools are provided, each encapsulating specific skills such as image generation and web search. The modular architecture allows easy replacement of components like the language model.
 
-
 ## Why it matters
 
-AgentLlama007B demonstrates the power of modern conversational AI in a real-world setting. Unlike many research prototypes, it runs smoothly on consumer hardware - a single 8 core CPU with 16GB of RAM.
-
-Remarkably, AgentLlama007B achieves language understanding and task automation using a quantized 7B parameter model. This is orders of magnitude smaller than models that power other conversational agents. For example, ChatGPT4 use a 180B parameter model.
-
-In practice, this means AgentLlama007B can understand free-form instructions and execute complex workflows, the most of the times :-).
+AgentLlama007B demonstrates the power of modern conversational AI in a real-world setting. It runs smoothly on consumer hardware with a single 8-core CPU and 16GB of RAM.
+
+Remarkably, AgentLlama007B achieves language understanding and task automation using a quantized 7 billion parameter model, which is significantly smaller than the models used by other conversational agents. This makes it efficient and practical for various applications.
 
 ## Credits
 
-AgentLlama007B has been evaluated using TheBloke's Mistral-7B-Instruct-v0.1-GGUF model. This 7 billion parameter model was converted from [MistralAI's original Mistral-7B architecture](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1), i personally find this 7b model impressive.
+AgentLlama007B has been evaluated using TheBloke's Mistral-7B-Instruct-v0.1-GGUF model. This 7 billion parameter model was converted from MistralAI's original Mistral-7B architecture. The 7B model is impressive in its capabilities.
 
 This project was created by Salvatore Rossitto as a passion project and a learning endeavor. Contributions from the community are welcome and encouraged.
 
 ## License
 
-[MistralAI's original Mistral-7B architecture](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
-
-[TheBloke MistralAI's Mistral-7B GGUF architecture](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
-
-AgentLlama007B is an open-source project released under the MIT license.
-You are free to use, modify, and distribute it as per the terms of the license.
-
-The LLM model downloaded is subject to the original author license.
+AgentLlama007B is an open-source project released under the MIT license. You are free to use, modify, and distribute it according to the terms of the license.
+
+The Mistral-7B-Instruct-v0.1 model by MistralAI and TheBloke's Mistral-7B-Instruct-v0.1-GGUF model are subject to their respective licenses. Please refer to the original authors' licenses for more information.
agent_llama_ui.py CHANGED
@@ -18,8 +18,8 @@ load_dotenv()
 default_model = ""
 default_context = 8192
 default_load_type = "Auto"
-default_iterations = 2
-default_temperature = 0.5
+default_iterations = 3
+default_temperature = 0.2
 default_topp = 0.95
 
 @st.cache_resource
@@ -144,7 +144,8 @@ def get_index_size():
 # def factory():
 #     return current_agent().smartAgent
 
 def render_simple_chat():
+    st.markdown("<h3 style='text-align: center;'>To fully utilize all functionalities of this demo, you'll require a minimum of a 16-core CPU and 32GB of RAM. Please note that the limited resources available in Huggingface free spaces may lead to slow responses and potential crashes due to out-of-memory issues during image generation.</h3>", unsafe_allow_html=True)
     models = get_models()
     models.append("")
@@ -186,6 +187,14 @@ def render_simple_chat():
 
     uploaded_file = st.sidebar.file_uploader("Drag and Drop a File to ./knowledge_base/", type=["txt", "pdf", "docx"])
 
+    knowledge_files = glob.glob(f"./knowledge_base/*.*")
+    st.sidebar.subheader("Knowledge Files")
+    for file_path in knowledge_files:
+        if not "index." in file_path.lower():
+            file_path = file_path.replace("\\", "/")
+            file_name = file_path.split("/")[-1]
+            st.sidebar.markdown(f"[{file_name}](/{file_path})", unsafe_allow_html=True)
+
     if st.sidebar.button("Reset Long Term Memory", disabled=not (current_agent() is not None and get_index_size() > 0)) and current_agent() is not None:
         current_agent().reset_knowledge()
@@ -213,10 +222,11 @@ def render_simple_chat():
     generated_files = get_generated_files()
     st.sidebar.subheader("Generated Files")
     for file_path in generated_files:
-        file_name = file_path.split("/")[-1].split("\\")[-1]
+        file_path = file_path.replace("\\", "/")
+        file_name = file_path.split("/")[-1]
         st.write("---")
-        st.markdown(f"[{file_name}]({file_path})", unsafe_allow_html=True)
-        st.image(file_path)
+        st.markdown(f"[{file_name}](/{file_path})", unsafe_allow_html=True)
+        st.image(file_path, use_column_width=True)
 
     i = 0
     for m in history():
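Both sidebar listings now normalize Windows backslashes to forward slashes before building markdown links, since `\` inside a link target breaks rendering and the Space serves files from forward-slash paths. The same idea via `pathlib`, as an alternative sketch:

```python
# Sketch: turn a platform-specific path into a forward-slash markdown link.
from pathlib import PureWindowsPath

def as_link(file_path: str) -> str:
    # PureWindowsPath accepts both "\" and "/" separators on any host OS.
    p = PureWindowsPath(file_path)
    return f"[{p.name}](/{p.as_posix()})"

print(as_link("generated_images\\sample.png"))
# -> [sample.png](/generated_images/sample.png)
```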
requirements.txt CHANGED
@@ -1,6 +1,7 @@
-torch
-torchaudio
-torchvision
+numpy>=1.24.1
+torch --index-url https://download.pytorch.org/whl/cu118
+torchvision --index-url https://download.pytorch.org/whl/cu118
+torchaudio --index-url https://download.pytorch.org/whl/cu118
 accelerate
 aiohttp
 anyio
@@ -23,12 +24,13 @@ numexpr
 llama_cpp_python
 psutil
 PyMuPDF
+pypdf
 safetensors
 selenium
 sentence-transformers
 sentencepiece
+streamlit>=1.25.0
 streamlit_chat
-streamlit>=0.86.0
 textblob
 undetected-chromedriver
 urllib3
 
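Pinning `--index-url https://download.pytorch.org/whl/cu118` selects CUDA 11.8 wheels for torch/torchvision/torchaudio, which is what makes the new `CUDA Support:` startup message in RBotReloaded.py meaningful. A quick post-install check:

```python
# Sketch: confirm the installed torch build actually sees a GPU.
import torch

print("torch:", torch.__version__)  # CUDA wheels report e.g. "2.0.1+cu118"
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```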
start_agent.bat CHANGED
@@ -13,7 +13,7 @@ rem Activate the virtual environment
 call %ENV_NAME%\Scripts\activate
 
 rem Install the required packages from requirements.txt
-python -m pip install --user -r requirements.txt
+python -m pip install -r requirements.txt
 
 rem Run your Streamlit application
 python -m streamlit run agent_llama_ui.py
 
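Dropping `--user` matters here: the script has just activated a virtual environment, and pip refuses user-site installs inside one (it aborts with "Can not perform a '--user' install. User site-packages are not visible in this virtualenv."), so the previous command could fail on a fresh setup.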