srossitto79 committed
Commit 5fc56a3
2 Parent(s): 5ec7b76 ffc33c4

Merge branch 'main' of https://huggingface.co/spaces/srossitto79/AgentLlama007B

Files changed (5):
  1. RBotReloaded.py +40 -26
  2. README.md +18 -27
  3. agent_llama_ui.py +16 -6
  4. requirements.txt +6 -4
  5. start_agent.bat +1 -1
RBotReloaded.py CHANGED
@@ -15,7 +15,7 @@ import google_free_search
 from langchain.vectorstores import FAISS # For storing embeddings
 from langchain.chains import RetrievalQA, ConversationalRetrievalChain # Chains for QA
 from langchain.utilities import TextRequestsWrapper, WikipediaAPIWrapper # Tools
-from langchain.document_loaders import DirectoryLoader, PyMuPDFLoader, TextLoader, WebBaseLoader # Loaders
+from langchain.document_loaders import DirectoryLoader, PyMuPDFLoader, PyPDFLoader, TextLoader, WebBaseLoader # Loaders
 from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader # Load URLs
 from langchain.schema import AIMessage, HumanMessage, get_buffer_string # Chat history
 from langchain.text_splitter import RecursiveCharacterTextSplitter # Split text
@@ -70,11 +70,11 @@ def validate_and_fix_params(tool_name, params_list):
 def create_llm(model_id=f"{MODELS_DIR}/deepseek-coder-6.7b-instruct.Q5_K_M.gguf", load_4bit=False, load_8bit=False, ctx_len = 8192, temperature=0.5, top_p=0.95):
     if (model_id.startswith("http")):
         print(f"Creating TextGen LLM base_url:{model_id}")
-        return TextGen(model_url=model_id, callbacks=[StreamingStdOutCallbackHandler()])
+        return TextGen(model_url=model_id, seed=79, callbacks=[StreamingStdOutCallbackHandler()])
     if (os.path.exists(model_id)):
         try:
             print(f"Creating LlamaCpp LLM model_id:{model_id}")
-            return LlamaCpp(model_path=model_id, verbose=True, n_batch=521, alpha_value=1,rope_freq_base=10000,compress_pos_emb=ctx_len / 4096, n_ctx=ctx_len, load_in_4bit=load_4bit, load_in_8bit=load_8bit, temperature=temperature,top_p=top_p)
+            return LlamaCpp(model_path=model_id, verbose=True, n_batch=521, seed=79, alpha_value=1,rope_freq_base=10000,compress_pos_emb=ctx_len / 4096, n_ctx=ctx_len, load_in_4bit=load_4bit, load_in_8bit=load_8bit, temperature=temperature,top_p=top_p)
         except Exception as ex:
             try:
                 print(f"Creating CTransformers LLM model_id:{model_id}")
@@ -85,7 +85,7 @@ def create_llm(model_id=f"{MODELS_DIR}/deepseek-coder-6.7b-instruct.Q5_K_M.gguf"
                     "top_p":top_p,
                     "temperature":temperature
                 }
-                return CTransformers(model=model_id, model_type='llama', config=config)
+                return CTransformers(model=model_id, model_type='llama', seed=79, config=config)
 
             except Exception as ex:
                 print(f"Load Error {str(ex)}")
@@ -120,7 +120,7 @@ class StorageRetrievalLLM:
         # Load pages
         loader = DirectoryLoader(stored_pages_folder, glob="**/*.pdf", loader_cls=PyMuPDFLoader)
         documents = loader.load()
-
+
         # Split into chunks
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=EMBD_CHUNK_SIZE, chunk_overlap=100)
         documents = text_splitter.split_documents(documents)
@@ -179,12 +179,11 @@ class StorageRetrievalLLM:
 
         # Load file
         file_path = doc if os.path.exists(doc) else os.path.join("data", doc)
-        loader = DirectoryLoader(file_path, glob="**/*.pdf", loader_cls=PyMuPDFLoader)
-        documents = loader.load()
-
-        # Split and add
-        splitter = RecursiveCharacterTextSplitter()
-        documents = splitter.split_documents(documents)
+        self.stored_pages_folder
+        # loader = DirectoryLoader(file_path, glob="**/*.pdf", loader_cls=PyMuPDFLoader)
+        # documents = loader.load()
+        loader = PyPDFLoader(file_path)
+        documents = loader.load_and_split()
         self.vectorstore.add_documents(documents)
 
         # Update chain
@@ -200,12 +199,8 @@ class StorageRetrievalLLM:
     def addTextFileToMemory(self, file_path : str, summarize = True):
 
         # Load file
-        loader = TextLoader(path=file_path, loader_cls=PyMuPDFLoader)
-        documents = loader.load()
-
-        # Split and add
-        splitter = RecursiveCharacterTextSplitter()
-        documents = splitter.split_documents(documents)
+        loader = PyPDFLoader(file_path)
+        documents = loader.load_and_split()
         self.vectorstore.add_documents(documents)
 
         # Update chain
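Both ingestion methods now delegate chunking to `PyPDFLoader.load_and_split()` instead of running a separate `RecursiveCharacterTextSplitter` pass (note that in this commit `addTextFileToMemory` also routes plain text files through `PyPDFLoader`). A self-contained sketch of the load-split-index pattern, with placeholder file paths and the default sentence-transformers embeddings:

```python
# Sketch: load a PDF, let the loader split it, index the chunks in FAISS.
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings()  # default sentence-transformers model
docs = PyPDFLoader("knowledge_base/example.pdf").load_and_split()  # placeholder path

vectorstore = FAISS.from_documents(docs, embeddings)
# Later files are appended the same way StorageRetrievalLLM does it:
more = PyPDFLoader("knowledge_base/another.pdf").load_and_split()  # placeholder path
vectorstore.add_documents(more)
```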
@@ -278,12 +273,13 @@ AI: SearchAndReply("recent cryptocurrency news")
 USER: Can you calculate the factorial of 5?
 AI: Calculator("factorial(5)")
 
-### CURRENT CONVERSATION:
-SYS: Today is {str(datetime.now().date())},
-SYS: You are {AI_NAME} a smart and helpful AI assistant with access to external tools and knowledge.
+### REAL CONVERSATION:
+[INST]
+SYSTEM: You are {AI_NAME} a smart and helpful AI assistant with access to external tools and knowledge.
+Today is {str(datetime.now().date())}, please reply last user message directly or invoking a valid action from the following list:
+{self.tools_prompt()}
+[/INST]
 {formatted_history}
-SYS: Please reply next user message directly or invoking a valid action from the following list:
-{self.tools_prompt()}
 USER: {input}
 AI:
 """
@@ -321,8 +317,19 @@ AI:
             if f"{tool}:" in line.lower() or f"{tool}(" in line.lower():
                 action_name = tool
                 action_input = line[line.lower().find(tool)+len(tool):].strip().replace("query_params", "").strip().replace("()","")
-                print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
+                if (len(action_input) < 2):
+                    action_input = None
+                else:
+                    print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
+                if (action_name and action_input): break
+            elif action_name and not action_input:
+                action_input = line[line.find(":") + 1:].replace("\"","")
+                if (len(action_input) < 2):
+                    action_input = None
+                else:
+                    print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
         if (action_name and action_input): break
+
 
         # Call tool if found
         if action_name and action_input:
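The rewritten matcher above discards near-empty arguments (fewer than 2 characters) and adds a second chance for `Tool: argument` lines where the tool name matched but nothing usable followed it. A compact sketch of that two-step parse, assuming lower-case tool names:

```python
# Sketch: extract 'ToolName(arg)' or 'ToolName: arg' from a model reply,
# rejecting matches whose argument is effectively empty (< 2 chars).
def parse_action(line: str, tools: list[str]):
    lowered = line.lower()
    for tool in tools:
        if f"{tool}(" in lowered or f"{tool}:" in lowered:
            raw = line[lowered.find(tool) + len(tool):].strip()
            arg = raw.strip("():'\" ")
            if len(arg) >= 2:
                return tool, arg
            # Second chance: take everything after the first colon.
            arg = line.partition(":")[2].strip().strip('"')
            if len(arg) >= 2:
                return tool, arg
    return None, None

print(parse_action('AI: SearchAndReply("recent crypto news")', ["searchandreply"]))
# -> ('searchandreply', 'recent crypto news')
```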
@@ -342,8 +349,15 @@ AI:
 
             print(f"Action Output: {res}")
             observations.append(f"Action Output: {res}")
-            prompt = prompt + f"Action: {tool.name}({action_input})\nSYS: {res}\nAI:"
-            final_response = res # just in case it reaches max iterations
+            prompt = prompt + f"Action: {tool.name}({action_input})\SYSTEM:{res}"
+            if (i+1 == self.max_iterations):
+                print(f"MAX ITERATIONS REACHED. PLEASE PROVIDE A FINAL RESPONSE!")
+                prompt = prompt + "\nMAX ITERATIONS REACHED. PLEASE PROVIDE A FINAL RESPONSE!\nAI:"
+                output = str(self.llm(prompt,stop=["USER:","AI:","SYS:","SYSTEM:","[INST]","[/INST]"])).strip()
+                final_response = "\n*Reasoning: ".join(observations) + f"\n{output}" if len(observations) > 0 else f"\n{output}"
+                return { "output": final_response }
+            else:
+                prompt = prompt + "\nAI:"
         else:
             final_response = "\n*Reasoning: ".join(observations) + f"\n{output}" if len(observations) > 0 else f"\n{output}"
             print(f"Final Anser: {final_response}")
@@ -382,7 +396,7 @@ class SmartAgent:
         # Create agent
         self.smartAgent = self.create_smart_agent()
 
-        print("Smart Agent Initialized")
+        print(f"Smart Agent Initialized - CUDA Support:{torch.cuda.is_available()}")
 
     def reset_context(self):
         self.chat_history.clear()
README.md CHANGED
@@ -21,27 +21,26 @@ AgentLlama007B is a powerful Conversational AI Assistant designed for natural la
 
 - **Natural Language Conversations**: Engage in human-like conversations powered by local language models.
 - **Tool Integration**: Execute various tools, including image generation, web search, Wikipedia queries, and more, all within the conversation.
-- **Persistent Memory**: Contextual knowledge is stored in a vector database, providing continuity and enhancing the conversational experience.
+- **Knowledge Base Memory**: Document knowledge is stored in a vector database; you can add your own documents and texts there, which goes an extra mile in the conversational experience.
 - **Modular Architecture**: Easily extend AgentLlama007B with additional skills and tools to suit your specific needs.
 
 ## Getting Started
 
-To start using AgentLlama007B, follow these simple steps:
+To start using AgentLlama007B, follow these steps:
 
-Clone the repo and create a folder "models", than download the Models you need from hugging face and put them in the models folder.
-I use mistral-7b-instruct-v0.1.Q4_K_M.gguf for chat/instructions and dreamshaper_8 for images generation (:P you'll need dreamshaper_8.json and dreamshaper_8.safetensors)
+1. Clone the repository and create a folder named "models". Download the necessary models from Hugging Face and place them in the "models" folder. For chat/instructions, use "mistral-7b-instruct-v0.1.Q4_K_M.gguf", and for image generation, use "dreamshaper_8" (requires "dreamshaper_8.json" and "dreamshaper_8.safetensors").
 
-1. Install the required dependencies by running `pip install -r requirements.txt`.
+2. Install the required dependencies by running `pip install -r requirements.txt`.
 
-2. Run the main Streamlit app:
+3. Run the main Streamlit app:
 
 ```bash
 streamlit run agent_llama_ui.py
 ```
 
-3. Alternatively, you can integrate the agent into your Python code:
+Alternatively, you can integrate the agent into your Python code:
 
 ```python
 from agent_llama import SmartAgent
 
 agent = SmartAgent()
@@ -52,36 +51,28 @@ while True:
     print("Bot:", response)
 ```
 
-For more details on customization, model configuration, and tool parameters, refer to the code documentation.
+For more details on customization, model configuration, and tool parameters, refer to the code documentation and to the original model repositories.
 
 ## Implementation
 
 AgentLlama007B's core logic is encapsulated in the `RBotAgent` class, which manages the conversational flow and tool integration. The knowledge base tool, `StorageRetrievalLLM`, uses persistent memory with a FAISS index of document embeddings. Various tools are provided, each encapsulating specific skills such as image generation and web search. The modular architecture allows easy replacement of components like the language model.
 
-
 ## Why it matters
 
-AgentLlama007B demonstrates the power of modern conversational AI in a real-world setting. Unlike many research prototypes, it runs smoothly on consumer hardware - a single 8 core CPU with 16GB of RAM.
-
-Remarkably, AgentLlama007B achieves language understanding and task automation using a quantized 7B parameter model. This is orders of magnitude smaller than models that power other conversational agents. For example, ChatGPT4 use a 180B parameter model.
-
-In practice, this means AgentLlama007B can understand free-form instructions and execute complex workflows, the most of the times :-).
+AgentLlama007B demonstrates the power of modern conversational AI in a real-world setting. It runs smoothly on consumer hardware with a single 8-core CPU and 16GB of RAM.
+
+Remarkably, AgentLlama007B achieves language understanding and task automation using a quantized 7 billion parameter model, which is significantly smaller than the models used by other conversational agents. This makes it efficient and practical for various applications.
 
 ## Credits
 
-AgentLlama007B has been evaluated using TheBloke's Mistral-7B-Instruct-v0.1-GGUF model. This 7 billion parameter model was converted from [MistralAI's original Mistral-7B architecture](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1), i personally find this 7b model impressive.
+AgentLlama007B has been evaluated using TheBloke's Mistral-7B-Instruct-v0.1-GGUF model. This 7 billion parameter model was converted from MistralAI's original Mistral-7B architecture. The 7B model is impressive in its capabilities.
 
 This project was created by Salvatore Rossitto as a passion project and a learning endeavor. Contributions from the community are welcome and encouraged.
 
 ## License
 
-[MistralAI's original Mistral-7B architecture](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
-
-[TheBloke MistralAI's Mistral-7B GGUF architecture](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
-
-AgentLlama007B is an open-source project released under the MIT license.
-You are free to use, modify, and distribute it as per the terms of the license.
-
-The LLM model downloaded is subject to the original author license.
+AgentLlama007B is an open-source project released under the MIT license. You are free to use, modify, and distribute it according to the terms of the license.
+
+The Mistral-7B-Instruct-v0.1 model by MistralAI and TheBloke's Mistral-7B-Instruct-v0.1-GGUF model are subject to their respective licenses. Please refer to the original authors' licenses for more information.
agent_llama_ui.py CHANGED
@@ -18,8 +18,8 @@ load_dotenv()
 default_model = ""
 default_context = 8192
 default_load_type = "Auto"
-default_iterations = 2
-default_temperature = 0.5
+default_iterations = 3
+default_temperature = 0.2
 default_topp = 0.95
 
 @st.cache_resource
@@ -144,7 +144,8 @@ def get_index_size():
 # def factory():
 #     return current_agent().smartAgent
 
 def render_simple_chat():
+    st.markdown("<h3 style='text-align: center;'>To fully utilize all functionalities of this demo, you'll require a minimum of a 16-core CPU and 32GB of RAM. Please note that the limited resources available in Huggingface free spaces may lead to slow responses and potential crashes due to out-of-memory issues during image generation.</h3>", unsafe_allow_html=True)
     models = get_models()
     models.append("")
@@ -186,6 +187,14 @@ def render_simple_chat():
 
     uploaded_file = st.sidebar.file_uploader("Drag and Drop a File to ./knowledge_base/", type=["txt", "pdf", "docx"])
 
+    knowledge_files = glob.glob(f"./knowledge_base/*.*")
+    st.sidebar.subheader("Knowledge Files")
+    for file_path in knowledge_files:
+        if not "index." in file_path.lower():
+            file_path = file_path.replace("\\", "/")
+            file_name = file_path.split("/")[-1]
+            st.sidebar.markdown(f"[{file_name}](/{file_path})", unsafe_allow_html=True)
+
     if st.sidebar.button("Reset Long Term Memory", disabled=not (current_agent() is not None and get_index_size() > 0)) and current_agent() is not None:
         current_agent().reset_knowledge()
@@ -213,10 +222,11 @@ def render_simple_chat():
     generated_files = get_generated_files()
     st.sidebar.subheader("Generated Files")
     for file_path in generated_files:
-        file_name = file_path.split("/")[-1].split("\\")[-1]
+        file_path = file_path.replace("\\", "/")
+        file_name = file_path.split("/")[-1]
         st.write("---")
-        st.markdown(f"[{file_name}]({file_path})", unsafe_allow_html=True)
-        st.image(file_path)
+        st.markdown(f"[{file_name}](/{file_path})", unsafe_allow_html=True)
+        st.image(file_path, use_column_width=True)
 
     i = 0
     for m in history():
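Both sidebar listings now normalize Windows backslashes to forward slashes before building markdown links, since `\` inside a link target breaks rendering and the Space serves files from forward-slash paths. The same idea via `pathlib`, as an alternative sketch:

```python
# Sketch: turn a platform-specific path into a forward-slash markdown link.
from pathlib import PureWindowsPath

def as_link(file_path: str) -> str:
    # PureWindowsPath accepts both "\" and "/" separators on any host OS.
    p = PureWindowsPath(file_path)
    return f"[{p.name}](/{p.as_posix()})"

print(as_link("generated_images\\sample.png"))
# -> [sample.png](/generated_images/sample.png)
```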
requirements.txt CHANGED
@@ -1,6 +1,7 @@
-torch
-torchaudio
-torchvision
+numpy>=1.24.1
+torch --index-url https://download.pytorch.org/whl/cu118
+torchvision --index-url https://download.pytorch.org/whl/cu118
+torchaudio --index-url https://download.pytorch.org/whl/cu118
 accelerate
 aiohttp
 anyio
@@ -23,12 +24,13 @@ numexpr
 llama_cpp_python
 psutil
 PyMuPDF
+pypdf
 safetensors
 selenium
 sentence-transformers
 sentencepiece
+streamlit>=1.25.0
 streamlit_chat
-streamlit>=0.86.0
 textblob
 undetected-chromedriver
 urllib3
 
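Pinning `--index-url https://download.pytorch.org/whl/cu118` selects CUDA 11.8 wheels for torch/torchvision/torchaudio, which is what makes the new `CUDA Support:` startup message in RBotReloaded.py meaningful. A quick post-install check:

```python
# Sketch: confirm the installed torch build actually sees a GPU.
import torch

print("torch:", torch.__version__)  # CUDA wheels report e.g. "2.0.1+cu118"
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```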
start_agent.bat CHANGED
@@ -13,7 +13,7 @@ rem Activate the virtual environment
 call %ENV_NAME%\Scripts\activate
 
 rem Install the required packages from requirements.txt
-python -m pip install --user -r requirements.txt
+python -m pip install -r requirements.txt
 
 rem Run your Streamlit application
 python -m streamlit run agent_llama_ui.py
 
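Dropping `--user` matters here: the script has just activated a virtual environment, and pip refuses user-site installs inside one (it aborts with "Can not perform a '--user' install. User site-packages are not visible in this virtualenv."), so the previous command could fail on a fresh setup.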