srossitto79 committed 5ec7b76 (parent: d13f3dd)

added airLLM

Files changed (6)
  1. .DS_Store +0 -0
  2. AirLLM.py +52 -0
  3. RBotReloaded.py +6 -2
  4. agent_llama_ui.py +2 -1
  5. requirements.txt +2 -1
  6. start_agent.sh +22 -0
.DS_Store ADDED
Binary file (8.2 kB)
 
AirLLM.py ADDED
@@ -0,0 +1,52 @@
+ from typing import Any, List, Mapping, Optional
+
+ from langchain.callbacks.manager import CallbackManagerForLLMRun
+ from langchain.llms.base import LLM
+
+ from airllm import AirLLMLlama2
+
+
+ class AirLLM(LLM):
+     """LangChain LLM wrapper around an AirLLM Llama 2 model."""
+
+     max_len: int
+     model: AirLLMLlama2
+
+     def __init__(self, llama2_model_id: str, max_len: int, compression: str = ""):
+         # could use a local path or a Hugging Face model repo id
+         model = AirLLMLlama2(llama2_model_id, compression=compression)
+         # LLM is a pydantic model, so fields must be set through the base initializer
+         super().__init__(model=model, max_len=max_len)
+
+     @property
+     def _llm_type(self) -> str:
+         return "custom"
+
+     def _call(
+         self,
+         prompt: str,
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> str:
+         if stop is not None:
+             raise ValueError("stop kwargs are not permitted.")
+
+         # Tokenize the prompt, truncating to the configured context length
+         input_tokens = self.model.tokenizer(prompt,
+                                             return_tensors="pt",
+                                             return_attention_mask=False,
+                                             truncation=True,
+                                             max_length=self.max_len,
+                                             padding=True)
+
+         # Generate on GPU; AirLLM loads layers on demand to keep memory low
+         generation_output = self.model.generate(
+             input_tokens['input_ids'].cuda(),
+             max_new_tokens=20,
+             use_cache=True,
+             return_dict_in_generate=True)
+
+         output = self.model.tokenizer.decode(generation_output.sequences[0])
+         return output
+
+     @property
+     def _identifying_params(self) -> Mapping[str, Any]:
+         """Get the identifying parameters."""
+         return {"max_len": self.max_len}
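A minimal usage sketch for this wrapper (the repo id and settings below are illustrative, not part of the commit):

    from AirLLM import AirLLM

    # hypothetical checkpoint; any Llama 2 style path or Hugging Face repo id should work
    llm = AirLLM(llama2_model_id="meta-llama/Llama-2-7b-hf", max_len=8192, compression="4bit")
    print(llm("Summarize what AirLLM does in one sentence."))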
RBotReloaded.py CHANGED
@@ -34,12 +34,13 @@ from typing import Any, Dict, List
  import torch
  from diffusers.pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt
  import inspect
+ from AirLLM import AirLLM

  # Config
  EMBD_CHUNK_SIZE = 512
  AI_NAME = "Agent Llama"
  USER_NAME = "Buddy"
- MODELS_DIR = "models"
+ MODELS_DIR = "./models"

  def validate_and_fix_params(tool_name, params_list):
      try:
@@ -66,7 +67,7 @@ def validate_and_fix_params(tool_name, params_list):
          return []

  # Helper to load LM
- def create_llm(model_id=f"{MODELS_DIR}/mistral-7b-instruct-v0.1.Q4_K_M.gguf", load_4bit=False, load_8bit=False, ctx_len=8192, temperature=0.5, top_p=0.95):
+ def create_llm(model_id=f"{MODELS_DIR}/deepseek-coder-6.7b-instruct.Q5_K_M.gguf", load_4bit=False, load_8bit=False, ctx_len=8192, temperature=0.5, top_p=0.95):
      if (model_id.startswith("http")):
          print(f"Creating TextGen LLM base_url:{model_id}")
          return TextGen(model_url=model_id, callbacks=[StreamingStdOutCallbackHandler()])
@@ -89,6 +90,9 @@ def create_llm(model_id=f"{MODELS_DIR}/mistral-7b-instruct-v0.1.Q4_K_M.gguf", lo
      except Exception as ex:
          print(f"Load Error {str(ex)}")
          return None
+     else:
+         print(f"Trying AirLLM to load model_id:{model_id}")
+         return AirLLM(llama2_model_id=model_id, max_len=ctx_len, compression=("4bit" if load_4bit else "8bit" if load_8bit else ""))

  # Class to store pages and run queries
  class StorageRetrievalLLM:
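An illustrative call that would reach the new AirLLM fallback in create_llm (the model id is hypothetical; URLs still go to TextGen and local GGUF files to the existing loaders first):

    # falls through to AirLLM when no other loader handles the model id
    llm = create_llm(model_id="meta-llama/Llama-2-13b-hf", load_4bit=True, ctx_len=4096)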
agent_llama_ui.py CHANGED
@@ -15,7 +15,7 @@ from langchain.schema import AIMessage, HumanMessage
  load_dotenv()


- default_model = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
+ default_model = ""
  default_context = 8192
  default_load_type = "Auto"
  default_iterations = 2
@@ -43,6 +43,7 @@ def get_models():
      models = os.listdir(models_directory)
      # Filter out any subdirectories, if any
      models = [model for model in models if (model.lower().split(".")[-1] in supported_extensions) and os.path.isfile(os.path.join(models_directory, model))]
+
      if len(models) == 0:
          st.write("Downloading models")
          from huggingface_hub import hf_hub_download
requirements.txt CHANGED
@@ -42,4 +42,5 @@ Pillow
  langchain
  googletrans
  python-dotenv
- omegaconf
+ omegaconf
+ airllm
start_agent.sh ADDED
@@ -0,0 +1,22 @@
+ #!/bin/bash
+
+ # Define the name of your virtual environment
+ ENV_NAME="myenv"
+
+ # Check if the virtual environment folder exists
+ if [ ! -d "$ENV_NAME" ]; then
+     # Create a new virtual environment
+     python -m venv "$ENV_NAME"
+ fi
+
+ # Activate the virtual environment
+ source "$ENV_NAME/bin/activate"
+
+ # Install the required packages from requirements.txt
+ python -m pip install -r requirements.txt
+
+ # Run the Streamlit application
+ python -m streamlit run agent_llama_ui.py
+
+ # Deactivate the virtual environment
+ deactivate