sykuann1 committed on
Commit
0af46f8
·
verified ·
1 Parent(s): 4d136b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -12
app.py CHANGED
@@ -15,30 +15,26 @@ import gradio as gr
15
  DOC_PATH = './data/pdf_esg'
16
  INDEX_PATH = './storage'
17
  llm = LlamaCPP(
18
- model_url='https://huggingface.co/mistralai/Mistral-7B-v0.1',
19
-
 
20
  # optionally, you can set the path to a pre-downloaded model instead of model_url
21
  model_path=None,
22
-
23
- temperature=0.0,
24
- max_new_tokens=1024,
25
-
26
  # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
27
- context_window=3900, # note, this sets n_ctx in the model_kwargs below, so you don't need to pass it there.
28
-
29
  # kwargs to pass to __call__()
30
  generate_kwargs={},
31
-
32
  # kwargs to pass to __init__()
33
  # set to at least 1 to use GPU
34
- model_kwargs={"n_gpu_layers": 4}, # I need to play with this and see if it actually helps
35
-
36
  # transform inputs into Llama2 format
37
  messages_to_prompt=messages_to_prompt,
38
  completion_to_prompt=completion_to_prompt,
39
  verbose=True,
40
  )
41
- Settings.llm = Ollama(model="mistral")
42
  Settings.llm = llm
43
  Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
44
  embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
 
15
# Locations of the source PDFs and the persisted vector-store index.
DOC_PATH = './data/pdf_esg'
INDEX_PATH = './storage'

# Local Mistral-7B-Instruct served through llama.cpp.
llm = LlamaCPP(
    # You can pass in the URL to a GGUF model to download it automatically
    # model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf',
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Mistral/llama2-style models use a 4096-token context window; we use it in full here.
    context_window=4096,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__();
    # n_gpu_layers=-1 offloads every layer to the GPU (set to 0 for CPU-only).
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into Llama2 chat/prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
# Settings.llm = Ollama(model="mistral")
Settings.llm = llm

# Instantiate the embedding model ONCE and share the instance: the original
# built two identical HuggingFaceEmbedding objects, loading the BGE weights twice.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model