sykuann1 committed on
Commit
0af46f8
·
verified ·
1 Parent(s): 4d136b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -12
app.py CHANGED
@@ -15,30 +15,26 @@ import gradio as gr
15
  DOC_PATH = './data/pdf_esg'
16
  INDEX_PATH = './storage'
17
  llm = LlamaCPP(
18
- model_url='https://huggingface.co/mistralai/Mistral-7B-v0.1',
19
-
 
20
  # optionally, you can set the path to a pre-downloaded model instead of model_url
21
  model_path=None,
22
-
23
- temperature=0.0,
24
- max_new_tokens=1024,
25
-
26
  # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
27
- context_window=3900, # note, this sets n_ctx in the model_kwargs below, so you don't need to pass it there.
28
-
29
  # kwargs to pass to __call__()
30
  generate_kwargs={},
31
-
32
  # kwargs to pass to __init__()
33
  # set to at least 1 to use GPU
34
- model_kwargs={"n_gpu_layers": 4}, # I need to play with this and see if it actually helps
35
-
36
  # transform inputs into Llama2 format
37
  messages_to_prompt=messages_to_prompt,
38
  completion_to_prompt=completion_to_prompt,
39
  verbose=True,
40
  )
41
- Settings.llm = Ollama(model="mistral")
42
  Settings.llm = llm
43
  Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
44
  embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
 
15
# Locations of the source PDFs and the persisted vector-store index.
DOC_PATH = './data/pdf_esg'
INDEX_PATH = './storage'

# Local Mistral-7B-Instruct served through llama.cpp.
llm = LlamaCPP(
    # You can pass in the URL to a GGUF model to download it automatically
    # model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf',
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Mistral/llama2-style models use a 4096-token context window; we use it in full here.
    context_window=4096,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__();
    # n_gpu_layers=-1 offloads every layer to the GPU (set to 0 for CPU-only).
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into Llama2 chat/prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
# Settings.llm = Ollama(model="mistral")
Settings.llm = llm

# Instantiate the embedding model ONCE and share the instance: the original
# built two identical HuggingFaceEmbedding objects, loading the BGE weights twice.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model