FM-1976 committed
Commit 0ce5e68 · 1 parent: 0721572

Update app.py


import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

llm = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "TheBloke/Llama-2-7b-Chat-GGUF"),
        filename=os.environ.get("MODEL_FILE", "llama-2-7b-chat.Q5_0.gguf"),
    ),
    n_ctx=2048,
    n_gpu_layers=50,  # lower n_gpu_layers if you have less VRAM, raise it if you have more
)
Used the example from https://huggingface.co/spaces/SpacesExamples/llama-cpp-python-cuda-gradio/blob/main/app.py
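For context, a minimal sketch of how an llm instance built this way is typically queried with llama-cpp-python; the prompt and sampling parameters here are illustrative, not part of this commit:

# Hedged usage sketch: assumes the llm object from the snippet above.
output = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello, who are you?"}],
    max_tokens=256,
    temperature=0.7,
)
print(output["choices"][0]["message"]["content"])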

Files changed (1): app.py (+7 -1)
app.py CHANGED
@@ -1,10 +1,16 @@
 import gradio as gr
 from llama_cpp import Llama
 import datetime
+from huggingface_hub import hf_hub_download
 
 #MODEL SETTINGS also for DISPLAY
 convHistory = ''
-modelfile = "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/blob/main/starling-lm-7b-alpha.Q4_K_M.gguf"
+modelfile = hf_hub_download(
+    repo_id=os.environ.get("REPO_ID", "TheBloke/Llama-2-7b-Chat-GGUF"),
+    filename=os.environ.get("MODEL_FILE", "llama-2-7b-chat.Q5_0.gguf"),
+)
+
+#"https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/blob/main/starling-lm-7b-alpha.Q4_K_M.gguf"
 repetitionpenalty = 1.15
 contextlength=8192
 logfile = 'StarlingLM7B_logs.txt'
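Note that the added lines call os.environ.get, so app.py also needs import os unless it is imported somewhere outside this hunk. Downstream of this hunk (not shown in the diff), the downloaded path is presumably passed to Llama along the lines of the commit-message snippet; a hedged sketch of that consumption, with names taken from the hunk above:

# Assumption: modelfile and contextlength are the variables defined above;
# n_gpu_layers=50 is carried over from the commit message, not from this hunk.
llm = Llama(
    model_path=modelfile,
    n_ctx=contextlength,  # 8192, as defined above
    n_gpu_layers=50,      # adjust to match your available VRAM
)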