FM-1976 committed
Commit 0ce5e68 · 1 parent: 0721572

Update app.py


import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

llm = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "TheBloke/Llama-2-7b-Chat-GGUF"),
        filename=os.environ.get("MODEL_FILE", "llama-2-7b-chat.Q5_0.gguf"),
    ),
    n_ctx=2048,
    n_gpu_layers=50,  # lower n_gpu_layers if you have less VRAM, raise it if you have more
)
Used the example from https://huggingface.co/spaces/SpacesExamples/llama-cpp-python-cuda-gradio/blob/main/app.py
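For context, a minimal sketch of how an llm instance built this way is typically queried with llama-cpp-python; the prompt and sampling parameters here are illustrative, not part of this commit:

# Hedged usage sketch: assumes the llm object from the snippet above.
output = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello, who are you?"}],
    max_tokens=256,
    temperature=0.7,
)
print(output["choices"][0]["message"]["content"])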

Files changed (1): app.py (+7 -1)
app.py CHANGED
@@ -1,10 +1,16 @@
 import gradio as gr
 from llama_cpp import Llama
 import datetime
+from huggingface_hub import hf_hub_download
 
 #MODEL SETTINGS also for DISPLAY
 convHistory = ''
-modelfile = "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/blob/main/starling-lm-7b-alpha.Q4_K_M.gguf"
+modelfile = hf_hub_download(
+    repo_id=os.environ.get("REPO_ID", "TheBloke/Llama-2-7b-Chat-GGUF"),
+    filename=os.environ.get("MODEL_FILE", "llama-2-7b-chat.Q5_0.gguf"),
+)
+
+#"https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/blob/main/starling-lm-7b-alpha.Q4_K_M.gguf"
 repetitionpenalty = 1.15
 contextlength=8192
 logfile = 'StarlingLM7B_logs.txt'
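Note that the added lines call os.environ.get, so app.py also needs import os unless it is imported somewhere outside this hunk. Downstream of this hunk (not shown in the diff), the downloaded path is presumably passed to Llama along the lines of the commit-message snippet; a hedged sketch of that consumption, with names taken from the hunk above:

# Assumption: modelfile and contextlength are the variables defined above;
# n_gpu_layers=50 is carried over from the commit message, not from this hunk.
llm = Llama(
    model_path=modelfile,
    n_ctx=contextlength,  # 8192, as defined above
    n_gpu_layers=50,      # adjust to match your available VRAM
)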