Commit 4cab03f · tweak 2
Parent(s): cd09dff
app.py CHANGED
@@ -7,23 +7,13 @@ from huggingface_hub import hf_hub_download
 # huggingface-cli download microsoft/Phi-3-mini-4k-instruct-gguf Phi-3-mini-4k-instruct-q4.gguf --local-dir .
 # huggingface-cli download LoneStriker/OpenBioLLM-Llama3-8B-GGUF --local-dir ./llama3-gguf
 
-# Define the model name and file
-model_name = "LoneStriker/OpenBioLLM-Llama3-8B-GGUF"
-model_file = "Llama3-8B-Q5_K_M.gguf"
-
-# Define the local directory path within the Docker container
-local_dir = "/usr/src/app/llama3-gguf"
-
-# Ensure that the local directory exists
-os.makedirs(local_dir, exist_ok=True)
-
-# Download the model to the specified local directory
-model_path = hf_hub_download(model_name, filename=model_file, local_dir=local_dir)
-
 llm = Llama(
     # model_path="./Phi-3-mini-4k-instruct-q4.gguf",
     # model_path="./llama3-gguf/OpenBioLLM-Llama3-8B-Q5_K_M.gguf",
-    model_path = model_path,
+    model_path = hf_hub_download(
+        repo_id=os.environ.get("REPO_ID", "LoneStriker/OpenBioLLM-Llama3-8B-GGUF"),
+        filename=os.environ.get("MODEL_FILE", "Llama3-8B-Q5_K_M.gguf"),
+    ),
     n_ctx=2048,
     n_gpu_layers=50,  # change n_gpu_layers if you have more or less VRAM
 )
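For context, the model-loading block of app.py after this commit reduces to the sketch below. This is a minimal, self-contained reading of the diff rather than the full file: os and hf_hub_download are confirmed by the hunk header, while "from llama_cpp import Llama" is an assumption (the diff only shows the Llama(...) call, whose n_ctx/n_gpu_layers arguments match llama-cpp-python). The commit drops the hard-coded /usr/src/app/llama3-gguf download directory in favor of hf_hub_download's default Hugging Face cache, and makes the repo and filename overridable through the REPO_ID and MODEL_FILE environment variables.

import os

from huggingface_hub import hf_hub_download
from llama_cpp import Llama  # assumed import; the diff only shows the Llama(...) call

llm = Llama(
    # hf_hub_download fetches the GGUF file on first run (or reuses the
    # local Hugging Face cache) and returns the absolute path to the copy.
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "LoneStriker/OpenBioLLM-Llama3-8B-GGUF"),
        filename=os.environ.get("MODEL_FILE", "Llama3-8B-Q5_K_M.gguf"),
    ),
    n_ctx=2048,
    n_gpu_layers=50,  # change n_gpu_layers if you have more or less VRAM
)

With the defaults wrapped in os.environ.get, switching the Space to the commented-out Phi-3 model becomes a configuration change (set REPO_ID=microsoft/Phi-3-mini-4k-instruct-gguf and MODEL_FILE=Phi-3-mini-4k-instruct-q4.gguf) rather than a code edit.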