Hugging Face Spaces — build status: Build error
Commit: "Update app.py" (Browse files)
File changed: app.py
@@ -2,7 +2,7 @@ from transformers import AutoTokenizer
 from exllamav2 import (
     ExLlamaV2,
     ExLlamaV2Config,
-    ExLlamaV2Cache,
+    ExLlamaV2Cache_CPU,
     ExLlamaV2Tokenizer
 )
 from exllamav2.generator import (
@@ -10,6 +10,11 @@ from exllamav2.generator import (
     ExLlamaV2Sampler
 )
 import torch
+import os
+
+# disable CUDA
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Disables GPU detection
+os.environ["EXLLAMA_NO_CUDA"] = "1"  # Forces CPU mode in ExLlamaV2

 # Configure model
 model_dir = "TheBloke_Wizard-Vicuna-13B-GPTQ"  # Path to downloaded model
@@ -21,7 +26,7 @@ config.prepare()

 # Load model
 model = ExLlamaV2(config)
-cache = ExLlamaV2Cache(model)
+cache = ExLlamaV2Cache_CPU(model)
 model.load_autosplit(cache)

 # Load tokenizer (HF-compatible)

(Note: the removed lines on old lines 5 and 24 were truncated in the scraped page; they are reconstructed here as the standard `ExLlamaV2Cache` import/instantiation that the added `ExLlamaV2Cache_CPU` lines replace — verify against the original commit.)