Commit 6247e60 by DesiredName (verified) · 1 parent: f10fba2

Update app.py

Files changed (1): app.py (+7 -2)
app.py CHANGED
@@ -2,7 +2,7 @@ from transformers import AutoTokenizer
 from exllamav2 import (
     ExLlamaV2,
     ExLlamaV2Config,
-    ExLlamaV2Cache,
+    ExLlamaV2Cache_CPU,
     ExLlamaV2Tokenizer
 )
 from exllamav2.generator import (
@@ -10,6 +10,11 @@ from exllamav2.generator import (
     ExLlamaV2Sampler
 )
 import torch
+import os
+
+# disable CUDA
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Disables GPU detection
+os.environ["EXLLAMA_NO_CUDA"] = "1"  # Forces CPU mode in ExLlamaV2
 
 # Configure model
 model_dir = "TheBloke_Wizard-Vicuna-13B-GPTQ"  # Path to downloaded model
@@ -21,7 +26,7 @@ config.prepare()
 
 # Load model
 model = ExLlamaV2(config)
-cache = ExLlamaV2Cache(model)
+cache = ExLlamaV2Cache_CPU(model)
 model.load_autosplit(cache)
 
 # Load tokenizer (HF-compatible)
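
A side note on why setting the environment variables after `import torch` still works: `CUDA_VISIBLE_DEVICES` is only read when the CUDA runtime first initializes, and PyTorch defers that initialization until the first CUDA call, so any assignment made before the model is loaded takes effect. Below is a minimal sketch of the effect, assuming a standard PyTorch install; the `EXLLAMA_NO_CUDA` flag is left out, since it is specific to the ExLlamaV2 side of this app rather than to PyTorch.

import os

# Hide every GPU from the CUDA runtime. This must happen before the first
# CUDA call; importing torch alone does not initialize CUDA, which is why
# the ordering in the diff above is still safe.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import torch

print(torch.cuda.is_available())  # False: no visible CUDA device
print(torch.cuda.device_count())  # 0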