Update README.md
README.md CHANGED
````diff
@@ -30,9 +30,8 @@ pip install -vvv --no-build-isolation -e .
 
 ### Sample code
 ```
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
-from optimum.gptq import GPTQQuantizer, load_quantized_model
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 model_name = "webbigdata/C3TR-Adapter_gptq"
 
 # thanks to tk-master
@@ -41,9 +40,11 @@ config = AutoConfig.from_pretrained(model_name)
 config.quantization_config["use_exllama"] = False
 config.quantization_config["exllama_config"] = {"version":2}
 
+# adjust to your GPU memory size. 0 means the first GPU.
 max_memory={0: "12GiB", "cpu": "10GiB"}
+
 quantized_model = AutoModelForCausalLM.from_pretrained(model_name
-    , torch_dtype=torch.bfloat16 #
+    , torch_dtype=torch.bfloat16 # change to torch.float16 if you use free Colab or other hardware that does not support bfloat16
     , device_map="auto", max_memory=max_memory
     , config=config)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
````
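For reference, below is a minimal sketch of how the sample block reads after this change, assembled from the two hunks above. The `config = AutoConfig.from_pretrained(model_name)` line is taken from the second hunk's header; README lines that fall outside the diff context (for example the prompt construction and the `generate` call) are not reproduced here.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig

model_name = "webbigdata/C3TR-Adapter_gptq"

# thanks to tk-master
config = AutoConfig.from_pretrained(model_name)
# quantization backend settings as given in the README
config.quantization_config["use_exllama"] = False
config.quantization_config["exllama_config"] = {"version": 2}

# adjust to your GPU memory size. 0 means the first GPU.
max_memory = {0: "12GiB", "cpu": "10GiB"}

quantized_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # use torch.float16 on hardware without bfloat16 support (e.g. free Colab T4)
    device_map="auto",
    max_memory=max_memory,
    config=config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
```

With `device_map="auto"` plus `max_memory`, accelerate places as many layers as fit within the 12 GiB GPU budget and offloads the rest to the 10 GiB of CPU RAM.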