init
Browse files
- app.py +1 -1
- config/infer.yaml +1 -1
- model/fastchat/serve/inference.py +5 -2
- pre-requirements.txt +1 -0
app.py
CHANGED
@@ -94,4 +94,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
94 |
clear_button.click(
|
95 |
clear_chat, inputs=[chatbot], outputs=[txt, chatbot])
|
96 |
|
97 |
-
demo.launch(
|
|
|
94 |
clear_button.click(
|
95 |
clear_chat, inputs=[chatbot], outputs=[txt, chatbot])
|
96 |
|
97 |
+
demo.launch()
|
config/infer.yaml
CHANGED
@@ -4,7 +4,7 @@ vicuna:
|
|
4 |
model_path: '/home/user/app/vicuna-7b'
|
5 |
# model_path: '/mnt/petrelfs/wangyiqin/vid_cap/ChatVID_huggingface/vicuna-7b'
|
6 |
device: 'cuda'
|
7 |
-
num_gpus:
|
8 |
max_gpu_memory: '24Gib'
|
9 |
load_8bit: True
|
10 |
conv_template:
|
|
|
4 |
model_path: '/home/user/app/vicuna-7b'
|
5 |
# model_path: '/mnt/petrelfs/wangyiqin/vid_cap/ChatVID_huggingface/vicuna-7b'
|
6 |
device: 'cuda'
|
7 |
+
num_gpus: 'auto'
|
8 |
max_gpu_memory: '24Gib'
|
9 |
load_8bit: True
|
10 |
conv_template:
|
model/fastchat/serve/inference.py
CHANGED
@@ -80,6 +80,9 @@ def load_model(
|
|
80 |
kwargs = {}
|
81 |
elif device == "cuda":
|
82 |
kwargs = {"torch_dtype": torch.float16}
|
|
|
|
|
|
|
83 |
if num_gpus == "auto":
|
84 |
kwargs["device_map"] = "auto"
|
85 |
else:
|
@@ -134,8 +137,8 @@ def load_model(
|
|
134 |
)
|
135 |
raise_warning_for_old_weights(model_path, model)
|
136 |
|
137 |
-
if load_8bit:
|
138 |
-
compress_module(model, device)
|
139 |
|
140 |
if (device == "cuda" and num_gpus == 1) or device == "mps":
|
141 |
model.to(device)
|
|
|
80 |
kwargs = {}
|
81 |
elif device == "cuda":
|
82 |
kwargs = {"torch_dtype": torch.float16}
|
83 |
+
if load_8bit:
|
84 |
+
kwargs = {"load_in_8bit": True}
|
85 |
+
|
86 |
if num_gpus == "auto":
|
87 |
kwargs["device_map"] = "auto"
|
88 |
else:
|
|
|
137 |
)
|
138 |
raise_warning_for_old_weights(model_path, model)
|
139 |
|
140 |
+
# if load_8bit:
|
141 |
+
# compress_module(model, device)
|
142 |
|
143 |
if (device == "cuda" and num_gpus == 1) or device == "mps":
|
144 |
model.to(device)
|
pre-requirements.txt
CHANGED
@@ -13,4 +13,5 @@ regex
|
|
13 |
tqdm
|
14 |
openai-whisper
|
15 |
accelerate
|
|
|
16 |
sentencepiece
|
|
|
13 |
tqdm
|
14 |
openai-whisper
|
15 |
accelerate
|
16 |
+
bitsandbytes
|
17 |
sentencepiece
|