init
Browse files
- app.py +1 -1
- config/infer.yaml +1 -1
- model/fastchat/serve/inference.py +5 -2
- pre-requirements.txt +1 -0
app.py
CHANGED
|
@@ -94,4 +94,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 94 |
clear_button.click(
|
| 95 |
clear_chat, inputs=[chatbot], outputs=[txt, chatbot])
|
| 96 |
|
| 97 |
-
demo.launch(
|
|
|
|
| 94 |
clear_button.click(
|
| 95 |
clear_chat, inputs=[chatbot], outputs=[txt, chatbot])
|
| 96 |
|
| 97 |
+
demo.launch()
|
config/infer.yaml
CHANGED
|
@@ -4,7 +4,7 @@ vicuna:
|
|
| 4 |
model_path: '/home/user/app/vicuna-7b'
|
| 5 |
# model_path: '/mnt/petrelfs/wangyiqin/vid_cap/ChatVID_huggingface/vicuna-7b'
|
| 6 |
device: 'cuda'
|
| 7 |
-
num_gpus:
|
| 8 |
max_gpu_memory: '24Gib'
|
| 9 |
load_8bit: True
|
| 10 |
conv_template:
|
|
|
|
| 4 |
model_path: '/home/user/app/vicuna-7b'
|
| 5 |
# model_path: '/mnt/petrelfs/wangyiqin/vid_cap/ChatVID_huggingface/vicuna-7b'
|
| 6 |
device: 'cuda'
|
| 7 |
+
num_gpus: 'auto'
|
| 8 |
max_gpu_memory: '24Gib'
|
| 9 |
load_8bit: True
|
| 10 |
conv_template:
|
model/fastchat/serve/inference.py
CHANGED
|
@@ -80,6 +80,9 @@ def load_model(
|
|
| 80 |
kwargs = {}
|
| 81 |
elif device == "cuda":
|
| 82 |
kwargs = {"torch_dtype": torch.float16}
|
|
|
|
|
|
|
|
|
|
| 83 |
if num_gpus == "auto":
|
| 84 |
kwargs["device_map"] = "auto"
|
| 85 |
else:
|
|
@@ -134,8 +137,8 @@ def load_model(
|
|
| 134 |
)
|
| 135 |
raise_warning_for_old_weights(model_path, model)
|
| 136 |
|
| 137 |
-
if load_8bit:
|
| 138 |
-
compress_module(model, device)
|
| 139 |
|
| 140 |
if (device == "cuda" and num_gpus == 1) or device == "mps":
|
| 141 |
model.to(device)
|
|
|
|
| 80 |
kwargs = {}
|
| 81 |
elif device == "cuda":
|
| 82 |
kwargs = {"torch_dtype": torch.float16}
|
| 83 |
+
if load_8bit:
|
| 84 |
+
kwargs = {"load_in_8bit": True}
|
| 85 |
+
|
| 86 |
if num_gpus == "auto":
|
| 87 |
kwargs["device_map"] = "auto"
|
| 88 |
else:
|
|
|
|
| 137 |
)
|
| 138 |
raise_warning_for_old_weights(model_path, model)
|
| 139 |
|
| 140 |
+
# if load_8bit:
|
| 141 |
+
# compress_module(model, device)
|
| 142 |
|
| 143 |
if (device == "cuda" and num_gpus == 1) or device == "mps":
|
| 144 |
model.to(device)
|
pre-requirements.txt
CHANGED
|
@@ -13,4 +13,5 @@ regex
|
|
| 13 |
tqdm
|
| 14 |
openai-whisper
|
| 15 |
accelerate
|
|
|
|
| 16 |
sentencepiece
|
|
|
|
| 13 |
tqdm
|
| 14 |
openai-whisper
|
| 15 |
accelerate
|
| 16 |
+
bitsandbytes
|
| 17 |
sentencepiece
|