KoonJamesZ committed on
Commit
f713678
·
verified ·
1 Parent(s): 3ff0eb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -16,10 +16,19 @@ from ultralytics import YOLO
16
  import gradio as gr
17
 
18
  # # default: Load the model on the available device(s)
 
 
 
 
 
19
  model = Qwen2VLForConditionalGeneration.from_pretrained(
20
- "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
 
 
 
21
  )
22
 
 
23
  # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
24
  # model = Qwen2VLForConditionalGeneration.from_pretrained(
25
  # "Qwen/Qwen2-VL-7B-Instruct",
 
16
  import gradio as gr
17
 
18
  # # default: Load the model on the available device(s)
19
+ # model = Qwen2VLForConditionalGeneration.from_pretrained(
20
+ # "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
21
+ # )
22
+
23
+ #8-bit quantization
24
  model = Qwen2VLForConditionalGeneration.from_pretrained(
25
+ "Qwen/Qwen2-VL-7B-Instruct",
26
+ torch_dtype=torch.float16,
27
+ load_in_8bit=True, # Use 8-bit quantization
28
+ device_map="auto",
29
  )
30
 
31
+
32
  # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
33
  # model = Qwen2VLForConditionalGeneration.from_pretrained(
34
  # "Qwen/Qwen2-VL-7B-Instruct",