KoonJamesZ committed
Commit 886676d · verified · 1 Parent(s): f713678

Update app.py

Files changed (1): app.py +10 -12
app.py CHANGED
@@ -4,30 +4,28 @@ import torch
 import uuid
 from moviepy.editor import VideoFileClip
 import os
-import torch
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 import cv2
 from ultralytics import YOLO
 from heapq import heappush, heappushpop
 import numpy as np
 import uuid
-import uuid
 from ultralytics import YOLO
 import gradio as gr
 
-# # default: Load the model on the available device(s)
-# model = Qwen2VLForConditionalGeneration.from_pretrained(
-#     "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
-# )
-
-#8-bit quantization
+# default: Load the model on the available device(s)
 model = Qwen2VLForConditionalGeneration.from_pretrained(
-    "Qwen/Qwen2-VL-7B-Instruct",
-    torch_dtype=torch.float16,
-    load_in_8bit=True,  # Use 8-bit quantization
-    device_map="auto",
+    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
 )
 
+# #8-bit quantization
+# model = Qwen2VLForConditionalGeneration.from_pretrained(
+#     "Qwen/Qwen2-VL-7B-Instruct",
+#     torch_dtype=torch.float16,
+#     load_in_8bit=True,  # Use 8-bit quantization
+#     device_map="auto",
+# )
+
 
 # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
 # model = Qwen2VLForConditionalGeneration.from_pretrained(
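
Note: the 8-bit path this commit reverts to a comment passes load_in_8bit=True directly to from_pretrained, a kwarg that newer transformers releases deprecate in favor of a BitsAndBytesConfig. A minimal sketch of the same 8-bit load in the current style (an assumption about this Space's environment; it requires the bitsandbytes package and an accelerate-compatible GPU setup):

import torch
from transformers import BitsAndBytesConfig, Qwen2VLForConditionalGeneration

# 8-bit quantization expressed via BitsAndBytesConfig (current transformers API),
# equivalent in intent to the deprecated load_in_8bit= kwarg in the reverted code.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct",
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    device_map="auto",  # automatic device placement; needs accelerate installed
)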
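
The trailing comment recommends flash_attention_2, but the snippet it introduces is cut off in the hunk shown above. For reference, a sketch of that variant following the Qwen2-VL model card (assumes the flash-attn package is installed and the GPU supports it):

import torch
from transformers import Qwen2VLForConditionalGeneration

# Flash Attention 2 for faster, more memory-efficient attention,
# particularly helpful in multi-image and video scenarios.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)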