prithivMLmods committed on
Commit
750e7a3
·
verified ·
1 Parent(s): 2ab8423

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -1
app.py CHANGED
@@ -13,8 +13,12 @@ import numpy as np
13
  from PIL import Image
14
  import cv2
15
 
 
 
16
  from transformers import (
17
  Qwen2_5_VLForConditionalGeneration,
 
 
18
  AutoProcessor,
19
  TextIteratorStreamer,
20
  )
@@ -45,6 +49,16 @@ model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
45
  torch_dtype=torch.float16
46
  ).to(device).eval()
47
 
 
 
 
 
 
 
 
 
 
 
48
  def downsample_video(video_path):
49
  """
50
  Downsamples the video to evenly spaced frames.
@@ -82,6 +96,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
82
  elif model_name == "Qwen2.5-VL-3B-Instruct":
83
  processor = processor_x
84
  model = model_x
 
 
 
85
  else:
86
  yield "Invalid model selected."
87
  return
@@ -132,6 +149,9 @@ def generate_video(model_name: str, text: str, video_path: str,
132
  elif model_name == "Qwen2.5-VL-3B-Instruct":
133
  processor = processor_x
134
  model = model_x
 
 
 
135
  else:
136
  yield "Invalid model selected."
137
  return
@@ -230,7 +250,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
230
  with gr.Column():
231
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
232
  model_choice = gr.Radio(
233
- choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct"],
234
  label="Select Model",
235
  value="Qwen2.5-VL-7B-Instruct"
236
  )
 
13
  from PIL import Image
14
  import cv2
15
 
16
+ from keye_vl_utils import process_vision_info
17
+
18
  from transformers import (
19
  Qwen2_5_VLForConditionalGeneration,
20
+ AutoModel,
21
+ AutoTokenizer,
22
  AutoProcessor,
23
  TextIteratorStreamer,
24
  )
 
49
  torch_dtype=torch.float16
50
  ).to(device).eval()
51
 
# Load Keye-VL-8B-Preview.
# The checkpoint is loaded with trust_remote_code=True, i.e. its modelling
# code comes from the model repository, so the weights go through AutoModel
# rather than the Qwen2.5-VL class. The preprocessing object that the
# generate_* functions use as `processor` must come from AutoProcessor;
# AutoModel would load the network a second time and return a model, not a
# processor.
MODEL_ID_K = "Kwai-Keye/Keye-VL-8B-Preview"
processor_k = AutoProcessor.from_pretrained(MODEL_ID_K, trust_remote_code=True)
model_k = AutoModel.from_pretrained(
    MODEL_ID_K,
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype=torch.float16
).to(device).eval()
61
+
62
  def downsample_video(video_path):
63
  """
64
  Downsamples the video to evenly spaced frames.
 
96
  elif model_name == "Qwen2.5-VL-3B-Instruct":
97
  processor = processor_x
98
  model = model_x
99
+ elif model_name == "Keye-VL-8B-Preview":
100
+ processor = processor_k
101
+ model = model_k
102
  else:
103
  yield "Invalid model selected."
104
  return
 
149
  elif model_name == "Qwen2.5-VL-3B-Instruct":
150
  processor = processor_x
151
  model = model_x
152
+ elif model_name == "Keye-VL-8B-Preview":
153
+ processor = processor_k
154
+ model = model_k
155
  else:
156
  yield "Invalid model selected."
157
  return
 
250
  with gr.Column():
251
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
252
  model_choice = gr.Radio(
253
+ choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct", "Keye-VL-8B-Preview"],
254
  label="Select Model",
255
  value="Qwen2.5-VL-7B-Instruct"
256
  )