prithivMLmods committed on
Commit
a6b3ea2
·
verified ·
1 Parent(s): 53f1c96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -22
app.py CHANGED
@@ -13,8 +13,6 @@ import numpy as np
13
  from PIL import Image
14
  import cv2
15
 
16
- from keye_vl_utils import process_vision_info
17
-
18
  from transformers import (
19
  Qwen2_5_VLForConditionalGeneration,
20
  AutoModel,
@@ -24,9 +22,6 @@ from transformers import (
24
  )
25
  from transformers.image_utils import load_image
26
 
27
- import subprocess
28
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
29
-
30
  # Constants for text generation
31
  MAX_MAX_NEW_TOKENS = 2048
32
  DEFAULT_MAX_NEW_TOKENS = 1024
@@ -52,16 +47,6 @@ model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
52
  torch_dtype=torch.float16
53
  ).to(device).eval()
54
 
55
- # Load Keye-VL-8B-Preview
56
- MODEL_ID_K = "Kwai-Keye/Keye-VL-8B-Preview"
57
- processor_k = AutoModel.from_pretrained(MODEL_ID_K, trust_remote_code=True)
58
- model_k = Qwen2_5_VLForConditionalGeneration.from_pretrained(
59
- MODEL_ID_K,
60
- attn_implementation="flash_attention_2",
61
- trust_remote_code=True,
62
- torch_dtype=torch.float16
63
- ).to(device).eval()
64
-
65
  def downsample_video(video_path):
66
  """
67
  Downsamples the video to evenly spaced frames.
@@ -99,9 +84,6 @@ def generate_image(model_name: str, text: str, image: Image.Image,
99
  elif model_name == "Qwen2.5-VL-3B-Instruct":
100
  processor = processor_x
101
  model = model_x
102
- elif model_name == "Keye-VL-8B-Preview":
103
- processor = processor_k
104
- model = model_k
105
  else:
106
  yield "Invalid model selected."
107
  return
@@ -152,9 +134,6 @@ def generate_video(model_name: str, text: str, video_path: str,
152
  elif model_name == "Qwen2.5-VL-3B-Instruct":
153
  processor = processor_x
154
  model = model_x
155
- elif model_name == "Keye-VL-8B-Preview":
156
- processor = processor_k
157
- model = model_k
158
  else:
159
  yield "Invalid model selected."
160
  return
@@ -253,7 +232,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
253
  with gr.Column():
254
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
255
  model_choice = gr.Radio(
256
- choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct", "Keye-VL-8B-Preview"],
257
  label="Select Model",
258
  value="Qwen2.5-VL-7B-Instruct"
259
  )
 
13
  from PIL import Image
14
  import cv2
15
 
 
 
16
  from transformers import (
17
  Qwen2_5_VLForConditionalGeneration,
18
  AutoModel,
 
22
  )
23
  from transformers.image_utils import load_image
24
 
 
 
 
25
  # Constants for text generation
26
  MAX_MAX_NEW_TOKENS = 2048
27
  DEFAULT_MAX_NEW_TOKENS = 1024
 
47
  torch_dtype=torch.float16
48
  ).to(device).eval()
49
 
 
 
 
 
 
 
 
 
 
 
50
  def downsample_video(video_path):
51
  """
52
  Downsamples the video to evenly spaced frames.
 
84
  elif model_name == "Qwen2.5-VL-3B-Instruct":
85
  processor = processor_x
86
  model = model_x
 
 
 
87
  else:
88
  yield "Invalid model selected."
89
  return
 
134
  elif model_name == "Qwen2.5-VL-3B-Instruct":
135
  processor = processor_x
136
  model = model_x
 
 
 
137
  else:
138
  yield "Invalid model selected."
139
  return
 
232
  with gr.Column():
233
  output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
234
  model_choice = gr.Radio(
235
+ choices=["Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL-3B-Instruct"],
236
  label="Select Model",
237
  value="Qwen2.5-VL-7B-Instruct"
238
  )