whyumesh committed on
Commit
c28fb09
·
verified ·
1 Parent(s): 07de076

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -1,19 +1,18 @@
1
- import gradio as gr
2
  import torch
3
  from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
4
  from qwen_vl_utils import process_vision_info
5
  from PIL import Image
6
  import cv2
7
  import numpy as np
 
8
  import spaces
9
 
10
  # Load the model and processor
11
- @spaces.GPU
12
  def load_model():
13
  model = Qwen2VLForConditionalGeneration.from_pretrained(
14
  "Qwen/Qwen2-VL-2B-Instruct",
15
  torch_dtype=torch.float16
16
- ).to("cuda")
17
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
18
  return model, processor
19
 
@@ -41,7 +40,8 @@ def process_image(image, prompt):
41
  padding=True,
42
  return_tensors="pt",
43
  ).to("cuda")
44
-
 
45
  with torch.no_grad():
46
  generated_ids = model.generate(**inputs, max_new_tokens=256)
47
  generated_ids_trimmed = [
@@ -54,8 +54,8 @@ def process_image(image, prompt):
54
  return output_text[0]
55
 
56
  @spaces.GPU
57
- def process_video(video_path, prompt, max_frames=16, frame_interval=30, max_resolution=224):
58
- cap = cv2.VideoCapture(video_path)
59
  frames = []
60
  frame_count = 0
61
 
@@ -100,9 +100,7 @@ def process_video(video_path, prompt, max_frames=16, frame_interval=30, max_reso
100
  return_tensors="pt",
101
  ).to("cuda")
102
 
103
- del frames, image_inputs, video_inputs
104
- torch.cuda.empty_cache()
105
-
106
  with torch.no_grad():
107
  generated_ids = model.generate(**inputs, max_new_tokens=256)
108
  generated_ids_trimmed = [
@@ -119,22 +117,24 @@ def process_content(content, prompt):
119
  if content is None:
120
  return "Please upload an image or video file."
121
 
122
- if isinstance(content, Image.Image):
123
- return process_image(content, prompt)
124
  elif content.name.lower().endswith(('.mp4', '.avi', '.mov')):
125
- return process_video(content.name, prompt)
126
  else:
127
  return "Unsupported file type. Please provide an image or video file."
128
 
 
129
  iface = gr.Interface(
130
  fn=process_content,
131
  inputs=[
132
- gr.File(label="Upload Image or Video", type="filepath"),
133
- gr.Textbox(label="Enter your prompt or task description")
134
  ],
135
  outputs="text",
136
- title="Image and Video Description with Custom Prompt",
137
- description="Upload an image or video and specify a task to get a response.",
138
  )
139
 
140
- iface.launch()
 
 
 
1
  import torch
2
  from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
3
  from qwen_vl_utils import process_vision_info
4
  from PIL import Image
5
  import cv2
6
  import numpy as np
7
+ import gradio as gr
8
  import spaces
9
 
10
  # Load the model and processor
 
11
  def load_model():
12
  model = Qwen2VLForConditionalGeneration.from_pretrained(
13
  "Qwen/Qwen2-VL-2B-Instruct",
14
  torch_dtype=torch.float16
15
+ )
16
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
17
  return model, processor
18
 
 
40
  padding=True,
41
  return_tensors="pt",
42
  ).to("cuda")
43
+
44
+ model.to("cuda")
45
  with torch.no_grad():
46
  generated_ids = model.generate(**inputs, max_new_tokens=256)
47
  generated_ids_trimmed = [
 
54
  return output_text[0]
55
 
56
  @spaces.GPU
57
+ def process_video(video, prompt, max_frames=16, frame_interval=30, max_resolution=224):
58
+ cap = cv2.VideoCapture(video.name)
59
  frames = []
60
  frame_count = 0
61
 
 
100
  return_tensors="pt",
101
  ).to("cuda")
102
 
103
+ model.to("cuda")
 
 
104
  with torch.no_grad():
105
  generated_ids = model.generate(**inputs, max_new_tokens=256)
106
  generated_ids_trimmed = [
 
117
  if content is None:
118
  return "Please upload an image or video file."
119
 
120
+ if content.name.lower().endswith(('.png', '.jpg', '.jpeg')):
121
+ return process_image(Image.open(content.name), prompt)
122
  elif content.name.lower().endswith(('.mp4', '.avi', '.mov')):
123
+ return process_video(content, prompt)
124
  else:
125
  return "Unsupported file type. Please provide an image or video file."
126
 
127
+ # Gradio interface
128
  iface = gr.Interface(
129
  fn=process_content,
130
  inputs=[
131
+ gr.File(label="Upload Image or Video"),
132
+ gr.Textbox(label="Enter your prompt")
133
  ],
134
  outputs="text",
135
+ title="Image and Video Description",
136
+ description="Upload an image or video and enter a prompt to get a description or analysis.",
137
  )
138
 
139
+ if __name__ == "__main__":
140
+ iface.launch()