prithivMLmods committed on
Commit
b73b9a6
·
verified ·
1 Parent(s): 940e0b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -8
app.py CHANGED
@@ -74,7 +74,7 @@ model_y = Qwen2_5_VLForConditionalGeneration.from_pretrained(
74
  torch_dtype=torch.float16
75
  ).to(device).eval()
76
 
77
- #video sampling
78
  def downsample_video(video_path):
79
  """
80
  Downsamples the video to evenly spaced frames.
@@ -129,13 +129,23 @@ def generate_image(model_name: str, text: str, image: Image.Image,
129
  yield "Please upload an image."
130
  return
131
 
132
- messages = [{
133
- "role": "user",
134
- "content": [
135
- {"type": "image", "image": image},
136
- {"type": "text", "text": text},
 
 
137
  ]
138
- }]
 
 
 
 
 
 
 
 
139
  prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
140
  inputs = processor(
141
  text=[prompt_full],
@@ -190,14 +200,21 @@ def generate_video(model_name: str, text: str, video_path: str,
190
  return
191
 
192
  frames = downsample_video(video_path)
 
 
 
 
 
 
193
  messages = [
194
- {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
195
  {"role": "user", "content": [{"type": "text", "text": text}]}
196
  ]
197
  for frame in frames:
198
  image, timestamp = frame
199
  messages[1]["content"].append({"type": "text", "text": f"Frame {timestamp}:"})
200
  messages[1]["content"].append({"type": "image", "image": image})
 
201
  inputs = processor.apply_chat_template(
202
  messages,
203
  tokenize=True,
 
74
  torch_dtype=torch.float16
75
  ).to(device).eval()
76
 
77
+ # Video sampling
78
  def downsample_video(video_path):
79
  """
80
  Downsamples the video to evenly spaced frames.
 
129
  yield "Please upload an image."
130
  return
131
 
132
+ if model_name == "Behemoth-3B-070225-post0.1":
133
+ messages = [
134
+ {"role": "system", "content": [{"type": "text", "text": "detailed thinking on"}]},
135
+ {"role": "user", "content": [
136
+ {"type": "image", "image": image},
137
+ {"type": "text", "text": text},
138
+ ]}
139
  ]
140
+ else:
141
+ messages = [{
142
+ "role": "user",
143
+ "content": [
144
+ {"type": "image", "image": image},
145
+ {"type": "text", "text": text},
146
+ ]
147
+ }]
148
+
149
  prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
150
  inputs = processor(
151
  text=[prompt_full],
 
200
  return
201
 
202
  frames = downsample_video(video_path)
203
+
204
+ if model_name == "Behemoth-3B-070225-post0.1":
205
+ system_message = "detailed thinking on"
206
+ else:
207
+ system_message = "You are a helpful assistant."
208
+
209
  messages = [
210
+ {"role": "system", "content": [{"type": "text", "text": system_message}]},
211
  {"role": "user", "content": [{"type": "text", "text": text}]}
212
  ]
213
  for frame in frames:
214
  image, timestamp = frame
215
  messages[1]["content"].append({"type": "text", "text": f"Frame {timestamp}:"})
216
  messages[1]["content"].append({"type": "image", "image": image})
217
+
218
  inputs = processor.apply_chat_template(
219
  messages,
220
  tokenize=True,