Update app.py
app.py CHANGED
@@ -70,13 +70,19 @@ def stream_chat(input_image: Image.Image, vlm_prompt):
     image = clip_processor(images=input_image, return_tensors='pt').pixel_values
     image = image.to('cuda')
 
-
+    # Tokenize the prompt
     if not vlm_prompt:
         vlm_prompt = VLM_PROMPT
     vlm_prompt = vlm_prompt + "\n"
-
-
-
+    prompt = tokenizer.encode(
+        vlm_prompt,
+        return_tensors='pt',
+        padding=False,
+        truncation=False,
+        add_special_tokens=False
+    )
+
+    # Embed image
     with torch.amp.autocast_mode.autocast('cuda', enabled=True):
         vision_outputs = clip_model(pixel_values=image, output_hidden_states=True)
         image_features = vision_outputs.hidden_states[-2]