Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -94,19 +94,33 @@ def stream_text_chat(message, history, system_prompt, temperature=0.8, max_new_t
|
|
| 94 |
@spaces.GPU
|
| 95 |
def process_vision_query(image, text_input):
|
| 96 |
prompt = f"<|user|>\n<|image_1|>\n{text_input}<|end|>\n<|assistant|>\n"
|
| 97 |
-
image = Image.fromarray(image).convert("RGB")
|
| 98 |
-
inputs = vision_processor(prompt, image, return_tensors="pt").to(device)
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
@spaces.GPU
|
| 112 |
def generate_speech(prompt, description):
|
|
|
|
| 94 |
@spaces.GPU
def process_vision_query(image, text_input):
    """Answer a text question about an image using the vision model.

    Args:
        image: The picture to analyse, either a PIL ``Image.Image`` or a
            numpy array accepted by ``Image.fromarray``.
        text_input: The user's question; inserted verbatim into the prompt.

    Returns:
        The decoded model reply, or a human-readable error string when
        generation fails with a CUDA out-of-memory ``RuntimeError``.

    Raises:
        ValueError: If ``image`` is neither a PIL image nor a numpy array.
        RuntimeError: Any ``RuntimeError`` raised during generation or
            decoding that is not a CUDA out-of-memory error.
    """
    prompt = f"<|user|>\n<|image_1|>\n{text_input}<|end|>\n<|assistant|>\n"

    # Normalise both accepted input types to an RGB PIL image so the
    # processor receives the same mode regardless of how the image arrived
    # (previously only the numpy path was converted).
    if isinstance(image, Image.Image):
        pil_image = image.convert("RGB")
    elif isinstance(image, np.ndarray):
        pil_image = Image.fromarray(image).convert("RGB")
    else:
        raise ValueError("Unsupported image type. Expected PIL Image or numpy array.")

    inputs = vision_processor(prompt, pil_image, return_tensors="pt").to(device)

    try:
        # Inference only: no gradients are needed.
        with torch.no_grad():
            generate_ids = vision_model.generate(
                **inputs,
                max_new_tokens=1000,
                eos_token_id=vision_processor.tokenizer.eos_token_id,
            )

        # Drop the leading positions that correspond to the prompt tokens so
        # only the newly generated part is decoded.
        generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
        response = vision_processor.batch_decode(
            generate_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]
        return response
    except RuntimeError as e:
        # Report GPU memory exhaustion to the caller as a message instead of
        # crashing; every other runtime failure propagates unchanged.
        if "CUDA out of memory" in str(e):
            return "Error: GPU out of memory. Try processing a smaller image or freeing up GPU resources."
        raise
|
| 124 |
|
| 125 |
@spaces.GPU
|
| 126 |
def generate_speech(prompt, description):
|