sagar007 committed on
Commit
c51ef31
·
verified ·
1 Parent(s): d880060

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -11
app.py CHANGED
@@ -94,19 +94,33 @@ def stream_text_chat(message, history, system_prompt, temperature=0.8, max_new_t
94
  @spaces.GPU
95
  def process_vision_query(image, text_input):
96
  prompt = f"<|user|>\n<|image_1|>\n{text_input}<|end|>\n<|assistant|>\n"
97
- image = Image.fromarray(image).convert("RGB")
98
- inputs = vision_processor(prompt, image, return_tensors="pt").to(device)
99
 
100
- with torch.no_grad():
101
- generate_ids = vision_model.generate(
102
- **inputs,
103
- max_new_tokens=1000,
104
- eos_token_id=vision_processor.tokenizer.eos_token_id
105
- )
 
 
 
106
 
107
- generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
108
- response = vision_processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
109
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  @spaces.GPU
112
  def generate_speech(prompt, description):
 
94
  @spaces.GPU
95
  def process_vision_query(image, text_input):
96
  prompt = f"<|user|>\n<|image_1|>\n{text_input}<|end|>\n<|assistant|>\n"
 
 
97
 
98
+ # Check if image is already a PIL Image
99
+ if isinstance(image, Image.Image):
100
+ pil_image = image
101
+ elif isinstance(image, np.ndarray):
102
+ pil_image = Image.fromarray(image).convert("RGB")
103
+ else:
104
+ raise ValueError("Unsupported image type. Expected PIL Image or numpy array.")
105
+
106
+ inputs = vision_processor(prompt, pil_image, return_tensors="pt").to(device)
107
 
108
+ try:
109
+ with torch.no_grad():
110
+ generate_ids = vision_model.generate(
111
+ **inputs,
112
+ max_new_tokens=1000,
113
+ eos_token_id=vision_processor.tokenizer.eos_token_id
114
+ )
115
+
116
+ generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
117
+ response = vision_processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
118
+ return response
119
+ except RuntimeError as e:
120
+ if "CUDA out of memory" in str(e):
121
+ return "Error: GPU out of memory. Try processing a smaller image or freeing up GPU resources."
122
+ else:
123
+ raise e
124
 
125
  @spaces.GPU
126
  def generate_speech(prompt, description):