Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -101,7 +101,10 @@ def process_image_input(image, vision_model, vision_processor):
|
|
101 |
return "Error: Vision model is not available."
|
102 |
|
103 |
try:
|
104 |
-
|
|
|
|
|
|
|
105 |
inputs = {k: v.to(vision_model.device) for k, v in inputs.items()}
|
106 |
|
107 |
with torch.no_grad():
|
@@ -111,7 +114,6 @@ def process_image_input(image, vision_model, vision_processor):
|
|
111 |
return generated_text
|
112 |
except Exception as e:
|
113 |
return f"Error processing image: {str(e)}"
|
114 |
-
|
115 |
# Generate response within a GPU-decorated function
|
116 |
@spaces.GPU
|
117 |
def generate_response(transcription, sarvam_pipe):
|
|
|
101 |
return "Error: Vision model is not available."
|
102 |
|
103 |
try:
|
104 |
+
# Add a generic prompt for image description
|
105 |
+
prompt = "Describe this image in detail."
|
106 |
+
|
107 |
+
inputs = vision_processor(images=image, text=prompt, return_tensors="pt")
|
108 |
inputs = {k: v.to(vision_model.device) for k, v in inputs.items()}
|
109 |
|
110 |
with torch.no_grad():
|
|
|
114 |
return generated_text
|
115 |
except Exception as e:
|
116 |
return f"Error processing image: {str(e)}"
|
|
|
117 |
# Generate response within a GPU-decorated function
|
118 |
@spaces.GPU
|
119 |
def generate_response(transcription, sarvam_pipe):
|