SaiSriTejaKuppa
/

blip

@@ -1,9 +1,8 @@
-## this handler will work with image as a url.
-from typing import Dict, List, Any
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
-import requests
 import torch
 class EndpointHandler():
@@ -24,15 +23,19 @@ class EndpointHandler():
         # Extract inputs and parameters
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", {"mode": "image"})
-        # Get image URL and prompt from the inputs
-        image_url = inputs.get("image_url")
         prompt = inputs.get("prompt", "")  # Optional prompt for conditional captioning
-        # Load image from URL and ensure RGB format
-        image = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
         # Process inputs with or without a prompt
         if prompt:
             processed_inputs = self.processor(image, prompt, return_tensors="pt").to(self.model.device)
@@ -44,4 +47,4 @@ class EndpointHandler():
         caption = self.processor.decode(out[0], skip_special_tokens=True)
         # Return the generated caption
-        return {"caption": caption}

+import base64
+from io import BytesIO
+from typing import Dict, Any
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
 import torch
 class EndpointHandler():
         # Extract inputs and parameters
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", {"mode": "image"})
+        # Get base64 image data and prompt from the inputs
+        image_base64 = inputs.get("image_base64")
         prompt = inputs.get("prompt", "")  # Optional prompt for conditional captioning
+        # Ensure base64-encoded image is provided
+        if not image_base64:
+            raise ValueError("No image data provided. Please provide 'image_base64'.")
+        # Decode base64 string and convert to RGB image
+        image_data = BytesIO(base64.b64decode(image_base64))
+        image = Image.open(image_data).convert("RGB")
         # Process inputs with or without a prompt
         if prompt:
             processed_inputs = self.processor(image, prompt, return_tensors="pt").to(self.model.device)
         caption = self.processor.decode(out[0], skip_special_tokens=True)
         # Return the generated caption
+        return {"caption": caption}