Initial commit for handling images supplied locally (as base64-encoded data)
Browse files - handler.py +14 -11
handler.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
from typing import Dict, Any
|
4 |
from transformers import BlipProcessor, BlipForConditionalGeneration
|
5 |
from PIL import Image
|
6 |
-
import requests
|
7 |
import torch
|
8 |
|
9 |
class EndpointHandler():
|
@@ -24,15 +23,19 @@ class EndpointHandler():
|
|
24 |
# Extract inputs and parameters
|
25 |
inputs = data.pop("inputs", data)
|
26 |
parameters = data.pop("parameters", {"mode": "image"})
|
27 |
-
|
28 |
|
29 |
-
# Get image
|
30 |
-
|
31 |
prompt = inputs.get("prompt", "") # Optional prompt for conditional captioning
|
32 |
|
33 |
-
#
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
36 |
# Process inputs with or without a prompt
|
37 |
if prompt:
|
38 |
processed_inputs = self.processor(image, prompt, return_tensors="pt").to(self.model.device)
|
@@ -44,4 +47,4 @@ class EndpointHandler():
|
|
44 |
caption = self.processor.decode(out[0], skip_special_tokens=True)
|
45 |
|
46 |
# Return the generated caption
|
47 |
-
return {"caption": caption}
|
|
|
1 |
+
import base64
|
2 |
+
from io import BytesIO
|
3 |
+
from typing import Dict, Any
|
4 |
from transformers import BlipProcessor, BlipForConditionalGeneration
|
5 |
from PIL import Image
|
|
|
6 |
import torch
|
7 |
|
8 |
class EndpointHandler():
|
|
|
23 |
# Extract inputs and parameters
|
24 |
inputs = data.pop("inputs", data)
|
25 |
parameters = data.pop("parameters", {"mode": "image"})
|
|
|
26 |
|
27 |
+
# Get base64 image data and prompt from the inputs
|
28 |
+
image_base64 = inputs.get("image_base64")
|
29 |
prompt = inputs.get("prompt", "") # Optional prompt for conditional captioning
|
30 |
|
31 |
+
# Ensure base64-encoded image is provided
|
32 |
+
if not image_base64:
|
33 |
+
raise ValueError("No image data provided. Please provide 'image_base64'.")
|
34 |
+
|
35 |
+
# Decode base64 string and convert to RGB image
|
36 |
+
image_data = BytesIO(base64.b64decode(image_base64))
|
37 |
+
image = Image.open(image_data).convert("RGB")
|
38 |
+
|
39 |
# Process inputs with or without a prompt
|
40 |
if prompt:
|
41 |
processed_inputs = self.processor(image, prompt, return_tensors="pt").to(self.model.device)
|
|
|
47 |
caption = self.processor.decode(out[0], skip_special_tokens=True)
|
48 |
|
49 |
# Return the generated caption
|
50 |
+
return {"caption": caption}
|