zesquirrelnator committed on
Commit
197f38e
·
verified ·
1 Parent(s): 3ae6c3a

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +46 -0
handler.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from typing import Dict, Any
3
+ from PIL import Image
4
+ import torch
5
+ import base64
6
+ from io import BytesIO
7
+ from transformers import BlipForConditionalGeneration, BlipProcessor
8
+
9
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
10
+
11
class EndpointHandler():
    """Custom inference handler that captions images with BLIP.

    The handler is called with a payload of the form::

        {"inputs": {"images": [<base64 str>, ...], "texts": [<prompt>, ...]}}

    and returns ``{"captions": [...]}`` on success, or
    ``{"captions": [], "error": <message>}`` on failure.
    """

    # Keys of the processor output that are forwarded to ``model.generate``.
    _MODEL_INPUT_KEYS = ("pixel_values", "input_ids", "attention_mask")

    def __init__(self, path=""):
        """Load the BLIP processor and model and put the model in eval mode.

        Args:
            path: Accepted for interface compatibility; it is not used here.
                The weights are always loaded from the
                ``Salesforce/blip-image-captioning-large`` checkpoint.
        """
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
        self.model = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-large"
        ).to(device)
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Caption every image in the request.

        Args:
            data: Request payload. ``data["inputs"]`` must be a dict with:
                ``images`` - a base64-encoded image, or a list of them;
                ``texts`` (optional) - one prompt per image, or a single
                prompt string applied to every image. Defaults to
                ``"a photography of"`` for each image.

        Returns:
            ``{"captions": [...]}`` with one caption per image, in input
            order, or ``{"captions": [], "error": <message>}`` if the request
            is malformed or processing fails.
        """
        input_data = data.get("inputs") or {}
        if not isinstance(input_data, dict):
            # Without this check the ``.get`` calls below would raise outside
            # the try block and bypass the error-dict contract.
            return {
                "captions": [],
                "error": "'inputs' must be an object containing an 'images' list",
            }

        encoded_images = input_data.get("images")
        if not encoded_images:
            return {"captions": [], "error": "No images provided"}
        if isinstance(encoded_images, (str, bytes)):
            # A bare string would otherwise be iterated character by character.
            encoded_images = [encoded_images]

        texts = input_data.get("texts")
        if texts is None:
            texts = ["a photography of"] * len(encoded_images)
        elif isinstance(texts, str):
            texts = [texts] * len(encoded_images)

        if len(texts) != len(encoded_images):
            # zip() would silently drop the unmatched images and return fewer
            # captions than images, so reject the request explicitly instead.
            return {
                "captions": [],
                "error": (
                    f"Number of texts ({len(texts)}) does not match "
                    f"number of images ({len(encoded_images)})"
                ),
            }

        try:
            raw_images = [
                Image.open(BytesIO(base64.b64decode(img))).convert("RGB")
                for img in encoded_images
            ]
            encodings = [
                self.processor(image, text, return_tensors="pt")
                for image, text in zip(raw_images, texts)
            ]
            return {"captions": self._generate_captions(encodings)}
        except Exception as e:
            print(f"Error during processing: {str(e)}")
            return {"captions": [], "error": str(e)}

    def _generate_captions(self, encodings):
        """Run generation over per-sample encodings and decode the captions.

        Samples are stacked into one batch when all prompts tokenize to the
        same length. Prompts of different lengths cannot be concatenated
        along the batch dimension, so in that case each sample is generated
        on its own rather than failing the whole request.
        """
        prompt_lengths = {enc["input_ids"].shape[-1] for enc in encodings}
        if len(prompt_lengths) == 1:
            batches = [encodings]
        else:
            batches = [[enc] for enc in encodings]

        captions = []
        with torch.no_grad():
            for batch in batches:
                model_inputs = {
                    key: torch.cat([enc[key] for enc in batch], dim=0).to(device)
                    for key in self._MODEL_INPUT_KEYS
                }
                out = self.model.generate(**model_inputs)
                captions.extend(
                    self.processor.batch_decode(out, skip_special_tokens=True)
                )
        return captions