from typing import Dict, Any
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline
import base64
from io import BytesIO

# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if device.type != 'cuda':
    raise ValueError("need to run on GPU")


class EndpointHandler:
    def __init__(self, path=""):
        # load the optimized model in half precision and move it to the GPU
        self.pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
        self.pipe = self.pipe.to(device)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """
        Args:
            data (:obj:`dict`):
                includes the input data and the parameters for the inference.
        Return:
            A :obj:`dict` with the base64-encoded image under the "image" key.
        """
        inputs = data.pop("inputs", data)

        # run the inference pipeline; recent diffusers versions expose the
        # generated images on the pipeline output's `.images` attribute
        with autocast(device.type):
            image = self.pipe(inputs, guidance_scale=7.5).images[0]

        # encode the image as a base64 JPEG
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue())

        # postprocess the prediction into a JSON-serializable dict
        return {"image": img_str.decode()}