glowforge-dev
/

stable-diffusion-2-1-base-custom

StableDiffusionPipeline

Model card Files Files and versions Community

stable-diffusion-2-1-base-custom / handler.py

emeersman's picture

Don't pass image to text pipeline

c4216a4 about 2 years ago

2.99 kB

	from typing import Dict, List, Any
	import torch
	import requests
	from PIL import Image
	from io import BytesIO
	from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, DDIMScheduler

	# set device
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	if device.type != 'cuda':
	raise ValueError("need to run on GPU")

	model_id = "stabilityai/stable-diffusion-2-1-base"

	class EndpointHandler():
	def __init__(self, path=""):
	# load the optimized model
	self.textPipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
	self.textPipe.scheduler = DDIMScheduler.from_config(self.textPipe.scheduler.config)
	self.textPipe = self.textPipe.to(device)

	# create an img2img model
	self.imgPipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
	self.imgPipe.scheduler = DDIMScheduler.from_config(self.imgPipe.scheduler.config)
	self.imgPipe = self.imgPipe.to(device)

	def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
	"""
	Args:
	data (:obj:):
	includes the input data and the parameters for the inference.
	Return:
	A :obj:`dict`:. base64 encoded image
	"""
	prompt = data.pop("inputs", data)
	url = data.pop("url", data)
	response = requests.get(url)
	init_image = Image.open(BytesIO(response.content)).convert("RGB")
	init_image.thumbnail((512, 512))

	params = data.pop("parameters", data)

	# hyperparamters
	num_inference_steps = params.pop("num_inference_steps", 25)
	guidance_scale = params.pop("guidance_scale", 7.5)
	negative_prompt = params.pop("negative_prompt", None)
	height = params.pop("height", None)
	width = params.pop("width", None)
	manual_seed = params.pop("manual_seed", -1)

	out = None

	if data.get("url"):
	generator = torch.Generator(device='cuda')
	generator.manual_seed(manual_seed)
	# run img2img pipeline
	out = self.imgPipe(prompt,
	image=init_image,
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	num_images_per_prompt=1,
	negative_prompt=negative_prompt,
	height=height,
	width=width
	)
	else:
	# run text pipeline
	out = self.textPipe(prompt,
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	num_images_per_prompt=1,
	negative_prompt=negative_prompt,
	height=height,
	width=width
	)


	# return first generated PIL image
	return out.images[0]