# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

import os
import sys
import time
import subprocess
from cog import BasePredictor, Input, Path
import cv2
import torch
import numpy as np
from PIL import Image
from diffusers.utils import load_image
from diffusers.models import ControlNetModel
from insightface.app import FaceAnalysis
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from pipeline_stable_diffusion_xl_instantid import (
StableDiffusionXLInstantIDPipeline,
draw_kps,
)


# for `ip-adapter`, `ControlNetModel`, and `stable-diffusion-xl-base-1.0`
CHECKPOINTS_CACHE = "./checkpoints"
CHECKPOINTS_URL = (
"https://weights.replicate.delivery/default/InstantID/checkpoints.tar"
)

# for `models/antelopev2`
MODELS_CACHE = "./models"
MODELS_URL = "https://weights.replicate.delivery/default/InstantID/models.tar"


def resize_img(
input_image,
max_side=1280,
min_side=1024,
size=None,
pad_to_max_side=False,
mode=Image.BILINEAR,
base_pixel_number=64,
):
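    """Resize to SDXL-friendly dimensions: scale the image relative to
    min_side/max_side, then snap both sides down to multiples of
    base_pixel_number. If `size` is given it is used directly;
    `pad_to_max_side` centers the result on a white max_side x max_side
    canvas."""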
w, h = input_image.size
if size is not None:
w_resize_new, h_resize_new = size
else:
ratio = min_side / min(h, w)
w, h = round(ratio * w), round(ratio * h)
ratio = max_side / max(h, w)
input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
input_image = input_image.resize([w_resize_new, h_resize_new], mode)
if pad_to_max_side:
res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
offset_x = (max_side - w_resize_new) // 2
offset_y = (max_side - h_resize_new) // 2
res[
offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new
] = np.array(input_image)
input_image = Image.fromarray(res)
return input_image


def download_weights(url, dest):
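    """Download `url` to `dest` with pget; `-x` extracts the tarball in place."""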
start = time.time()
print("downloading url: ", url)
print("downloading to: ", dest)
subprocess.check_call(["pget", "-x", url, dest], close_fds=False)
print("downloading took: ", time.time() - start)


class Predictor(BasePredictor):
def setup(self) -> None:
"""Load the model into memory to make running multiple predictions efficient"""
if not os.path.exists(CHECKPOINTS_CACHE):
download_weights(CHECKPOINTS_URL, CHECKPOINTS_CACHE)
if not os.path.exists(MODELS_CACHE):
download_weights(MODELS_URL, MODELS_CACHE)
self.width, self.height = 640, 640
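        # antelopev2 bundles InsightFace's detection and recognition models;
        # the recognition model produces the identity embedding InstantID needs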
self.app = FaceAnalysis(
name="antelopev2",
root="./",
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
self.app.prepare(ctx_id=0, det_size=(self.width, self.height))
# Path to InstantID models
        face_adapter = "./checkpoints/ip-adapter.bin"
        controlnet_path = "./checkpoints/ControlNetModel"
# Load pipeline
self.controlnet = ControlNetModel.from_pretrained(
controlnet_path,
torch_dtype=torch.float16,
cache_dir=CHECKPOINTS_CACHE,
local_files_only=True,
)
base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
self.pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
base_model_path,
controlnet=self.controlnet,
torch_dtype=torch.float16,
cache_dir=CHECKPOINTS_CACHE,
local_files_only=True,
)
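        # Move the pipeline to the GPU and attach the InstantID IP-Adapter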
self.pipe.cuda()
self.pipe.load_ip_adapter_instantid(face_adapter)

    def predict(
self,
image: Path = Input(description="Input image"),
prompt: str = Input(
description="Input prompt",
default="analog film photo of a man. faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage, masterpiece, best quality",
),
negative_prompt: str = Input(
description="Input Negative Prompt",
default="",
),
width: int = Input(
description="Width of output image",
default=640,
ge=512,
le=2048,
),
height: int = Input(
description="Height of output image",
default=640,
ge=512,
le=2048,
),
ip_adapter_scale: float = Input(
description="Scale for IP adapter",
default=0.8,
ge=0,
le=1,
),
controlnet_conditioning_scale: float = Input(
description="Scale for ControlNet conditioning",
default=0.8,
ge=0,
le=1,
),
num_inference_steps: int = Input(
description="Number of denoising steps",
default=30,
ge=1,
le=500,
),
guidance_scale: float = Input(
description="Scale for classifier-free guidance",
default=5,
ge=1,
le=50,
),
) -> Path:
"""Run a single prediction on the model"""
if self.width != width or self.height != height:
print(f"[!] Resizing output to {width}x{height}")
self.width = width
self.height = height
self.app.prepare(ctx_id=0, det_size=(self.width, self.height))
face_image = load_image(str(image))
face_image = resize_img(face_image)
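        # Detect faces; InsightFace expects BGR, hence the RGB -> BGR conversion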
face_info = self.app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
        if len(face_info) == 0:
            raise ValueError("No face detected in the input image")
        # only use the largest detected face (by bounding-box area)
        face_info = max(
            face_info,
            key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]),
        )
face_emb = face_info["embedding"]
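        # Render the five facial keypoints; this image is the ControlNet condition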
face_kps = draw_kps(face_image, face_info["kps"])
self.pipe.set_ip_adapter_scale(ip_adapter_scale)
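        # Generate: the identity embedding drives the IP-Adapter cross-attention,
        # while the keypoint image drives the ControlNet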
        output_image = self.pipe(
prompt=prompt,
negative_prompt=negative_prompt,
image_embeds=face_emb,
image=face_kps,
controlnet_conditioning_scale=controlnet_conditioning_scale,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
).images[0]
output_path = "result.jpg"
        output_image.save(output_path)
return Path(output_path)
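
# Example invocation (illustrative; assumes a face image at ./face.jpg):
#   cog predict -i image=@face.jpg -i prompt="analog film photo of a man"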