import io
import os
import base64
from PIL import Image
import cv2
import numpy as np
from scripts.generate_prompt import load_wd14_tagger_model, generate_tags, preprocess_image as wd14_preprocess_image
from scripts.lineart_util import scribble_xdog, get_sketch, canny
from scripts.anime import init_model
import torch
from diffusers import StableDiffusionPipeline, StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler, AutoencoderKL
import gc
from peft import PeftModel
from dotenv import load_dotenv
from scripts.hf_utils import download_file
import spaces

# Global variables
use_local = False
model = None
device = None
torch_dtype = None  # torch.float16 if device == "cuda" else torch.float32
sotai_gen_pipe = None
refine_gen_pipe = None


def get_file_path(filename, subfolder):
    # Resolve a model file either from the local subfolder or from the Hugging Face Hub
    if use_local:
        return subfolder + "/" + filename
    else:
        return download_file(filename, subfolder)


def ensure_rgb(image):
    # Convert any PIL image mode (e.g. RGBA, L) to RGB
    if image.mode != 'RGB':
        return image.convert('RGB')
    return image


def initialize(_use_local=False, use_gpu=False, use_dotenv=False):
    if use_dotenv:
        load_dotenv()
    global model, sotai_gen_pipe, refine_gen_pipe, use_local, device, torch_dtype
    # device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
    # torch_dtype = torch.float16 if device == "cuda" else torch.float32
    # The Space runs on ZeroGPU, so CUDA and float16 are assumed here
    device = torch.device('cuda')
    torch_dtype = torch.float16
    use_local = _use_local
    print(f"\nDevice: {device}, Local model: {_use_local}\n")

    init_model(use_local)
    model = load_wd14_tagger_model()
    sotai_gen_pipe = initialize_sotai_model()
    refine_gen_pipe = initialize_refine_model()


def load_lora(pipeline, lora_path, adapter_name, alpha=0.75):
    # Load a LoRA and fuse it into the pipeline at the given weight
    pipeline.load_lora_weights(lora_path, adapter_name=adapter_name)
    pipeline.fuse_lora(lora_scale=alpha, adapter_names=[adapter_name])
    pipeline.set_lora_device(adapter_names=[adapter_name], device=device)


def initialize_sotai_model():
    global device, torch_dtype

    sotai_sd_model_path = get_file_path(os.environ["sotai_sd_model_name"], subfolder=os.environ["sd_models_dir"])
    controlnet_path1 = get_file_path(os.environ["controlnet_name1"], subfolder=os.environ["controlnet_dir2"])
    # controlnet_path1 = get_file_path(os.environ["controlnet_name2"], subfolder=os.environ["controlnet_dir1"])
    controlnet_path2 = get_file_path(os.environ["controlnet_name2"], subfolder=os.environ["controlnet_dir1"])

    # Load the Stable Diffusion model
    sd_pipe = StableDiffusionPipeline.from_single_file(
        sotai_sd_model_path,
        torch_dtype=torch_dtype,
        use_safetensors=True
    ).to(device)

    # Load the first ControlNet model
    controlnet1 = ControlNetModel.from_single_file(
        controlnet_path1,
        torch_dtype=torch_dtype
    ).to(device)

    # Load the second ControlNet model
    controlnet2 = ControlNetModel.from_single_file(
        controlnet_path2,
        torch_dtype=torch_dtype
    ).to(device)

    # Create the ControlNet pipeline, reusing the components of the base pipeline
    sotai_gen_pipe = StableDiffusionControlNetPipeline(
        vae=sd_pipe.vae,
        text_encoder=sd_pipe.text_encoder,
        tokenizer=sd_pipe.tokenizer,
        unet=sd_pipe.unet,
        scheduler=sd_pipe.scheduler,
        safety_checker=sd_pipe.safety_checker,
        feature_extractor=sd_pipe.feature_extractor,
        controlnet=[controlnet1, controlnet2]
    ).to(device)

    # Apply LoRA weights
    lora_names = [
        (os.environ["lora_name1"], 1.0),
        # (os.environ["lora_name2"], 0.3),
    ]
    # for lora_name, alpha in lora_names:
    #     lora_path = get_file_path(lora_name, subfolder=os.environ["lora_dir"])
    #     load_lora(sotai_gen_pipe, lora_path, adapter_name=lora_name.split(".")[0], alpha=alpha)

    # Configure the scheduler
    sotai_gen_pipe.scheduler = UniPCMultistepScheduler.from_config(sotai_gen_pipe.scheduler.config)

    return sotai_gen_pipe


def initialize_refine_model():
    global device, torch_dtype

    refine_sd_model_path = get_file_path(os.environ["refine_sd_model_name"], subfolder=os.environ["sd_models_dir"])
    controlnet_path3 = get_file_path(os.environ["controlnet_name3"], subfolder=os.environ["controlnet_dir1"])
    controlnet_path4 = get_file_path(os.environ["controlnet_name4"], subfolder=os.environ["controlnet_dir1"])
    vae_path = get_file_path(os.environ["vae_name"], subfolder=os.environ["vae_dir"])

    # Load the Stable Diffusion model
    sd_pipe = StableDiffusionPipeline.from_single_file(
        refine_sd_model_path,
        torch_dtype=torch_dtype,
        variant="fp16",
        use_safetensors=True
    ).to(device)

    # Load the first ControlNet model
    # controlnet_path = "models/cn/control_v11p_sd15_canny.pth"
    controlnet1 = ControlNetModel.from_single_file(
        controlnet_path3,
        torch_dtype=torch_dtype
    ).to(device)

    # Load the second ControlNet model
    controlnet2 = ControlNetModel.from_single_file(
        controlnet_path4,
        torch_dtype=torch_dtype
    ).to(device)

    # Create the ControlNet pipeline with a dedicated VAE
    refine_gen_pipe = StableDiffusionControlNetPipeline(
        vae=AutoencoderKL.from_single_file(vae_path, torch_dtype=torch_dtype).to(device),
        text_encoder=sd_pipe.text_encoder,
        tokenizer=sd_pipe.tokenizer,
        unet=sd_pipe.unet,
        scheduler=sd_pipe.scheduler,
        safety_checker=sd_pipe.safety_checker,
        feature_extractor=sd_pipe.feature_extractor,
        controlnet=[controlnet1, controlnet2],  # Pass multiple ControlNets
    ).to(device)

    # Configure the scheduler
    refine_gen_pipe.scheduler = UniPCMultistepScheduler.from_config(refine_gen_pipe.scheduler.config)

    return refine_gen_pipe


def get_wd_tags(images: list) -> list:
    global model
    if model is None:
        raise ValueError("Model is not initialized")
        # initialize()
    preprocessed_images = [wd14_preprocess_image(img) for img in images]
    preprocessed_images = np.array(preprocessed_images)
    return generate_tags(preprocessed_images, os.environ["wd_model_name"], model)


def preprocess_image_for_generation(image):
    if isinstance(image, str):  # base64-encoded string
        image = Image.open(io.BytesIO(base64.b64decode(image)))
    elif isinstance(image, np.ndarray):  # numpy array
        image = Image.fromarray(image)
    elif not isinstance(image, Image.Image):
        raise ValueError("Unsupported image type")

    # Compute the output size, capping the longer side at 736 px while keeping the aspect ratio
    input_width, input_height = image.size
    max_size = 736
    output_width = max_size if input_height < input_width else int(input_width / input_height * max_size)
    output_height = max_size if input_height > input_width else int(input_height / input_width * max_size)
    image = image.resize((output_width, output_height))

    return image, output_width, output_height


def binarize_image(image: Image.Image) -> np.ndarray:
    image = np.array(image.convert('L'))
    # Invert colors
    image = 255 - image
    # Histogram equalization (CLAHE)
    clahe = cv2.createCLAHE(clipLimit=1.0, tileGridSize=(8, 8))
    image = clahe.apply(image)
    # Apply Gaussian blur
    image = cv2.GaussianBlur(image, (5, 5), 0)
    # Adaptive thresholding
    binary_image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 9, -8)
    return binary_image


def create_rgba_image(binary_image: np.ndarray, color: list) -> Image.Image:
    # Build an RGBA image: the given color on a transparent background, with the binary mask as alpha
    rgba_image = np.zeros((binary_image.shape[0], binary_image.shape[1], 4), dtype=np.uint8)
    rgba_image[:, :, 0] = color[0]
    rgba_image[:, :, 1] = color[1]
    rgba_image[:, :, 2] = color[2]
    rgba_image[:, :, 3] = binary_image
    return Image.fromarray(rgba_image, 'RGBA')


# @spaces.GPU
def generate_sotai_image(input_image: Image.Image, output_width: int, output_height: int) -> Image.Image:
    input_image = ensure_rgb(input_image)
    global sotai_gen_pipe
    if sotai_gen_pipe is None:
        raise ValueError("Model is not initialized")
        # initialize()

    prompt = "anime pose, girl, (white background:1.5), (monochrome:1.5), full body, sketch, eyes, breasts, (slim legs, skinny legs:1.2)"
    try:
        # Resize the input image so that the longer side is 512 px
        if input_image.size[0] > input_image.size[1]:
            input_image = input_image.resize((512, int(512 * input_image.size[1] / input_image.size[0])))
        else:
            input_image = input_image.resize((int(512 * input_image.size[0] / input_image.size[1]), 512))

        # Contents of EasyNegativeV2
        easy_negative_v2 = "(worst quality, low quality, normal quality:1.4), lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry, artist name, (bad_prompt_version2:0.8)"

        output = sotai_gen_pipe(
            prompt,
            image=[input_image, input_image],
            negative_prompt="(wings:1.6), (clothes, garment, lighting, gray, missing limb, extra line, extra limb, extra arm, extra legs, hair, bangs, fringe, forelock, front hair, fill:1.4), (ink pool:1.6)",
            # negative_prompt=f"{easy_negative_v2}, (wings:1.6), (clothes, garment, lighting, gray, missing limb, extra line, extra limb, extra arm, extra legs, hair, bangs, fringe, forelock, front hair, fill:1.4), (ink pool:1.6)",
            num_inference_steps=20,
            guidance_scale=8,
            width=output_width,
            height=output_height,
            denoising_strength=0.13,
            num_images_per_prompt=1,  # Equivalent to batch_size
            guess_mode=[True, True],  # Equivalent to pixel_perfect
            controlnet_conditioning_scale=[1.4, 1.3],  # Weight for each ControlNet
            control_guidance_start=[0.0, 0.0],
            control_guidance_end=[1.0, 1.0],
        )
        generated_image = output.images[0]
        return generated_image
    finally:
        # Free GPU memory
        if device.type == "cuda":
            torch.cuda.empty_cache()
        gc.collect()


# @spaces.GPU
def generate_refined_image(prompt: str, original_image: Image.Image, output_width: int, output_height: int, weight1: float, weight2: float) -> Image.Image:
    original_image = ensure_rgb(original_image)
    global refine_gen_pipe
    if refine_gen_pipe is None:
        raise ValueError("Model is not initialized")
        # initialize()

    try:
        original_image_np = np.array(original_image)
        # Extract line art with scribble_xdog
        scribble_image, _ = scribble_xdog(original_image_np, 2048, 20)
        original_image = original_image.resize((output_width, output_height))

        output = refine_gen_pipe(
            prompt,
            image=[scribble_image, original_image],  # One conditioning image per ControlNet
            negative_prompt="extra limb, monochrome, black and white",
            num_inference_steps=20,
            width=output_width,
            height=output_height,
            controlnet_conditioning_scale=[weight1, weight2],  # Weight for each ControlNet
            control_guidance_start=[0.0, 0.0],
            control_guidance_end=[1.0, 1.0],
            guess_mode=[False, False],  # pixel_perfect
        )
        generated_image = output.images[0]
        return generated_image
    finally:
        # Free GPU memory
        if device.type == "cuda":
            torch.cuda.empty_cache()
        gc.collect()


def process_image(input_image, mode: str, weight1: float = 0.4, weight2: float = 0.3):
    input_image = ensure_rgb(input_image)

    # Compute the output size, capping the longer side at 736 px while keeping the aspect ratio
    input_width, input_height = input_image.size
    max_size = 736
    output_width = max_size if input_height < input_width else int(input_width / input_height * max_size)
    output_height = max_size if input_height > input_width else int(input_height / input_width * max_size)

    if mode == "refine":
        # Generate a prompt with the WD-14 tagger
        image_np = np.array(ensure_rgb(input_image))
        prompt = get_wd_tags([image_np])[0]
        prompt = f"{prompt}"

        refined_image = generate_refined_image(prompt, input_image, output_width, output_height, weight1, weight2)
        refined_image = refined_image.convert('RGB')

        # Generate the sketch image
        refined_image_np = np.array(refined_image)
        sketch_image = get_sketch(refined_image_np, "both", 2048, 10)
        sketch_image = sketch_image.resize((output_width, output_height))  # Match the output size

        # Binarize the sketch image
        sketch_binary = binarize_image(sketch_image)

        # Convert to RGBA (transparent base) and draw the sketch lines in blue
        sketch_image = create_rgba_image(sketch_binary, [0, 0, 255])

        # Generate the sotai (base body) image
        sotai_image = generate_sotai_image(refined_image, output_width, output_height)
    elif mode == "original":
        sotai_image = generate_sotai_image(input_image, output_width, output_height)

        # Generate the sketch image
        input_image_np = np.array(input_image)
        sketch_image = get_sketch(input_image_np, "both", 2048, 16)
    elif mode == "sketch":
        # Generate the sketch image
        input_image_np = np.array(input_image)
        sketch_image = get_sketch(input_image_np, "both", 2048, 16)

        # Generate the sotai (base body) image
        sotai_image = generate_sotai_image(sketch_image, output_width, output_height)
    else:
        raise ValueError("Invalid mode")

    # Binarize the sotai image
    sotai_binary = binarize_image(sotai_image)

    # Convert to RGBA (transparent base) and draw the sotai lines in red
    sotai_image = create_rgba_image(sotai_binary, [255, 0, 0])

    return sotai_image, sketch_image


def image_to_base64(img_array):
    buffered = io.BytesIO()
    img_array.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode()


def process_image_as_base64(input_image, mode: str, weight1: float = 0.4, weight2: float = 0.3):
    sotai_image, sketch_image = process_image(input_image, mode, weight1, weight2)
    return image_to_base64(sotai_image), image_to_base64(sketch_image)
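

# --- Hypothetical usage sketch (not part of the original Space code) ---
# A minimal example of driving the pipeline end to end, assuming the model files
# referenced via os.environ (sotai_sd_model_name, controlnet_name1, ...) are available
# locally or on the Hugging Face Hub, a CUDA GPU is present, and "sample_pose.png"
# is a hypothetical input file.
if __name__ == "__main__":
    initialize(_use_local=False, use_gpu=True, use_dotenv=True)
    input_image = Image.open("sample_pose.png")  # hypothetical input
    sotai_b64, sketch_b64 = process_image_as_base64(input_image, mode="original")
    # Decode the base64 results and save the red sotai layer and the sketch layer
    Image.open(io.BytesIO(base64.b64decode(sotai_b64))).save("sotai.png")
    Image.open(io.BytesIO(base64.b64decode(sketch_b64))).save("sketch.png")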