IDM-VTON

Runtime error

App Files Files Community

IDM-VTON / app.py

Saad0KH

Update app.py

b56c80f verified 10 months ago

raw

history blame

9.83 kB

	import base64
	import logging
	import os
	import uuid
	from io import BytesIO

	import numpy as np
	import requests
	import torch
	from flask import Flask, request, jsonify, send_file
	from PIL import Image
	from torchvision import transforms
	from torchvision.transforms.functional import to_pil_image
	from transformers import (
	    CLIPImageProcessor,
	    CLIPVisionModelWithProjection,
	    CLIPTextModel,
	    CLIPTextModelWithProjection,
	    AutoTokenizer
	)
	from diffusers import DDPMScheduler, AutoencoderKL
	from detectron2.data.detection_utils import convert_PIL_to_numpy, _apply_exif_orientation

	from preprocess.humanparsing.run_parsing import Parsing
	from preprocess.openpose.run_openpose import OpenPose
	from src.tryon_pipeline import StableDiffusionXLInpaintPipeline as TryonPipeline
	from src.unet_hacked_garmnet import UNet2DConditionModel as UNet2DConditionModel_ref
	from src.unet_hacked_tryon import UNet2DConditionModel
	import apply_net

	# Flask application object; the @app.route decorators below register handlers on it.
	app = Flask(__name__)

	# Root under which the model sub-folders (tokenizer, scheduler, vae, ...) are looked up.
	# NOTE(review): looks like a Hugging Face Hub repo id rather than a local directory — confirm.
	base_path = 'yisol/IDM-VTON'
	# Path of an "example" directory located next to this file.
	example_path = os.path.join(os.path.dirname(__file__), 'example')

	# Load models
	def load_model(name, subfolder, dtype=torch.float16):
	    """Load a serialized checkpoint from ``<base_path>/<subfolder>/<name>``.

	    Args:
	        name: File name of the checkpoint inside ``subfolder``.
	        subfolder: Directory under ``base_path`` that holds the checkpoint.
	        dtype: Dtype the loaded object is cast to when it exposes ``.to``;
	            pass ``None`` to keep the stored dtype.

	    Returns:
	        The object stored in the checkpoint, mapped to CUDA when available
	        and to CPU otherwise.
	    """
	    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	    checkpoint_path = os.path.join(base_path, subfolder, name)
	    # SECURITY NOTE: torch.load unpickles the file; only load checkpoints
	    # that come from a trusted source.
	    model = torch.load(checkpoint_path, map_location=device)
	    # torch.load has no ``dtype`` parameter: unknown keywords are forwarded
	    # to the unpickler, which rejects them. Cast after loading instead.
	    if dtype is not None and hasattr(model, 'to'):
	        model = model.to(dtype)
	    return model

	# Try-on UNet, loaded from a serialized checkpoint via load_model().
	# NOTE(review): with base_path set to what looks like a Hub repo id, this reads
	# "yisol/IDM-VTON/unet/unet.pt" as a local file — confirm that path exists at runtime.
	unet = load_model("unet.pt", "unet")
	# Slow (use_fast=False) tokenizers; handed to the pipeline below as tokenizer / tokenizer_2.
	tokenizer_one = AutoTokenizer.from_pretrained(base_path, subfolder="tokenizer", use_fast=False)
	tokenizer_two = AutoTokenizer.from_pretrained(base_path, subfolder="tokenizer_2", use_fast=False)
	# Scheduler configuration read from the "scheduler" sub-folder.
	noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")

	# Text encoders, image encoder and VAE, all requested in float16.
	text_encoder_one = CLIPTextModel.from_pretrained(base_path, subfolder="text_encoder", torch_dtype=torch.float16)
	text_encoder_two = CLIPTextModelWithProjection.from_pretrained(base_path, subfolder="text_encoder_2", torch_dtype=torch.float16)
	image_encoder = CLIPVisionModelWithProjection.from_pretrained(base_path, subfolder="image_encoder", torch_dtype=torch.float16)
	vae = AutoencoderKL.from_pretrained(base_path, subfolder="vae", torch_dtype=torch.float16)

	# Second UNet from the "unet_encoder" sub-folder; attached to the pipeline as pipe.unet_encoder.
	UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(base_path, subfolder="unet_encoder", torch_dtype=torch.float16)

	# Human-parsing and pose preprocessors used by start_tryon().
	# NOTE(review): the argument 0 is presumably a GPU index — confirm against the preprocess package.
	parsing_model = Parsing(0)
	openpose_model = OpenPose(0)

	# The service only runs inference, so gradient tracking is switched off on every network.
	for model in (
	    unet,
	    UNet_Encoder,
	    image_encoder,
	    vae,
	    text_encoder_one,
	    text_encoder_two,
	):
	    model.requires_grad_(False)

	# PIL image -> tensor, then normalization with mean 0.5 and std 0.5.
	# The misspelled name is referenced in start_tryon(), so it is kept as is.
	tensor_transfrom = transforms.Compose([
	transforms.ToTensor(),
	transforms.Normalize([0.5], [0.5]),
	])

	# Assemble the try-on pipeline from the components loaded above; every
	# component is passed explicitly and float16 is requested for the pipeline.
	pipe = TryonPipeline.from_pretrained(
	base_path,
	unet=unet,
	vae=vae,
	feature_extractor=CLIPImageProcessor(),
	text_encoder=text_encoder_one,
	text_encoder_2=text_encoder_two,
	tokenizer=tokenizer_one,
	tokenizer_2=tokenizer_two,
	scheduler=noise_scheduler,
	image_encoder=image_encoder,
	torch_dtype=torch.float16
	)
	# The second UNet is not a from_pretrained argument here; it is attached as an attribute.
	pipe.unet_encoder = UNet_Encoder

	def pil_to_binary_mask(pil_image, threshold=0):
	    """Turn an image into a black/white mask.

	    The image is converted to 8-bit grayscale; every pixel strictly
	    greater than ``threshold`` becomes 255 and every other pixel becomes 0.

	    Args:
	        pil_image: Image object exposing ``convert``.
	        threshold: Grayscale value a pixel must exceed to be part of the mask.

	    Returns:
	        A new image built from the uint8 mask array.
	    """
	    # Relies on the module-level ``import numpy as np``; the original
	    # referenced ``np`` without importing it.
	    grayscale = np.asarray(pil_image.convert("L"))
	    mask = np.where(grayscale > threshold, 255, 0).astype(np.uint8)
	    return Image.fromarray(mask)

	def get_image_from_url(url, timeout=30):
	    """Download ``url`` and open the response body as an image.

	    Args:
	        url: HTTP(S) address of the image.
	        timeout: Seconds to wait for the server before giving up. Without
	            a timeout a stalled server would block the request forever.

	    Returns:
	        The image opened from the downloaded bytes.

	    Raises:
	        Exception: Any download or decoding error is logged and re-raised.
	    """
	    # NOTE(review): the URL comes from the request body via process_image();
	    # consider restricting the allowed hosts to avoid server-side request forgery.
	    try:
	        response = requests.get(url, timeout=timeout)
	        response.raise_for_status()
	        return Image.open(BytesIO(response.content))
	    except Exception as e:
	        logging.error(f"Error fetching image from URL: {e}")
	        raise

	def decode_image_from_base64(base64_str):
	    """Decode a base64 payload and open it as an image.

	    Accepts either a bare base64 string or a data URI such as
	    ``data:image/png;base64,<payload>``.

	    Args:
	        base64_str: Base64-encoded image bytes, optionally with a data-URI prefix.

	    Returns:
	        The image opened from the decoded bytes.

	    Raises:
	        Exception: Any decoding error is logged and re-raised.
	    """
	    try:
	        # Drop a "data:<mime>;base64," header. Lenient base64 decoding would
	        # otherwise fold the header's characters into the payload and corrupt it.
	        if isinstance(base64_str, str) and base64_str.startswith("data:"):
	            base64_str = base64_str.partition(",")[2]
	        img_data = base64.b64decode(base64_str)
	        return Image.open(BytesIO(img_data))
	    except Exception as e:
	        logging.error(f"Error decoding image: {e}")
	        raise

	def encode_image_to_base64(img):
	    """Serialize ``img`` as PNG and return the bytes as a base64 text string.

	    Args:
	        img: Object exposing ``save(fileobj, format=...)``.

	    Returns:
	        UTF-8 text holding the base64 encoding of the PNG bytes.

	    Raises:
	        Exception: Any failure while saving or encoding is logged and re-raised.
	    """
	    try:
	        png_buffer = BytesIO()
	        img.save(png_buffer, format="PNG")
	        png_bytes = png_buffer.getvalue()
	        encoded = base64.b64encode(png_bytes)
	        return str(encoded, "utf-8")
	    except Exception as e:
	        logging.error(f"Error encoding image: {e}")
	        raise

	def save_image(img):
	    """Write ``img`` as a lossless WebP file under a random UUID name.

	    Args:
	        img: Object exposing ``save(path, format=..., lossless=...)``.

	    Returns:
	        The generated file name (``<uuid4>.webp``); it is a relative path.
	    """
	    file_name = str(uuid.uuid4()) + ".webp"
	    img.save(file_name, format="WEBP", lossless=True)
	    return file_name

	# NOTE(review): neither `spaces` (used by the decorator) nor `get_mask_location`
	# is imported in the visible part of this file — confirm both are in scope.
	@spaces.GPU
	def start_tryon(human_dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed, categorie='upper_body'):
	    """Run the try-on pipeline for one person image and one garment image.

	    Args:
	        human_dict: Mapping with a "background" image (the person) and a
	            "layers" list whose first image is used as the mask when
	            ``is_checked`` is false.
	        garm_img: Garment image.
	        garment_des: Garment description inserted into the text prompts.
	        is_checked: When true, the mask is derived from pose keypoints and
	            human parsing; otherwise it is taken from ``human_dict['layers'][0]``.
	        is_checked_crop: When true, the person image is center-cropped to a
	            3:4 aspect ratio first and the result is pasted back into the original.
	        denoise_steps: Number of inference steps passed to the pipeline.
	        seed: Seed for the random generator, or ``None`` for no fixed seed.
	        categorie: Garment category forwarded to ``get_mask_location``.

	    Returns:
	        Tuple ``(result_image, mask_gray)`` where ``mask_gray`` is the person
	        image with the masked region blanked out.
	    """
	    device = "cuda"
	    openpose_model.preprocessor.body_estimation.model.to(device)
	    pipe.to(device)
	    pipe.unet_encoder.to(device)

	    garm_img = garm_img.convert("RGB").resize((768, 1024))
	    human_img_orig = human_dict["background"].convert("RGB")

	    if is_checked_crop:
	        # Largest centered 3:4 window that fits inside the original image.
	        width, height = human_img_orig.size
	        target_width = min(width, height * (3 / 4))
	        target_height = min(height, width * (4 / 3))
	        left = (width - target_width) / 2
	        top = (height - target_height) / 2
	        cropped_img = human_img_orig.crop((left, top, width - left, height - top))
	        crop_size = cropped_img.size
	        human_img = cropped_img.resize((768, 1024))
	    else:
	        human_img = human_img_orig.resize((768, 1024))

	    if is_checked:
	        keypoints = openpose_model(human_img.resize((384, 512)))
	        model_parse, _ = parsing_model(human_img.resize((384, 512)))
	        # The second return value is discarded; mask_gray is recomputed below.
	        mask, _ = get_mask_location('hd', categorie, model_parse, keypoints)
	        mask = mask.resize((768, 1024))
	    else:
	        mask = pil_to_binary_mask(human_dict['layers'][0].convert("RGB").resize((768, 1024)))

	    # Blank out the masked region of the normalized person image, then map
	    # the values back from [-1, 1] to [0, 1] for conversion to a PIL image.
	    mask_gray = (1 - transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
	    mask_gray = to_pil_image((mask_gray + 1.0) / 2.0)

	    human_img_arg = _apply_exif_orientation(human_img.resize((384, 512)))
	    human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")

	    args = apply_net.create_argument_parser().parse_args(
	        ('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda')
	    )
	    # Reverse the channel axis of the DensePose output before building the image.
	    pose_img = args.func(args, human_img_arg)[:, :, ::-1]
	    pose_img = Image.fromarray(pose_img).resize((768, 1024))

	    with torch.no_grad(), torch.cuda.amp.autocast():
	        prompt = f"model is wearing {garment_des}"
	        negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
	        # The original called `.to()` on this result and also indexed it with
	        # [1], [2], [3]; unpack once so each embedding is used consistently.
	        # NOTE(review): assumes encode_prompt returns a 4-tuple in this order,
	        # as the original indexing implies — confirm against src/tryon_pipeline.
	        (
	            prompt_embeds,
	            negative_prompt_embeds,
	            pooled_prompt_embeds,
	            negative_pooled_prompt_embeds,
	        ) = pipe.encode_prompt(prompt, num_images_per_prompt=1, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
	        prompt = f"a photo of {garment_des}"
	        # Only the first element (the prompt embedding) is needed for the garment prompt.
	        prompt_embeds_c = pipe.encode_prompt(prompt, num_images_per_prompt=1, do_classifier_free_guidance=False, negative_prompt=negative_prompt)[0]

	        pose_img = tensor_transfrom(pose_img).unsqueeze(0).to(device, torch.float16)
	        garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device, torch.float16)
	        generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
	        images = pipe(
	            prompt_embeds=prompt_embeds.to(device, torch.float16),
	            negative_prompt_embeds=negative_prompt_embeds.to(device, torch.float16),
	            pooled_prompt_embeds=pooled_prompt_embeds.to(device, torch.float16),
	            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device, torch.float16),
	            num_inference_steps=denoise_steps,
	            generator=generator,
	            strength=1.0,
	            pose_img=pose_img,
	            text_embeds_cloth=prompt_embeds_c.to(device, torch.float16),
	            cloth=garm_tensor,
	            mask_image=mask,
	            image=human_img,
	            height=1024,
	            width=768,
	            ip_adapter_image=garm_img.resize((768, 1024)),
	            guidance_scale=2.0
	        )[0]

	    if is_checked_crop:
	        # Scale the result back to the crop window and paste it into the original image.
	        out_img = images[0].resize(crop_size)
	        human_img_orig.paste(out_img, (int(left), int(top)))
	        return human_img_orig, mask_gray
	    return images[0], mask_gray

	def clear_gpu_memory():
	    """Release cached CUDA memory and wait for pending GPU work.

	    Does nothing on hosts without CUDA, where ``torch.cuda.synchronize()``
	    would otherwise raise. This matches the CPU fallback in ``load_model``.
	    """
	    if not torch.cuda.is_available():
	        return
	    torch.cuda.empty_cache()
	    torch.cuda.synchronize()

	def process_image(image_data):
	    """Load an image from either an http(s) URL or a base64 string.

	    Args:
	        image_data: Text that is fetched as a URL when it starts with
	            ``http://`` or ``https://`` and decoded as base64 otherwise.

	    Returns:
	        Whatever the selected loader returns for ``image_data``.
	    """
	    is_remote = image_data.startswith('http://') or image_data.startswith('https://')
	    loader = get_image_from_url if is_remote else decode_image_from_base64
	    return loader(image_data)

	@app.route('/tryon', methods=['POST'])
	def tryon():
	    """Handle POST /tryon: run a try-on and return the images as base64 PNGs.

	    Expected JSON body:
	        human_image (required): URL or base64 string of the person image.
	        garment_image (required): URL or base64 string of the garment image.
	        category, description, checked, checked_crop, denoise_steps, seed:
	            optional; defaults are 'upper_body', '', False, False, 50, None.

	    Returns:
	        200 with ``result_image`` and ``mask_image`` on success,
	        400 when the body is not a JSON object or a required field is
	        missing or not a non-empty string,
	        500 with the error text when processing fails.
	    """
	    # silent=True yields None instead of raising on a missing or invalid
	    # JSON body, so the client gets a JSON 400 rather than an HTML error page.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return jsonify({'error': 'Request body must be a JSON object'}), 400

	    invalid_fields = [
	        field for field in ('human_image', 'garment_image')
	        if not (isinstance(data.get(field), str) and data[field])
	    ]
	    if invalid_fields:
	        return jsonify({'error': 'Missing or invalid field(s): ' + ', '.join(invalid_fields)}), 400

	    try:
	        human_image_data = process_image(data['human_image'])
	        garment_image_data = process_image(data['garment_image'])
	        category = data.get('category', 'upper_body')
	        description = data.get('description', '')
	        checked = data.get('checked', False)
	        checked_crop = data.get('checked_crop', False)
	        denoise_steps = data.get('denoise_steps', 50)
	        seed = data.get('seed', None)

	        # The person image doubles as the single "layer", which start_tryon
	        # uses as the mask source when `checked` is false.
	        human_dict = {
	            "background": human_image_data,
	            "layers": [human_image_data],
	        }

	        result_img, mask_img = start_tryon(
	            human_dict,
	            garment_image_data,
	            description,
	            checked,
	            checked_crop,
	            denoise_steps,
	            seed,
	            category
	        )

	        encoded_image = encode_image_to_base64(result_img)
	        encoded_mask = encode_image_to_base64(mask_img)

	        return jsonify({
	            'result_image': encoded_image,
	            'mask_image': encoded_mask,
	        })

	    except Exception as e:
	        # Top-level boundary: log and report the failure instead of crashing the worker.
	        logging.error(f"Error in /tryon endpoint: {e}")
	        return jsonify({'error': str(e)}), 500

	# Route that serves a previously generated image
	@app.route('/api/get_image/<image_id>', methods=['GET'])
	def get_image(image_id):
	    """Serve a generated WebP image by file name.

	    Args:
	        image_id: File name of the form ``<uuid4>.webp``, as produced by
	            ``save_image``.

	    Returns:
	        The file with mimetype ``image/webp``, or a JSON 404 when the name
	        is not a generated image name or the file does not exist.
	    """
	    # image_id is caller-controlled. Only canonical "<uuid>.webp" names are
	    # accepted so the route cannot be used to read other files
	    # (e.g. the application source).
	    stem, extension = os.path.splitext(image_id)
	    try:
	        is_generated_name = extension == '.webp' and str(uuid.UUID(stem)) == stem
	    except ValueError:
	        is_generated_name = False
	    if not is_generated_name:
	        return jsonify({'error': 'Image not found'}), 404

	    # Send the image back
	    try:
	        return send_file(image_id, mimetype='image/webp')
	    except FileNotFoundError:
	        return jsonify({'error': 'Image not found'}), 404

	# When run as a script, start Flask's built-in server on all interfaces, port 7860, with debug off.
	if __name__ == "__main__":
	    app.run(debug=False, host="0.0.0.0", port=7860)