# gradio_ootd.py: Gradio app for the OOTDiffusion-based Virtual Trial Room demo.
import gradio as gr
import os
from pathlib import Path
import sys
import torch
from PIL import Image, ImageOps
import numpy as np
from utils_ootd import get_mask_location
from cloths_db import cloths_map, modeL_db
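# cloths_map maps garment example filenames to their category label; modeL_db
# maps model example filenames to a dress / no-dress tag.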
PROJECT_ROOT = Path(__file__).absolute().parents[1]
sys.path.insert(0, str(PROJECT_ROOT))
from preprocess.openpose.run_openpose import OpenPose
from preprocess.humanparsing.run_parsing import Parsing
from ootd.inference_ootd_hd import OOTDiffusionHD
from ootd.inference_ootd_dc import OOTDiffusionDC
from preprocess.openpose.annotator.openpose.util import draw_bodypose
# Optionally set the default dtype to half precision (currently disabled).
# torch.set_default_dtype(torch.float16)
openpose_model_hd = OpenPose(0)
parsing_model_hd = Parsing(0)
ootd_model_hd = OOTDiffusionHD(0)
openpose_model_dc = OpenPose(0)
parsing_model_dc = Parsing(0)
ootd_model_dc = OOTDiffusionDC(0)
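# Both OOTDiffusion variants are instantiated once at startup on GPU 0:
# 'hd' (half-body, trained on VITON-HD) and 'dc' (full-body, trained on
# Dress Code), each with its own OpenPose and human-parsing preprocessor.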
category_dict = ['upperbody', 'lowerbody', 'dress']
category_dict_utils = ['upper_body', 'lower_body', 'dresses']
example_path = os.path.join(os.path.dirname(__file__), 'examples')
garment_path = os.path.join(os.path.dirname(__file__), 'examples', 'garment')
model_hd = os.path.join(example_path, 'model/model_1.png')
garment_hd = os.path.join(example_path, 'garment/03244_00.jpg')
model_dc = os.path.join(example_path, 'model/model_8.png')
garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
ootd_model_dc.pipe.to('cuda')
ootd_model_dc.image_encoder.to('cuda')
ootd_model_dc.text_encoder.to('cuda')
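# The DC models are moved to CUDA eagerly so the first request avoids the
# transfer cost; the HD models are moved inside process_hd when it runs.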
def convert_to_image(image_array):
    """Normalize an array-like image to [0, 255] and return it as a PIL Image."""
    if not isinstance(image_array, np.ndarray):
        image_array = np.array(image_array)
    # Normalize to [0, 255]; guard against a constant image (zero value range).
    array_min, array_max = np.min(image_array), np.max(image_array)
    if array_max > array_min:
        image_array = 255 * (image_array - array_min) / (array_max - array_min)
    else:
        image_array = np.zeros_like(image_array)
    return Image.fromarray(image_array.astype(np.uint8))
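# Usage sketch (hypothetical array): turn a float map into a viewable image.
#   preview = convert_to_image(np.random.rand(512, 384))
#   preview.save('preview.png')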
# import spaces
# @spaces.GPU
def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
model_type = 'hd'
category = 0 # 0:upperbody; 1:lowerbody; 2:dress
with torch.no_grad():
openpose_model_hd.preprocessor.body_estimation.model.to('cuda')
ootd_model_hd.pipe.to('cuda')
ootd_model_hd.image_encoder.to('cuda')
ootd_model_hd.text_encoder.to('cuda')
garm_img = Image.open(garm_img).resize((768, 1024))
vton_img = Image.open(vton_img).resize((768, 1024))
keypoints = openpose_model_hd(vton_img.resize((384, 512)))
model_parse, _ = parsing_model_hd(vton_img.resize((384, 512)))
mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
mask = mask.resize((768, 1024), Image.NEAREST)
mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
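        # Image.composite(a, b, mask) keeps pixels from `a` where the mask is
        # white, so the garment region is greyed out and the rest of the person
        # is left untouched.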
masked_vton_img = Image.composite(mask_gray, vton_img, mask)
images = ootd_model_hd(
model_type=model_type,
category=category_dict[category],
image_garm=garm_img,
image_vton=masked_vton_img,
mask=mask,
image_ori=vton_img,
num_samples=n_samples,
num_steps=n_steps,
image_scale=image_scale,
seed=seed,
)
return images
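# Minimal usage sketch for process_hd (bundled example paths defined above; the
# pipeline is assumed to return a list of PIL images):
#   images = process_hd(model_hd, garment_hd, n_samples=1, n_steps=20,
#                       image_scale=2.0, seed=-1)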
# @spaces.GPU
def process_dc(vton_img, garm_img, category):
model_type = 'dc'
    # Map the dropdown label to a category index (0: upper body, 1: lower body, 2: dress).
    if category == 'Upper-body':
        category = 0
    elif category == 'Lower-body':
        category = 1
    else:
        category = 2
with torch.no_grad():
# openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
# ootd_model_dc.pipe.to('cuda')
# ootd_model_dc.image_encoder.to('cuda')
# ootd_model_dc.text_encoder.to('cuda')
garm_img = Image.open(garm_img).resize((768, 1024))
vton_img = Image.open(vton_img).resize((768, 1024))
        keypoints, candidate, subset = openpose_model_dc(vton_img.resize((384, 512)))
# print(len(keypoints["pose_keypoints_2d"]))
# print(keypoints["pose_keypoints_2d"])
# person_image = np.asarray(vton_img)
# print(len(person_image))
# person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
# output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
# output_image = Image.fromarray(output)
# output_image.save('keypose.png')
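        # Keypoint indices follow the OpenPose body ordering: 1 = neck,
        # 2 and 5 = the two shoulders, 8 = a hip point.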
left_point = keypoints["pose_keypoints_2d"][2]
right_point = keypoints["pose_keypoints_2d"][5]
neck_point = keypoints["pose_keypoints_2d"][1]
hip_point = keypoints["pose_keypoints_2d"][8]
print(f'left shoulder - {left_point}')
print(f'right shoulder - {right_point}')
        # Find distances using Euclidean distance.
        shoulder_width_pixels = round(np.hypot(right_point[0] - left_point[0], right_point[1] - left_point[1]), 2)
        # Neck-to-hip distance approximates half the body height, so double it.
        height_pixels = round(np.hypot(neck_point[0] - hip_point[0], neck_point[1] - hip_point[1]), 2) * 2
        # Assume an average human height (172.72 cm), scaled by an empirical factor of 1.5.
        average_height_cm = 172.72 * 1.5
# Conversion factor from pixels to cm
conversion_factor = average_height_cm / height_pixels
# Convert shoulder width to real-world units
shoulder_width_cm = shoulder_width_pixels * conversion_factor
print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
print(f'Estimated height (in pixels): {height_pixels}')
print(f'Conversion factor (pixels to cm): {conversion_factor}')
print(f'Shoulder width (in cm): {shoulder_width_cm}')
print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
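        # Worked example: with height_pixels = 400, conversion_factor is
        # 259.08 / 400, about 0.648 cm per pixel, so a 60 px shoulder width
        # maps to about 38.9 cm (15.3 in).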
model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
model_parse_image = convert_to_image(model_parse)
face_mask_image = convert_to_image(face_mask)
# Save the images
model_parse_image.save('model_parse_image.png')
face_mask_image.save('face_mask_image.png')
mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
# final_mask = convert_to_image(mask)
# final_mask.save("final_mask.png")
# final_mask_grat = convert_to_image(mask_gray)
# final_mask_grat.save("final_mask_grat.png")
mask = mask.resize((768, 1024), Image.NEAREST)
mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
# Save the resized masks
mask.save("mask_resized.png")
mask_gray.save("mask_gray_resized.png")
masked_vton_img = Image.composite(mask_gray, vton_img, mask)
masked_vton_img.save("masked_vton_img.png")
images = ootd_model_dc(
model_type=model_type,
category=category_dict[category],
image_garm=garm_img,
image_vton=masked_vton_img,
mask=mask,
image_ori=vton_img,
num_samples=1,
num_steps=10,
            image_scale=2.0,
seed=-1,
)
return images
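# Minimal smoke test for process_dc (bundled example paths defined above;
# assumes the pipeline returns a list of PIL images):
#   images = process_dc(model_dc, garment_dc, 'Upper-body')
#   images[0].save('tryon_result.png')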
# is_upper = False
block = gr.Blocks().queue()
with block:
with gr.Row():
gr.Markdown("# ")
with gr.Row():
gr.Markdown("## Virtual Trial Room")
# with gr.Row():
# gr.Markdown("")
with gr.Row():
with gr.Column():
vton_img_dc = gr.Image(label="Model", sources='upload', type="filepath", height=384, value=model_dc)
# Hidden component to store is_upper value
# is_upper = gr.State(value=True)
# #set is_upper variable to True when user selects examples from gr.examples upper/lower body
# def check_image_type(image_path):
# if image_path:
# filename = os.path.basename(image_path)
# image_type = modeL_db.get(filename, "no-dress") # Default to "no-dress" if not found
# return image_type == "no-dress"
# return False # Default to True if no image
# vton_img_dc.change(fn=check_image_type, inputs=vton_img_dc)
example = gr.Examples(
label="Select for Upper/Lower Body",
inputs=vton_img_dc,
examples_per_page=7,
examples=[
os.path.join(example_path, 'model/model_8.png'),
os.path.join(example_path, 'model/049447_0.jpg'),
os.path.join(example_path, 'model/049713_0.jpg'),
os.path.join(example_path, 'model/051482_0.jpg'),
os.path.join(example_path, 'model/051918_0.jpg'),
os.path.join(example_path, 'model/051962_0.jpg'),
os.path.join(example_path, 'model/049205_0.jpg'),
],
)
example = gr.Examples(
label="Select for Full Body Dress",
inputs=vton_img_dc,
examples_per_page=7,
examples=[
os.path.join(example_path, 'model/model_9.png'),
os.path.join(example_path, 'model/052767_0.jpg'),
os.path.join(example_path, 'model/052472_0.jpg'),
os.path.join(example_path, 'model/053514_0.jpg'),
os.path.join(example_path, 'model/053228_0.jpg'),
os.path.join(example_path, 'model/052964_0.jpg'),
os.path.join(example_path, 'model/053700_0.jpg'),
],
)
with gr.Column():
garm_img_dc = gr.Image(label="Garment", sources='upload', type="filepath", height=384, value=garment_dc)
            category_dc = gr.Dropdown(label="Garment category (important!)", choices=["Upper-body", "Lower-body", "Dress"], value="Upper-body")
def update_category(image_path):
if image_path:
filename = os.path.basename(image_path)
return cloths_map.get(filename, None) # Return None if not found
return None # Return None if no image
garm_img_dc.change(fn=update_category, inputs=garm_img_dc, outputs=category_dc)
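            # Picking a garment from the examples below auto-selects its category
            # via cloths_map, so the dropdown rarely needs manual changes.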
example = gr.Examples(
label="Examples (upper-body)",
inputs=garm_img_dc,
examples_per_page=7,
examples=[
os.path.join(garment_path,'01260_00.jpg'),
os.path.join(garment_path,'01430_00.jpg'),
os.path.join(garment_path,'02783_00.jpg'),
os.path.join(garment_path,'03751_00.jpg'),
os.path.join(garment_path,'06429_00.jpg'),
os.path.join(garment_path,'06802_00.jpg'),
os.path.join(garment_path,'07429_00.jpg'),
os.path.join(garment_path,'08348_00.jpg'),
os.path.join(garment_path,'09933_00.jpg'),
os.path.join(garment_path,'11028_00.jpg'),
os.path.join(garment_path,'11351_00.jpg'),
os.path.join(garment_path,'11791_00.jpg'),
os.path.join(garment_path, '048554_1.jpg'),
os.path.join(garment_path, '049920_1.jpg'),
os.path.join(garment_path, '049965_1.jpg'),
os.path.join(garment_path, '049949_1.jpg'),
os.path.join(garment_path, '050181_1.jpg'),
os.path.join(garment_path, '049805_1.jpg'),
os.path.join(garment_path, '050105_1.jpg'),
os.path.join(garment_path, 'male_tshirt1.png'),
])
example = gr.Examples(
label="Examples (lower-body)",
inputs=garm_img_dc,
examples_per_page=7,
examples=[
os.path.join(garment_path, '051827_1.jpg'),
os.path.join(garment_path, '051946_1.jpg'),
os.path.join(garment_path, '051473_1.jpg'),
os.path.join(garment_path, '051515_1.jpg'),
os.path.join(garment_path, '051517_1.jpg'),
os.path.join(garment_path, '051988_1.jpg'),
os.path.join(garment_path, '051412_1.jpg'),
])
example = gr.Examples(
label="Examples (dress)",
inputs=garm_img_dc,
examples_per_page=7,
examples=[
os.path.join(garment_path, '053290_1.jpg'),
os.path.join(garment_path, '053744_1.jpg'),
os.path.join(garment_path, '053742_1.jpg'),
os.path.join(garment_path, '053786_1.jpg'),
os.path.join(garment_path, '053790_1.jpg'),
os.path.join(garment_path, '053319_1.jpg'),
os.path.join(garment_path, '052234_1.jpg'),
])
with gr.Column():
result_gallery_dc = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
with gr.Column():
run_button_dc = gr.Button(value="Run")
# n_samples_dc = gr.Slider(label="Images", minimum=1, maximum=4, value=1, step=1)
# n_steps_dc = gr.Slider(label="Steps", minimum=20, maximum=40, value=20, step=1)
# scale_dc = gr.Slider(label="Scale", minimum=1.0, maximum=12.0, value=5.0, step=0.1)
# image_scale_dc = gr.Slider(label="Guidance scale", minimum=1.0, maximum=5.0, value=2.0, step=0.1)
# seed_dc = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
        # Wire the Run button to the DC pipeline.
        ips_dc = [vton_img_dc, garm_img_dc, category_dc]
        run_button_dc.click(fn=process_dc, inputs=ips_dc, outputs=[result_gallery_dc])
block.launch(server_name="0.0.0.0", server_port=7860)