Update app.py
app.py CHANGED
@@ -4,12 +4,7 @@ from PIL import Image
 from src.tryon_pipeline import StableDiffusionXLInpaintPipeline as TryonPipeline
 from src.unet_hacked_garmnet import UNet2DConditionModel as UNet2DConditionModel_ref
 from src.unet_hacked_tryon import UNet2DConditionModel
-from transformers import (
-    CLIPImageProcessor,
-    CLIPVisionModelWithProjection,
-    CLIPTextModel,
-    CLIPTextModelWithProjection,
-)
+from transformers import (CLIPImageProcessor, CLIPVisionModelWithProjection, CLIPTextModel, CLIPTextModelWithProjection,)
 from diffusers import DDPMScheduler,AutoencoderKL
 from typing import List
 
@@ -25,6 +20,9 @@ from preprocess.openpose.run_openpose import OpenPose
 from detectron2.data.detection_utils import convert_PIL_to_numpy,_apply_exif_orientation
 from torchvision.transforms.functional import to_pil_image
 
+topwears = ["shirt", "t-shirt", "top", "blouse", "sweatshirt"]
+bottomwears = ["short", "shorts", "trousers", "leggings", "sweatshirt", "jeans", "skirts", "joggers", "pants", "dhoti", "lungi", "capris", "palazzos"]
+
 
 def pil_to_binary_mask(pil_image, threshold=0):
     np_image = np.array(pil_image)
@@ -40,55 +38,22 @@ def pil_to_binary_mask(pil_image, threshold=0):
     return output_mask
 
 
-base_path = '
+base_path = './IDM-VTON'
 example_path = os.path.join(os.path.dirname(__file__), 'example')
 
-unet = UNet2DConditionModel.from_pretrained(
-    base_path,
-    subfolder="unet",
-    torch_dtype=torch.float16,
-)
+unet = UNet2DConditionModel.from_pretrained(base_path, subfolder="unet", torch_dtype=torch.float16,)
 unet.requires_grad_(False)
-tokenizer_one = AutoTokenizer.from_pretrained(
-    base_path,
-    subfolder="tokenizer",
-    revision=None,
-    use_fast=False,
-)
-tokenizer_two = AutoTokenizer.from_pretrained(
-    base_path,
-    subfolder="tokenizer_2",
-    revision=None,
-    use_fast=False,
-)
+tokenizer_one = AutoTokenizer.from_pretrained(base_path, subfolder="tokenizer", revision=None, use_fast=False,)
+tokenizer_two = AutoTokenizer.from_pretrained(base_path, subfolder="tokenizer_2", revision=None, use_fast=False,)
 noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
 
-text_encoder_one = CLIPTextModel.from_pretrained(
-    base_path,
-    subfolder="text_encoder",
-    torch_dtype=torch.float16,
-)
-text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
-    base_path,
-    subfolder="text_encoder_2",
-    torch_dtype=torch.float16,
-)
-image_encoder = CLIPVisionModelWithProjection.from_pretrained(
-    base_path,
-    subfolder="image_encoder",
-    torch_dtype=torch.float16,
-)
-vae = AutoencoderKL.from_pretrained(base_path,
-    subfolder="vae",
-    torch_dtype=torch.float16,
-)
+text_encoder_one = CLIPTextModel.from_pretrained(base_path, subfolder="text_encoder", torch_dtype=torch.float16,)
+text_encoder_two = CLIPTextModelWithProjection.from_pretrained(base_path, subfolder="text_encoder_2", torch_dtype=torch.float16,)
+image_encoder = CLIPVisionModelWithProjection.from_pretrained(base_path, subfolder="image_encoder", torch_dtype=torch.float16,)
+vae = AutoencoderKL.from_pretrained(base_path, subfolder="vae", torch_dtype=torch.float16,)
 
 # "stabilityai/stable-diffusion-xl-base-1.0",
-UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
-    base_path,
-    subfolder="unet_encoder",
-    torch_dtype=torch.float16,
-)
+UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(base_path, subfolder="unet_encoder", torch_dtype=torch.float16,)
 
 parsing_model = Parsing(0)
 openpose_model = OpenPose(0)
@@ -99,12 +64,7 @@ vae.requires_grad_(False)
 unet.requires_grad_(False)
 text_encoder_one.requires_grad_(False)
 text_encoder_two.requires_grad_(False)
-tensor_transfrom = transforms.Compose(
-    [
-        transforms.ToTensor(),
-        transforms.Normalize([0.5], [0.5]),
-    ]
-)
+tensor_transfrom = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5]),])
 
 pipe = TryonPipeline.from_pretrained(
     base_path,
@@ -150,7 +110,15 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
     if is_checked:
         keypoints = openpose_model(human_img.resize((384,512)))
         model_parse, _ = parsing_model(human_img.resize((384,512)))
-        mask, mask_gray = get_mask_location('hd', "upper_body", model_parse, keypoints)
+
+        # using lambda functions to check if the description contains any -
+        contains_word = lambda s, l: any(map(lambda x: x in s, l))
+        # topwears
+        if contains_word(desc,topwears):
+            mask, mask_gray = get_mask_location('hd', "upper_body", model_parse, keypoints)
+        # bottomwears
+        if contains_word(desc,bottomwears):
+            mask, mask_gray = get_mask_location('hd', "lower_body", model_parse, keypoints)
         mask = mask.resize((768,1024))
     else:
         mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
@@ -277,14 +245,25 @@ with image_blocks as demo:
         )
 
         with gr.Column():
-            garm_img = gr.Image(label="
+            garm_img = gr.Image(label="Topwear", sources='upload', type="pil")
             with gr.Row(elem_id="prompt-container"):
                 with gr.Row():
-                    prompt = gr.Textbox(placeholder="Description of
+                    prompt = gr.Textbox(placeholder="Description of topwear ex) Short Sleeve Black Round Neck T-shirts", show_label=False, elem_id="prompt")
             example = gr.Examples(
                 inputs=garm_img,
                 examples_per_page=8,
                 examples=garm_list_path)
+
+        with gr.Column():
+            garm_img = gr.Image(label="Bottomwear", sources='upload', type="pil")
+            with gr.Row(elem_id="prompt-container"):
+                with gr.Row():
+                    prompt = gr.Textbox(placeholder="Description of bottomwear ex) Olive Cargo Pants", show_label=False, elem_id="prompt")
+            example = gr.Examples(
+                inputs=garm_img,
+                examples_per_page=8,
+                examples=garm_list_path)
+
         with gr.Column():
             # image_out = gr.Image(label="Output", elem_id="output-img", height=400)
             masked_img = gr.Image(label="Masked image output", elem_id="masked-img",show_share_button=False)
@@ -294,7 +273,6 @@ with image_blocks as demo:
 
 
 
-
     with gr.Column():
         try_button = gr.Button(value="Try-on")
         with gr.Accordion(label="Advanced Settings", open=False):
@@ -306,7 +284,5 @@ with image_blocks as demo:
 
     try_button.click(fn=start_tryon, inputs=[imgs, garm_img, prompt, is_checked,is_checked_crop, denoise_steps, seed], outputs=[image_out,masked_img], api_name='tryon')
 
-
-
-
+
 image_blocks.launch()
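The functional core of this commit is the keyword routing added inside start_tryon: the garment description now decides whether get_mask_location masks the upper or the lower body. The sketch below isolates that logic so it can be run on its own; select_region is a hypothetical helper standing in for the real get_mask_location call, and it assumes the description is lower-cased before matching (the diff does not show where desc is prepared).

# Standalone sketch of the keyword-based mask-region selection added in this commit.
# The two keyword lists are copied from the diff; note that "sweatshirt" appears in both.
topwears = ["shirt", "t-shirt", "top", "blouse", "sweatshirt"]
bottomwears = ["short", "shorts", "trousers", "leggings", "sweatshirt", "jeans", "skirts", "joggers", "pants", "dhoti", "lungi", "capris", "palazzos"]

# True if any keyword from the list occurs as a substring of the description.
contains_word = lambda s, l: any(x in s for x in l)

def select_region(description):
    """Hypothetical helper: map a garment description to a mask-region name."""
    desc = description.lower()            # assumption: matching is case-insensitive
    region = None
    if contains_word(desc, topwears):
        region = "upper_body"
    if contains_word(desc, bottomwears):  # runs after the topwear check, so it can
        region = "lower_body"             # overwrite it, mirroring start_tryon
    return region

print(select_region("White Cotton Blouse"))  # -> upper_body
print(select_region("Blue Denim Jeans"))     # -> lower_body

Because both checks run in sequence, a description that matches both lists (anything containing "sweatshirt", which appears in both) is routed to the lower-body mask.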