"""Virtual try-on demo.

Overlays a clothing image onto a photo of a person.  MediaPipe Pose supplies
the body landmarks that size and position the garment, a DeepLabV3
segmentation model supplies a person mask that restricts blending to body
pixels, and a small Gradio app exposes the pipeline in a browser.
"""

import colorsys
import threading
from enum import Enum
from pathlib import Path
from typing import Dict, Optional, Tuple

import cv2
import numpy as np
import mediapipe as mp
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import gradio as gr
import torch.nn.functional as F


class ClothingType(Enum):
    """Garment categories the try-on pipeline knows how to place."""

    SHIRT = "shirt"
    PANTS = "pants"
    DRESS = "dress"
    JACKET = "jacket"


# Index of the "person" label in the Pascal-VOC style label set predicted by
# torchvision's pretrained DeepLabV3 weights.
PERSON_CLASS_INDEX = 15

# Per-garment layout:
#   (left anchor, right anchor, bottom landmark, width scale, height scale)
# The two anchors give the garment's horizontal extent; its vertical extent
# runs from the left anchor down to the bottom landmark.  Names are members
# of ``mp.solutions.pose.PoseLandmark``.
_GARMENT_LAYOUT = {
    ClothingType.SHIRT: ("LEFT_SHOULDER", "RIGHT_SHOULDER", "LEFT_HIP", 1.3, 1.1),
    ClothingType.JACKET: ("LEFT_SHOULDER", "RIGHT_SHOULDER", "LEFT_HIP", 1.3, 1.1),
    ClothingType.PANTS: ("LEFT_HIP", "RIGHT_HIP", "LEFT_ANKLE", 1.5, 1.05),
    ClothingType.DRESS: ("LEFT_SHOULDER", "RIGHT_SHOULDER", "LEFT_KNEE", 1.4, 1.1),
}


class BodySegmentation(nn.Module):
    """Thin wrapper around torchvision's pretrained DeepLabV3 (ResNet-50)."""

    def __init__(self):
        super().__init__()
        # Load DeepLabV3 for semantic segmentation (fetched via torch.hub).
        self.model = torch.hub.load(
            'pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True
        )
        self.model.eval()

    def forward(self, x):
        """Return the per-class score map (the model's ``'out'`` entry)."""
        return self.model(x)['out']


class VirtualTryOn:
    """Pose-guided, segmentation-masked clothing overlay.

    All image arguments of the public methods are OpenCV-style BGR (or BGRA
    for clothing) ``uint8`` arrays.
    """

    def __init__(self):
        # Initialize MediaPipe
        self.mp_pose = mp.solutions.pose
        self.mp_holistic = mp.solutions.holistic
        self.pose = self.mp_pose.Pose(
            static_image_mode=True,
            model_complexity=2,
            min_detection_confidence=0.5,
        )
        # NOTE(review): nothing in this file reads ``self.holistic``; it is
        # kept so the attribute stays available to external callers.
        self.holistic = self.mp_holistic.Holistic(
            static_image_mode=True,
            model_complexity=2,
            min_detection_confidence=0.5,
        )

        # Initialize body segmentation
        self.segmentation = BodySegmentation()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.segmentation.to(self.device)

        # Image transforms (ImageNet mean/std normalisation)
        self.transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    def get_body_segmentation(self, image: np.ndarray) -> np.ndarray:
        """Return a binary person mask for a BGR image.

        Args:
            image: BGR image.

        Returns:
            ``uint8`` array holding 1 where the model's top-scoring class is
            "person" and 0 elsewhere.
        """
        # Prepare image for model
        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        input_tensor = self.transforms(pil_image).unsqueeze(0).to(self.device)

        # Get segmentation mask
        with torch.no_grad():
            output = self.segmentation(input_tensor)

        # Drop only the batch axis: a bare ``squeeze()`` would also collapse
        # a spatial axis of length 1.
        mask = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()
        return (mask == PERSON_CLASS_INDEX).astype(np.uint8)

    def estimate_lighting(self, image: np.ndarray) -> Dict[str, float]:
        """Estimate lighting as mean HSV value and saturation, scaled to [0, 1].

        Args:
            image: BGR image.

        Returns:
            Dict with keys ``'brightness'`` and ``'saturation'``.
        """
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        return {
            'brightness': float(np.mean(hsv[:, :, 2])) / 255.0,
            'saturation': float(np.mean(hsv[:, :, 1])) / 255.0,
        }

    def adjust_clothing_color(self, clothing: np.ndarray,
                              lighting_params: Dict[str, float]) -> np.ndarray:
        """Scale the garment's brightness and saturation to match the scene.

        Args:
            clothing: BGR or BGRA garment image.
            lighting_params: Output of :meth:`estimate_lighting`.

        Returns:
            Adjusted image with the same channel count as ``clothing``; an
            alpha channel, when present, is passed through unchanged.
        """
        has_alpha = clothing.ndim == 3 and clothing.shape[2] == 4
        # BGR2HSV rejects 4-channel input, so convert the colour planes only.
        bgr = np.ascontiguousarray(clothing[:, :, :3]) if has_alpha else clothing

        # Convert to HSV for easier adjustment
        hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV).astype(np.float32)
        hsv[:, :, 2] *= lighting_params['brightness']
        hsv[:, :, 1] *= lighting_params['saturation']

        # Ensure values are within valid range
        hsv = np.clip(hsv, 0, 255).astype(np.uint8)
        adjusted = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

        if has_alpha:
            adjusted = np.dstack([adjusted, clothing[:, :, 3]])
        return adjusted

    def get_clothing_dimensions(self, landmarks, image_shape: Tuple[int, ...],
                                clothing_type: ClothingType) -> Dict:
        """Compute the garment's placement box from pose landmarks.

        Args:
            landmarks: MediaPipe pose landmarks (normalised coordinates).
            image_shape: Shape of the person image; only the first two
                entries (height, width) are used.
            clothing_type: Which garment is being placed.

        Returns:
            Dict with ``'top_left'`` as an ``(x, y)`` pixel tuple plus
            ``'width'`` and ``'height'`` in pixels.

        Raises:
            ValueError: If ``clothing_type`` has no layout rule.
        """
        height, width = image_shape[:2]

        try:
            left_name, right_name, bottom_name, w_scale, h_scale = (
                _GARMENT_LAYOUT[clothing_type]
            )
        except (KeyError, TypeError):
            raise ValueError(
                f"Unsupported clothing type: {clothing_type!r}"
            ) from None

        points = landmarks.landmark
        marks = self.mp_pose.PoseLandmark
        left = points[marks[left_name]]
        right = points[marks[right_name]]
        bottom = points[marks[bottom_name]]

        span_width = abs(right.x - left.x) * width
        span_height = abs(left.y - bottom.y) * height

        # NOTE(review): the box is anchored at the leftmost anchor point and
        # then widened by ``w_scale``, so the extra width all falls on one
        # side rather than being centred on the body.
        return {
            'top_left': (
                int(min(left.x, right.x) * width),
                int(left.y * height),
            ),
            'width': int(span_width * w_scale),
            'height': int(span_height * h_scale),
        }

    def try_on(self, person_image: np.ndarray, clothing_image: np.ndarray,
               clothing_type: ClothingType) -> np.ndarray:
        """Overlay ``clothing_image`` onto ``person_image``.

        Args:
            person_image: BGR image of the person.
            clothing_image: BGR or BGRA image of the garment.
            clothing_type: Garment category, which selects the landmarks
                used for placement.

        Returns:
            A copy of ``person_image`` with the garment blended in.

        Raises:
            ValueError: If no pose is detected, or the landmark-derived
                garment box has zero width or height.
        """
        # Get pose landmarks first so a missing person fails before the
        # segmentation model is run.
        results = self.pose.process(cv2.cvtColor(person_image, cv2.COLOR_BGR2RGB))
        if not results.pose_landmarks:
            raise ValueError("No person detected in the image")

        # Get body segmentation
        body_mask = self.get_body_segmentation(person_image)

        # Match the garment's colours to the scene lighting
        lighting_params = self.estimate_lighting(person_image)
        adjusted_clothing = self.adjust_clothing_color(clothing_image, lighting_params)

        # Get clothing dimensions
        dimensions = self.get_clothing_dimensions(
            results.pose_landmarks, person_image.shape, clothing_type
        )
        box_w, box_h = dimensions['width'], dimensions['height']
        if box_w <= 0 or box_h <= 0:
            # cv2.resize cannot produce an empty image.
            raise ValueError("Detected body region is too small to fit the clothing")

        # Resize clothing
        clothing_resized = cv2.resize(
            adjusted_clothing, (box_w, box_h), interpolation=cv2.INTER_AREA
        )

        # Create alpha mask for smooth blending
        if clothing_resized.ndim == 3 and clothing_resized.shape[2] == 4:
            alpha = clothing_resized[:, :, 3] / 255.0
        else:
            alpha = np.ones(clothing_resized.shape[:2])

        # Placement box, then the part of it that lies inside the image
        box_x, box_y = dimensions['top_left']
        y1 = max(0, box_y)
        y2 = min(person_image.shape[0], box_y + box_h)
        x1 = max(0, box_x)
        x2 = min(person_image.shape[1], box_x + box_w)

        result = person_image.copy()
        if y2 <= y1 or x2 <= x1:
            # Garment box lies entirely outside the frame.
            return result

        # Crop garment and alpha to the visible window so every operand of
        # the blend has the ROI's shape, offset by however much of the box
        # was cut off at the top/left edge.
        off_y, off_x = y1 - box_y, x1 - box_x
        win_h, win_w = y2 - y1, x2 - x1
        garment = clothing_resized[off_y:off_y + win_h, off_x:off_x + win_w, :3]
        alpha = alpha[off_y:off_y + win_h, off_x:off_x + win_w]

        # Apply body mask so the garment is only painted over person pixels
        alpha = alpha * body_mask[y1:y2, x1:x2]
        alpha_3channel = alpha[:, :, np.newaxis]

        # Blend images
        roi = person_image[y1:y2, x1:x2]
        blended = (1 - alpha_3channel) * roi + alpha_3channel * garment
        result[y1:y2, x1:x2] = blended.astype(result.dtype)
        return result


def create_gradio_interface():
    """Build the Gradio app that wraps :class:`VirtualTryOn`."""
    engine: Optional[VirtualTryOn] = None
    # One engine is shared by all requests; the lock serialises both its
    # construction and its use.
    engine_lock = threading.Lock()

    def _rgb_to_bgr(image: np.ndarray) -> np.ndarray:
        """Convert a Gradio (RGB/RGBA/grey) array to OpenCV channel order."""
        if image.ndim == 2:
            return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        if image.shape[2] == 4:
            return cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    def process_images(person_img, clothing_img, clothing_type):
        """Gradio callback: validate inputs, run try-on, return an RGB image."""
        nonlocal engine

        if person_img is None or clothing_img is None:
            raise gr.Error("Please upload both a person image and a clothing image.")
        if not clothing_type:
            raise gr.Error("Please select a clothing type.")

        # Convert clothing type string to enum
        try:
            clothing_type_enum = ClothingType(clothing_type.lower())
        except ValueError as err:
            raise gr.Error(f"Unknown clothing type: {clothing_type}") from err

        # Gradio hands over RGB arrays; the pipeline works in BGR.
        person_bgr = np.ascontiguousarray(_rgb_to_bgr(person_img)[:, :, :3])
        clothing_bgr = _rgb_to_bgr(clothing_img)

        # Process the images
        with engine_lock:
            if engine is None:
                # Loading the models is expensive: do it once, not per request.
                engine = VirtualTryOn()
            try:
                result_bgr = engine.try_on(person_bgr, clothing_bgr, clothing_type_enum)
            except ValueError as err:
                raise gr.Error(str(err)) from err

        return cv2.cvtColor(result_bgr, cv2.COLOR_BGR2RGB)

    # Only advertise example rows whose image files are actually present.
    candidate_examples = [
        ["person.jpg", "shirt.png", "Shirt"],
        ["person.jpg", "pants.png", "Pants"],
    ]
    examples = [
        row for row in candidate_examples
        if Path(row[0]).is_file() and Path(row[1]).is_file()
    ]

    # Create the interface
    iface = gr.Interface(
        fn=process_images,
        inputs=[
            gr.Image(label="Upload Person Image"),
            # RGBA keeps a PNG's transparency so it can drive the blend.
            gr.Image(label="Upload Clothing Image", image_mode="RGBA"),
            gr.Dropdown(
                choices=["Shirt", "Pants", "Dress", "Jacket"],
                label="Select Clothing Type",
            ),
        ],
        outputs=gr.Image(label="Result"),
        title="Virtual Try-On System",
        description="Upload a person's image and a clothing item to see how it looks!",
        examples=examples or None,
    )
    return iface


if __name__ == "__main__":
    iface = create_gradio_interface()
    iface.launch()