Spaces:

sidhtang
/

_implementation_

Runtime error

App Files Files Community

_implementation_ / app.py

sidhtang

Update app.py

bd5362c verified 8 months ago

raw

history blame contribute delete

9.91 kB


	import cv2
	import numpy as np
	import mediapipe as mp
	import torch
	import torch.nn as nn
	import torchvision.transforms as transforms
	from PIL import Image
	import gradio as gr
	from enum import Enum
	import colorsys
	from typing import Tuple, Dict
	import torch.nn.functional as F

	class ClothingType(Enum):
	    """Garment categories the try-on pipeline can place on a person.

	    Each value is the lower-case form of a label, so a label such as
	    ``"Shirt"`` maps to a member via ``ClothingType(label.lower())``.
	    """

	    SHIRT = "shirt"
	    PANTS = "pants"
	    DRESS = "dress"
	    JACKET = "jacket"

	class BodySegmentation(nn.Module):
	    """Wrapper around the pretrained ``deeplabv3_resnet50`` model from torch.hub.

	    The wrapped network is put in evaluation mode at construction time and
	    ``forward`` exposes only the ``'out'`` entry of its output mapping.
	    """

	    def __init__(self):
	        super().__init__()
	        # Load DeepLabV3 (ResNet-50 backbone) for semantic segmentation.
	        # torch.hub resolves the pinned torchvision v0.10.0 repository, so the
	        # first call presumably needs network access or a populated hub cache.
	        self.model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True)
	        # Inference only: switch off training-time behaviour of the layers.
	        self.model.eval()

	    def forward(self, x):
	        """Run the wrapped model on ``x`` and return its ``'out'`` entry."""
	        return self.model(x)['out']

	class VirtualTryOn:
	    """Overlay a garment image onto a photo of a person.

	    The pipeline segments the person, detects pose landmarks, derives a
	    placement box for the garment from those landmarks, tints the garment
	    according to the photo's brightness/saturation and alpha-blends it into
	    the photo inside the person mask.

	    Images are handled as OpenCV-style BGR arrays: every model input is
	    produced with a ``COLOR_BGR2RGB`` conversion.
	    """

	    # Class index treated as "person" in the segmentation output. The
	    # torchvision DeepLabV3 weights use the Pascal VOC category ordering,
	    # in which "person" is index 15.
	    PERSON_CLASS_INDEX = 15

	    def __init__(self):
	        # Initialize MediaPipe
	        self.mp_pose = mp.solutions.pose
	        self.mp_holistic = mp.solutions.holistic
	        self.pose = self.mp_pose.Pose(
	            static_image_mode=True,
	            model_complexity=2,
	            min_detection_confidence=0.5
	        )
	        # NOTE(review): the holistic model is not used by any method of this
	        # class; it is kept so existing callers reading ``.holistic`` still work.
	        self.holistic = self.mp_holistic.Holistic(
	            static_image_mode=True,
	            model_complexity=2,
	            min_detection_confidence=0.5
	        )

	        # Initialize body segmentation on the GPU when one is available
	        self.segmentation = BodySegmentation()
	        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	        self.segmentation.to(self.device)

	        # Image transforms: tensor conversion followed by per-channel
	        # normalisation with the mean/std constants below
	        self.transforms = transforms.Compose([
	            transforms.ToTensor(),
	            transforms.Normalize(mean=[0.485, 0.456, 0.406],
	                                 std=[0.229, 0.224, 0.225])
	        ])

	    def get_body_segmentation(self, image: np.ndarray) -> np.ndarray:
	        """Return a ``uint8`` mask that is 1 where the person class wins.

	        Args:
	            image: BGR image of the person.

	        Returns:
	            2-D array of zeros and ones marking pixels whose highest-scoring
	            class is ``PERSON_CLASS_INDEX``.
	        """
	        # Prepare image for model
	        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
	        input_tensor = self.transforms(pil_image).unsqueeze(0).to(self.device)

	        # Get segmentation mask
	        with torch.no_grad():
	            output = self.segmentation(input_tensor)
	        # Index the batch dimension explicitly; a bare squeeze() would also
	        # drop a spatial dimension of size 1.
	        mask = torch.argmax(output, dim=1)[0].cpu().numpy()

	        return (mask == self.PERSON_CLASS_INDEX).astype(np.uint8)

	    def estimate_lighting(self, image: np.ndarray) -> Dict[str, float]:
	        """Estimate lighting as mean HSV value and saturation, scaled by 1/255.

	        Args:
	            image: BGR image.

	        Returns:
	            Dict with ``'brightness'`` and ``'saturation'`` entries.
	        """
	        # Convert to HSV
	        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

	        # Get average brightness and saturation
	        brightness = np.mean(hsv[:, :, 2])
	        saturation = np.mean(hsv[:, :, 1])

	        return {
	            'brightness': brightness / 255.0,
	            'saturation': saturation / 255.0
	        }

	    def adjust_clothing_color(self, clothing: np.ndarray,
	                              lighting_params: Dict[str, float]) -> np.ndarray:
	        """Scale the garment's HSV value/saturation by the lighting estimate.

	        Args:
	            clothing: BGR garment image.
	            lighting_params: Dict with ``'brightness'`` and ``'saturation'``
	                multipliers, as produced by ``estimate_lighting``.

	        Returns:
	            The adjusted garment converted back from HSV to BGR.
	        """
	        # Convert to HSV for easier adjustment
	        hsv = cv2.cvtColor(clothing, cv2.COLOR_BGR2HSV).astype(np.float32)

	        # Adjust brightness and saturation
	        hsv[:, :, 2] *= lighting_params['brightness']
	        hsv[:, :, 1] *= lighting_params['saturation']

	        # Ensure values are within valid range
	        hsv = np.clip(hsv, 0, 255).astype(np.uint8)

	        # Convert back to BGR
	        return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

	    @staticmethod
	    def _split_alpha(image: np.ndarray):
	        """Split a 4-channel image into (colour, alpha); alpha is None otherwise."""
	        if image.ndim == 3 and image.shape[2] == 4:
	            return (np.ascontiguousarray(image[:, :, :3]),
	                    np.ascontiguousarray(image[:, :, 3]))
	        return image, None

	    @staticmethod
	    def _landmark_box(left, right, lower, image_width: int, image_height: int,
	                      width_scale: float, height_scale: float) -> Dict:
	        """Build a placement box anchored at the ``left``/``right`` landmark pair.

	        The box starts at the smaller x of the pair and at ``left``'s y; its
	        size is the pair's horizontal distance and the vertical distance from
	        ``left`` to ``lower``, each multiplied by the given scale.
	        """
	        span_width = abs(right.x - left.x) * image_width
	        span_height = abs(left.y - lower.y) * image_height
	        return {
	            'top_left': (
	                int(min(left.x, right.x) * image_width),
	                int(left.y * image_height)
	            ),
	            'width': int(span_width * width_scale),
	            'height': int(span_height * height_scale)
	        }

	    def get_clothing_dimensions(self, landmarks, image_shape: Tuple[int, ...],
	                                clothing_type: ClothingType) -> Dict:
	        """Derive the garment placement box from pose landmarks.

	        Args:
	            landmarks: Pose result whose ``.landmark`` is indexable by
	                ``PoseLandmark`` members; the ``x``/``y`` of each entry are
	                treated as fractions of the image width/height.
	            image_shape: Shape of the person image; only the first two
	                entries (height, width) are used.
	            clothing_type: Which garment category to size the box for.

	        Returns:
	            Dict with ``'top_left'`` as ``(x, y)`` pixels plus ``'width'`` and
	            ``'height'`` in pixels.

	        Raises:
	            ValueError: If ``clothing_type`` is not a handled category.
	        """
	        height, width = image_shape[:2]
	        points = landmarks.landmark
	        marks = self.mp_pose.PoseLandmark

	        if clothing_type in [ClothingType.SHIRT, ClothingType.JACKET]:
	            # Upper body: shoulder line down to the left hip
	            return self._landmark_box(
	                points[marks.LEFT_SHOULDER], points[marks.RIGHT_SHOULDER],
	                points[marks.LEFT_HIP], width, height, 1.3, 1.1
	            )

	        if clothing_type == ClothingType.PANTS:
	            # Lower body: hip line down to the left ankle
	            return self._landmark_box(
	                points[marks.LEFT_HIP], points[marks.RIGHT_HIP],
	                points[marks.LEFT_ANKLE], width, height, 1.5, 1.05
	            )

	        if clothing_type == ClothingType.DRESS:
	            # Dresses: shoulder line down to the left knee
	            return self._landmark_box(
	                points[marks.LEFT_SHOULDER], points[marks.RIGHT_SHOULDER],
	                points[marks.LEFT_KNEE], width, height, 1.4, 1.1
	            )

	        # Previously fell through and returned None, which only failed later
	        # with an unrelated TypeError in the caller.
	        raise ValueError(f"Unsupported clothing type: {clothing_type!r}")

	    def try_on(self, person_image: np.ndarray, clothing_image: np.ndarray,
	               clothing_type: ClothingType) -> np.ndarray:
	        """Blend a garment onto the person image.

	        Args:
	            person_image: BGR photo of the person.
	            clothing_image: BGR garment image, optionally with a fourth
	                alpha channel that is used as blending weight.
	            clothing_type: Garment category controlling the placement box.

	        Returns:
	            A copy of ``person_image`` with the garment blended in. When the
	            placement box lies entirely outside the image the copy is
	            returned unchanged.

	        Raises:
	            ValueError: If no pose landmarks are found, or the landmarks
	                yield a placement box with zero width or height.
	        """
	        # Get body segmentation
	        body_mask = self.get_body_segmentation(person_image)

	        # Get pose landmarks
	        results = self.pose.process(cv2.cvtColor(person_image, cv2.COLOR_BGR2RGB))
	        if not results.pose_landmarks:
	            raise ValueError("No person detected in the image")

	        # Estimate lighting conditions
	        lighting_params = self.estimate_lighting(person_image)

	        # Separate the alpha channel first: the colour adjustment goes
	        # through a BGR->HSV->BGR round trip that yields three channels, so
	        # alpha would otherwise be lost before blending.
	        clothing_bgr, clothing_alpha = self._split_alpha(clothing_image)
	        adjusted_clothing = self.adjust_clothing_color(clothing_bgr, lighting_params)

	        # Get clothing dimensions
	        dimensions = self.get_clothing_dimensions(
	            results.pose_landmarks,
	            person_image.shape,
	            clothing_type
	        )
	        box_width, box_height = dimensions['width'], dimensions['height']
	        if box_width <= 0 or box_height <= 0:
	            # cv2.resize cannot produce an empty image
	            raise ValueError("Could not determine clothing placement from the detected pose")

	        # Resize clothing (and its alpha, if any) to the placement box
	        clothing_resized = cv2.resize(
	            adjusted_clothing,
	            (box_width, box_height),
	            interpolation=cv2.INTER_AREA
	        )
	        if clothing_alpha is not None:
	            alpha_channel = cv2.resize(
	                clothing_alpha,
	                (box_width, box_height),
	                interpolation=cv2.INTER_AREA
	            ) / 255.0
	        else:
	            alpha_channel = np.ones((box_height, box_width))

	        # Calculate placement coordinates, clipped to the image boundaries
	        box_left, box_top = dimensions['top_left']
	        image_height, image_width = person_image.shape[:2]
	        y1 = max(0, box_top)
	        y2 = min(image_height, box_top + box_height)
	        x1 = max(0, box_left)
	        x2 = min(image_width, box_left + box_width)

	        result = person_image.copy()
	        if y1 >= y2 or x1 >= x2:
	            # Box is completely off-image: nothing to blend
	            return result

	        # Crop garment and alpha by exactly the amount the box was clipped so
	        # all operands share the ROI's shape and stay aligned with the body.
	        crop_top = y1 - box_top
	        crop_left = x1 - box_left
	        crop_bottom = crop_top + (y2 - y1)
	        crop_right = crop_left + (x2 - x1)
	        clothing_crop = clothing_resized[crop_top:crop_bottom, crop_left:crop_right, :3]
	        alpha_crop = alpha_channel[crop_top:crop_bottom, crop_left:crop_right]

	        # Apply body mask so the garment only covers person pixels
	        alpha_crop = alpha_crop * body_mask[y1:y2, x1:x2]
	        alpha_3channel = alpha_crop[:, :, np.newaxis]

	        # Blend images
	        roi = person_image[y1:y2, x1:x2, :3]
	        blended = (1 - alpha_3channel) * roi + alpha_3channel * clothing_crop
	        result[y1:y2, x1:x2, :3] = blended

	        return result

	def create_gradio_interface():
	    """Build the Gradio interface for the virtual try-on demo.

	    Returns:
	        A ``gr.Interface`` taking a person image, a clothing image and a
	        clothing-type label, and producing the blended result image.
	    """
	    import os  # local import: only needed to filter the example rows below

	    # The pipeline loads several models in its constructor, so build it once
	    # on first use and reuse it, instead of once per request.
	    pipeline_holder = {}

	    def get_pipeline():
	        if 'pipeline' not in pipeline_holder:
	            pipeline_holder['pipeline'] = VirtualTryOn()
	        return pipeline_holder['pipeline']

	    def swap_red_blue(image):
	        # Gradio hands over RGB(A) arrays while the pipeline works on BGR(A);
	        # the same channel swap converts in both directions.
	        if image.ndim == 3 and image.shape[2] == 4:
	            return cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)
	        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

	    def process_images(person_img, clothing_img, clothing_type):
	        # Unfilled inputs arrive as None; fail with a readable message
	        # instead of an AttributeError / OpenCV assertion.
	        if person_img is None or clothing_img is None:
	            raise ValueError("Please upload both a person image and a clothing image")
	        if not clothing_type:
	            raise ValueError("Please select a clothing type")

	        # Convert clothing type string to enum
	        clothing_type_enum = ClothingType(clothing_type.lower())

	        # Process the images
	        result = get_pipeline().try_on(
	            swap_red_blue(person_img),
	            swap_red_blue(clothing_img),
	            clothing_type_enum
	        )

	        return swap_red_blue(result)

	    # Offer only example rows whose image files are actually present, so a
	    # missing sample file cannot break interface construction.
	    example_rows = [
	        ["person.jpg", "shirt.png", "Shirt"],
	        ["person.jpg", "pants.png", "Pants"]
	    ]
	    available_examples = [
	        row for row in example_rows
	        if os.path.exists(row[0]) and os.path.exists(row[1])
	    ]

	    # Create the interface
	    iface = gr.Interface(
	        fn=process_images,
	        inputs=[
	            gr.Image(label="Upload Person Image"),
	            gr.Image(label="Upload Clothing Image"),
	            gr.Dropdown(
	                choices=["Shirt", "Pants", "Dress", "Jacket"],
	                label="Select Clothing Type"
	            )
	        ],
	        outputs=gr.Image(label="Result"),
	        title="Virtual Try-On System",
	        description="Upload a person's image and a clothing item to see how it looks!",
	        examples=available_examples or None
	    )

	    return iface

	if __name__ == "__main__":
	    # Build the interface and start serving it when run as a script.
	    iface = create_gradio_interface()
	    iface.launch()