import torch
import numpy as np
from PIL import Image
import cv2
from transformers import AutoImageProcessor, SegformerForSemanticSegmentation
from imagehash import average_hash

def load_model():
    # SegFormer-B0 fine-tuned on ADE20K (150 classes) at 512x512 resolution.
    processor = AutoImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
    model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
    return processor, model

def segment_person(image: Image.Image, processor, model):
    """Return a soft 3-channel person mask (float32 in [0, 1]) at the input image's size."""
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    upsampled_logits = torch.nn.functional.interpolate(
        logits,
        size=image.size[::-1],
        mode="bilinear",
        align_corners=False,
    )
    pred_classes = upsampled_logits.argmax(dim=1)[0].cpu().numpy()
    mask = (pred_classes == 12).astype(np.uint8) * 255  # Class 12 = person

    # Clean the mask: erode to trim ragged edges, then blur to feather them.
    kernel = np.ones((7, 7), np.uint8)
    eroded_mask = cv2.erode(mask, kernel, iterations=1)
    blurred_mask = cv2.GaussianBlur(eroded_mask, (3, 3), sigmaX=0, sigmaY=0)

    final_mask = blurred_mask.astype(np.float32) / 255.0
    final_mask_3ch = np.stack([final_mask]*3, axis=-1)

    return final_mask_3ch
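
# Illustrative helper (a minimal sketch, not part of the original pipeline):
# blend a person onto a background with the soft mask from segment_person.
# Assumes person_np and background_np are equal-sized RGB uint8 arrays; the
# function and argument names here are hypothetical.
def composite_person(person_np, background_np, mask):
    # Per-pixel alpha blend; mask is float32 in [0, 1] with 3 channels.
    blended = mask * person_np + (1.0 - mask) * background_np
    return blended.astype(np.uint8)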


def resize_image(image, size_percent):
    # Ensure a 3-channel RGB image (drops alpha if present).
    image = Image.fromarray(image).convert("RGB")
    width, height = image.size
    new_width = int(width * size_percent / 100)
    new_height = int(height * size_percent / 100)

    # Create a new black canvas with the original dimensions.
    resized_image = Image.new('RGB', (width, height), (0, 0, 0))

    # Scale the original content by size_percent.
    scaled_content = image.resize((new_width, new_height))

    # Paste the scaled content centered on the canvas.
    x = (width - new_width) // 2
    y = (height - new_height) // 2
    resized_image.paste(scaled_content, (x, y))

    return resized_image

# Check whether two images are visually similar via perceptual hashing.
def check_image_similarity(image1, image2):
    hash1 = average_hash(Image.fromarray(image1))
    hash2 = average_hash(Image.fromarray(image2))
    # Subtracting two hashes gives their Hamming distance; fewer than
    # 10 differing bits is treated as "similar".
    return hash1 - hash2 < 10


def split_stereo_image(image):
    """
    Splits an image into left and right halves for stereographic viewing.
    
    Args:
        image: PIL Image or numpy array
        
    Returns:
        tuple: (left_half, right_half) as numpy arrays
    """
    # Convert to numpy array if PIL Image
    if isinstance(image, Image.Image):
        image = np.array(image)
        
    # Get width and calculate split point
    width = image.shape[1]
    split_point = width // 2
    
    # Split into left and right halves
    left_half = image[:, :split_point]
    right_half = image[:, split_point:]

    # If the two halves look alike, the input is a genuine side-by-side stereo pair.
    if check_image_similarity(left_half, right_half):
        return left_half, right_half
    else:
        # Otherwise synthesize a pseudo-stereo pair: the original image plus a
        # copy scaled to 99% so the two views differ slightly. Convert back to
        # a numpy array to match the documented return type.
        return image, np.array(resize_image(image, 99))
    
def resize_image_to_width(person_img, background_img):
    # Resize the person image to match the background's dominant dimension,
    # preserving the person image's aspect ratio.
    img_array = np.array(person_img)
    if background_img.shape[1] > background_img.shape[0]:
        # Landscape background: match its width.
        width = background_img.shape[1]
        height = int(width * img_array.shape[0] / img_array.shape[1])
    else:
        # Portrait (or square) background: match its height.
        height = background_img.shape[0]
        width = int(height * img_array.shape[1] / img_array.shape[0])

    return Image.fromarray(img_array).resize((width, height))

def resize_mask(person_size, mask):
    # Scale the mask by person_size percent.
    scale_factor = person_size / 100.0
    mask_height, mask_width = mask.shape[:2]
    new_height = int(mask_height * scale_factor)
    new_width = int(mask_width * scale_factor)

    # Convert the float mask to an 8-bit PIL image for resizing.
    mask_image = Image.fromarray((mask * 255).astype(np.uint8))
    resized_mask = mask_image.resize((new_width, new_height))

    # Convert back to numpy and normalize to [0, 1].
    mask = np.array(resized_mask).astype(np.float32) / 255.0

    # Restore the 3-channel layout if the mask came back 2-D.
    if len(mask.shape) == 2:
        mask = np.stack([mask] * 3, axis=-1)

    return mask

def resize_images(image, person_size):
    # Scale the image by person_size percent, preserving aspect ratio.
    image_np = np.array(image)
    scale_factor = person_size / 100.0
    img_height, img_width = image_np.shape[:2]
    new_height = int(img_height * scale_factor)
    new_width = int(img_width * scale_factor)

    # Resize via PIL and return as a numpy array.
    resized_image = Image.fromarray(image_np).resize((new_width, new_height))
    return np.array(resized_image)