Alex Hortua committed on
Commit
380570c
·
0 Parent(s):

Creating new implementation of this code

Browse files
Files changed (7) hide show
  1. .gitignore +3 -0
  2. README.MD +8 -0
  3. requirements.txt +7 -0
  4. src/anaglyphGenerator.py +39 -0
  5. src/app.py +78 -0
  6. src/testing.py +4 -0
  7. src/utils.py +58 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .qodo
2
+ /src/__pycache__
3
+ /venv
README.MD ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # 3D Person Segmentation App
2
+
3
+ This app segments a person from an image using SegFormer and creates a 3D red-cyan anaglyph image.
4
+
5
+ ## Setup
6
+
7
+ ```bash
8
+ pip install -r requirements.txt
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ Pillow
4
+ datasets
5
+ opencv-python
6
+ gradio
7
+ numpy
src/anaglyphGenerator.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ from PIL import Image
4
+ from utils import load_model, segment_person
5
+
6
def _shift_horizontally(array, shift):
    """Return a copy of ``array`` moved ``shift`` columns along axis 1.

    Unlike ``np.roll``, columns pushed past one edge do not reappear on the
    opposite edge; the vacated columns are left at zero.
    """
    shifted = np.zeros_like(array)
    width = array.shape[1]
    if shift == 0:
        shifted[...] = array
    elif abs(shift) < width:
        if shift > 0:
            shifted[:, shift:] = array[:, :-shift]
        else:
            shifted[:, :shift] = array[:, -shift:]
    return shifted


def _compose_eye_view(image_np, background_np, mask, shift):
    """Composite the person, displaced by ``shift`` columns, over the background."""
    # Shift the mask together with the person so the hole cut into the
    # background lines up with where the person is actually drawn. Using the
    # unshifted mask leaves a dark gap on one side and a double-exposed band
    # on the other.
    shifted_mask = _shift_horizontally(mask, shift)
    shifted_person = _shift_horizontally(image_np * mask, shift)
    view = shifted_person + background_np * (1 - shifted_mask)
    return np.clip(view, 0, 255).astype(np.uint8)


def create_anaglyph(person_img_path, background_img_path,
                    output_path="output_anaglyph.png", shift_pixels=10):
    """Build a red-cyan anaglyph of a person placed over a background image.

    The person image is segmented with ``segment_person``; the person is then
    drawn ``shift_pixels`` columns to the left for the left-eye view and the
    same amount to the right for the right-eye view, each over the unshifted
    background. The red channel of the left view and the green/blue channels
    of the right view are merged and written to ``output_path``.

    Args:
        person_img_path: Path of the image containing the person.
        background_img_path: Path of the background image; it is resized to
            the person image's size.
        output_path: Where the resulting anaglyph is saved.
        shift_pixels: Horizontal displacement, in pixels, applied to the
            person in each eye view.
    """
    # Context managers release the file handles as soon as the pixel data
    # has been copied by convert().
    with Image.open(person_img_path) as person_file:
        image = person_file.convert("RGB")
    with Image.open(background_img_path) as background_file:
        background = background_file.convert("RGB").resize(image.size)

    processor, model = load_model()
    mask = segment_person(image, processor, model)

    image_np = np.array(image)
    background_np = np.array(background)

    # Stereoscopic shift: opposite displacement for each eye.
    left_eye = _compose_eye_view(image_np, background_np, mask, -shift_pixels)
    right_eye = _compose_eye_view(image_np, background_np, mask, shift_pixels)

    # Merge into red-cyan anaglyph: red from the left eye, green and blue
    # from the right eye.
    anaglyph = np.stack(
        [left_eye[:, :, 0], right_eye[:, :, 1], right_eye[:, :, 2]],
        axis=2,
    )

    anaglyph_img = Image.fromarray(anaglyph.astype(np.uint8))
    anaglyph_img.save(output_path)
    print(f"✅ Anaglyph image saved to: {output_path}")


if __name__ == "__main__":
    create_anaglyph("person.png", "bg.png")
src/app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ from utils import load_model, segment_person, resize_image
5
+
6
+ # Load model and processor once
7
+ processor, model = load_model()
8
+
9
+ # Default background (solid color)
10
+ default_bg = Image.new("RGB", (512, 512), color=(95, 147, 89))
11
+
12
+
13
+
14
+
15
+
16
def _shift_horizontally(array, shift):
    """Return a copy of ``array`` moved ``shift`` columns along axis 1.

    Unlike ``np.roll``, columns pushed past one edge do not reappear on the
    opposite edge; the vacated columns are left at zero.
    """
    shifted = np.zeros_like(array)
    width = array.shape[1]
    if shift == 0:
        shifted[...] = array
    elif abs(shift) < width:
        if shift > 0:
            shifted[:, shift:] = array[:, :-shift]
        else:
            shifted[:, :shift] = array[:, -shift:]
    return shifted


def _compose_eye_view(image_np, background_np, mask, shift):
    """Composite the person, displaced by ``shift`` columns, over the background."""
    # The mask must travel with the person; cutting the background with the
    # unshifted mask leaves a dark gap on one side of the person and a
    # double-exposed band on the other.
    shifted_mask = _shift_horizontally(mask, shift)
    shifted_person = _shift_horizontally(image_np * mask, shift)
    view = shifted_person + background_np * (1 - shifted_mask)
    return np.clip(view, 0, 255).astype(np.uint8)


def generate_3d_outputs(person_img, background_img=None, shift_pixels=10, person_size=100):
    """Produce an anaglyph and a side-by-side stereo pair for the Gradio UI.

    Args:
        person_img: Person image as accepted by ``resize_image``.
        background_img: Optional background as an array accepted by
            ``Image.fromarray``; when ``None`` the solid-colour
            ``default_bg`` is used instead.
        shift_pixels: Horizontal displacement, in pixels, applied to the
            person in each eye view.
        person_size: Scale of the person image in percent, forwarded to
            ``resize_image``.

    Returns:
        A tuple ``(anaglyph_img, stereo_image)`` of PIL images: the red-cyan
        anaglyph and the left/right views concatenated horizontally.

    Raises:
        gr.Error: If no person image was supplied.
    """
    if person_img is None:
        # Surface a readable message in the UI instead of a conversion crash.
        raise gr.Error("Please upload a person image.")

    # Array shifting needs an integer offset; coerce in case a float arrives.
    shift = int(shift_pixels)

    # Resize images to match
    image = resize_image(person_img, person_size)
    if background_img is None:
        background = default_bg.resize(image.size)
    else:
        background = Image.fromarray(background_img).convert("RGB").resize(image.size)

    # Step 1: Segment person
    mask = segment_person(image, processor, model)
    image_np = np.array(image)
    background_np = np.array(background)

    # Step 2: Create stereo pair (opposite displacement for each eye)
    left_eye = _compose_eye_view(image_np, background_np, mask, -shift)
    right_eye = _compose_eye_view(image_np, background_np, mask, shift)

    # Combine left and right images side by side
    stereo_image = Image.fromarray(np.concatenate([left_eye, right_eye], axis=1))

    # Step 3: Create anaglyph
    anaglyph = np.stack(
        [
            left_eye[:, :, 0],   # Red from left
            right_eye[:, :, 1],  # Green from right
            right_eye[:, :, 2],  # Blue from right
        ],
        axis=2,
    )
    anaglyph_img = Image.fromarray(anaglyph.astype(np.uint8))

    return anaglyph_img, stereo_image
58
+
59
+ # Gradio Interface
60
# Input widgets, listed in the positional order of generate_3d_outputs'
# parameters: person image, background image, shift, person size.
_input_components = [
    gr.Image(label="Person Image"),
    gr.Image(label="Optional Background Image"),
    gr.Slider(minimum=0, maximum=10, step=1, value=10, label="interaxial distance"),
    gr.Slider(minimum=10, maximum=200, step=10, value=100, label="Person Size %"),
]

# Output widgets, matching the (anaglyph, stereo pair) tuple returned by
# generate_3d_outputs.
_output_components = [
    gr.Image(label="3D Anaglyph Image"),
    gr.Image(label="Stereo_pair"),
]

demo = gr.Interface(
    fn=generate_3d_outputs,
    inputs=_input_components,
    outputs=_output_components,
    title="3D Person Segmentation Viewer",
    description="Upload a person photo and optionally a background image. Outputs anaglyph and stereo views.",
)

# Only start the web server when this file is executed directly.
if __name__ == "__main__":
    demo.launch()
src/testing.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# create_anaglyph is defined in anaglyphGenerator.py; app.py does not export
# it, so importing it from app fails with ImportError.
from anaglyphGenerator import create_anaglyph

# Provide paths to your test images
create_anaglyph("person.png", "bg.png", "test_anaglyph.png")
src/utils.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from PIL import Image
4
+ import cv2
5
+ from transformers import AutoImageProcessor, SegformerForSemanticSegmentation
6
+
7
def load_model(model_name="nvidia/segformer-b0-finetuned-ade-512-512"):
    """Load the image processor and SegFormer segmentation model.

    Args:
        model_name: Checkpoint identifier passed to ``from_pretrained`` for
            both the processor and the model, so the two always match.

    Returns:
        A tuple ``(processor, model)``.
    """
    processor = AutoImageProcessor.from_pretrained(model_name)
    model = SegformerForSemanticSegmentation.from_pretrained(model_name)
    return processor, model
11
+
12
def segment_person(image: Image.Image, processor, model, *, person_class_id=12,
                   erode_kernel_size=7, blur_kernel_size=3):
    """Return a soft three-channel person mask for ``image``.

    Args:
        image: PIL image to segment.
        processor: Image processor used to build the model inputs.
        model: Segmentation model whose output exposes ``logits``.
        person_class_id: Class index treated as "person" in the model's
            prediction map (12 for the default checkpoint).
        erode_kernel_size: Side length of the square erosion kernel used to
            shrink the mask.
        blur_kernel_size: Side length of the Gaussian blur kernel used to
            soften the mask edge; presumably must be odd and positive, as
            cv2.GaussianBlur expects — confirm before changing.

    Returns:
        A float32 array of shape ``(height, width, 3)`` with values in
        ``[0, 1]``; the same mask is repeated on every channel.
    """
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits

    # PIL reports (width, height); interpolate expects (height, width).
    full_res_logits = torch.nn.functional.interpolate(
        logits,
        size=image.size[::-1],
        mode="bilinear",
        align_corners=False,
    )
    class_map = full_res_logits.argmax(dim=1)[0].cpu().numpy()
    binary_mask = (class_map == person_class_id).astype(np.uint8) * 255

    # Clean mask: erode to trim the border, then blur for a soft edge.
    kernel = np.ones((erode_kernel_size, erode_kernel_size), np.uint8)
    eroded = cv2.erode(binary_mask, kernel, iterations=1)
    blurred = cv2.GaussianBlur(
        eroded, (blur_kernel_size, blur_kernel_size), sigmaX=0, sigmaY=0
    )

    soft_mask = blurred.astype(np.float32) / 255.0
    # Replicate across three channels so it multiplies directly with RGB data.
    return np.repeat(soft_mask[:, :, np.newaxis], 3, axis=-1)
36
+
37
+
38
def resize_image(image, size_percent):
    """Scale ``image`` and centre it on a black canvas of the original size.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.
        size_percent: Scale factor in percent; 100 keeps the original size.
            Values above 100 enlarge the content, which is then cropped by
            the canvas because the paste offset becomes negative.

    Returns:
        An RGB PIL image with the same dimensions as the input.
    """
    # Accept PIL images as well as arrays, then normalise to RGB (this also
    # drops any alpha channel).
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    width, height = image.size
    # Clamp to at least one pixel: a zero-sized target makes resize() fail
    # for tiny images or very small percentages.
    new_width = max(1, int(width * size_percent / 100))
    new_height = max(1, int(height * size_percent / 100))

    # Opaque black canvas with the original dimensions.
    canvas = Image.new("RGB", (width, height), (0, 0, 0))
    scaled_content = image.resize((new_width, new_height))

    # Centre the scaled content on the canvas.
    offset = ((width - new_width) // 2, (height - new_height) // 2)
    canvas.paste(scaled_content, offset)

    return canvas