Spaces:

axelhortua
/

Object-segmentation

Running

App Files Files Community

Alex Hortua committed on Mar 23

Commit

380570c

0 Parent(s):

Creating new implementation of this code

Browse files

Files changed (7) hide show

.gitignore +3 -0
README.MD +8 -0
requirements.txt +7 -0
src/anaglyphGenerator.py +39 -0
src/app.py +78 -0
src/testing.py +4 -0
src/utils.py +58 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.qodo
+/src/__pycache__
+/venv

README.MD ADDED Viewed

	@@ -0,0 +1,8 @@

+# 3D Person Segmentation App
+This app segments a person from an image using SegFormer and creates a 3D red-cyan anaglyph image.
+## Setup
+```bash
+pip install -r requirements.txt
+```

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+transformers
+torch
+Pillow
+datasets
+opencv-python
+gradio
+numpy

src/anaglyphGenerator.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import os
+import numpy as np
+from PIL import Image
+from utils import load_model, segment_person
+def create_anaglyph(person_img_path, background_img_path, output_path="output_anaglyph.png"):
+    image = Image.open(person_img_path).convert("RGB")
+    background = Image.open(background_img_path).convert("RGB").resize(image.size)
+    processor, model = load_model()
+    mask = segment_person(image, processor, model)
+    image_np = np.array(image)
+    background_np = np.array(background)
+    person_only = image_np * mask
+    background_only = background_np * (1 - mask)
+    # Stereoscopic shift
+    shift_pixels = 10
+    person_left = np.roll(person_only, shift=-shift_pixels, axis=1)
+    person_right = np.roll(person_only, shift=shift_pixels, axis=1)
+    left_eye = np.clip(person_left + background_only, 0, 255).astype(np.uint8)
+    right_eye = np.clip(person_right + background_only, 0, 255).astype(np.uint8)
+    # Merge into red-cyan anaglyph
+    anaglyph = np.stack([
+        left_eye[:, :, 0],
+        right_eye[:, :, 1],
+        right_eye[:, :, 2]
+    ], axis=2)
+    anaglyph_img = Image.fromarray(anaglyph.astype(np.uint8))
+    anaglyph_img.save(output_path)
+    print(f"✅ Anaglyph image saved to: {output_path}")
+# Script entry point: only runs when this file is executed directly, not on import.
+# Uses the relative paths "person.png" and "bg.png" and the default output path.
+if __name__ == "__main__":
+    create_anaglyph("person.png", "bg.png")

src/app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import gradio as gr
+import numpy as np
+from PIL import Image
+from utils import load_model, segment_person, resize_image
+# Load model and processor once at import time so every request reuses them
+processor, model = load_model()
+# Default background (solid color), used when no background image is supplied;
+# it is resized to the person image size before compositing
+default_bg = Image.new("RGB", (512, 512), color=(95, 147, 89))
+def generate_3d_outputs(person_img, background_img=None, shift_pixels=10,  person_size=100):
+    # Resize images to match
+    image = resize_image(person_img, person_size)
+    if background_img is None:
+        background = default_bg.resize(image.size)
+    else:
+        background = Image.fromarray(background_img).convert("RGB").resize(image.size)
+    # Step 1: Segment person
+    mask = segment_person(image, processor, model)
+    image_np = np.array(image)
+    background_np = np.array(background)
+    person_only = image_np * mask
+    background_only = background_np * (1 - mask)
+    # Step 2: Create stereo pair
+    person_left = np.roll(person_only, shift=-shift_pixels, axis=1)
+    person_right = np.roll(person_only, shift=shift_pixels, axis=1)
+    left_eye = np.clip(person_left + background_only, 0, 255).astype(np.uint8)
+    right_eye = np.clip(person_right + background_only, 0, 255).astype(np.uint8)
+    # --- Combine left and right images side by side ---
+    stereo_pair = np.concatenate([left_eye, right_eye], axis=1)
+    stereo_image = Image.fromarray(stereo_pair)
+    # Step 3: Create anaglyph
+    anaglyph = np.stack([
+        left_eye[:, :, 0],  # Red from left
+        right_eye[:, :, 1],  # Green from right
+        right_eye[:, :, 2]   # Blue from right
+    ], axis=2)
+    anaglyph_img = Image.fromarray(anaglyph.astype(np.uint8))
+    left_img = Image.fromarray(left_eye)
+    right_img = Image.fromarray(right_eye)
+    return anaglyph_img, stereo_image
+# Gradio Interface
+# The inputs below are listed in the same order as the parameters of
+# generate_3d_outputs (person image, background image, shift, person size),
+# and the outputs in the order of its returned tuple (anaglyph, stereo pair).
+demo = gr.Interface(
+    fn=generate_3d_outputs,
+    inputs=[
+        gr.Image(label="Person Image"),
+        gr.Image(label="Optional Background Image"),
+        gr.Slider(minimum=0, maximum=10, step=1, value=10, label="interaxial distance"),
+        gr.Slider(minimum=10, maximum=200, step=10, value=100, label="Person Size %"),
+    ],
+    outputs=[
+        gr.Image(label="3D Anaglyph Image"),
+        gr.Image(label="Stereo_pair"),
+    ],
+    title="3D Person Segmentation Viewer",
+    description="Upload a person photo and optionally a background image. Outputs anaglyph and stereo views."
+)
+# Start the web app only when this file is executed directly
+if __name__ == "__main__":
+    demo.launch()

src/testing.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from app import create_anaglyph
+# Provide paths to your test images
+create_anaglyph("person.png", "bg.png", "test_anaglyph.png")

src/utils.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import torch
+import numpy as np
+from PIL import Image
+import cv2
+from transformers import AutoImageProcessor, SegformerForSemanticSegmentation
+def load_model():
+    processor = AutoImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
+    model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
+    return processor, model
+def segment_person(image: Image.Image, processor, model):
+    inputs = processor(images=image, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    logits = outputs.logits
+    upsampled_logits = torch.nn.functional.interpolate(
+        logits,
+        size=image.size[::-1],
+        mode="bilinear",
+        align_corners=False,
+    )
+    pred_classes = upsampled_logits.argmax(dim=1)[0].cpu().numpy()
+    mask = (pred_classes == 12).astype(np.uint8) * 255  # Class 12 = person
+    # Clean mask
+    kernel = np.ones((7, 7), np.uint8)
+    eroded_mask = cv2.erode(mask, kernel, iterations=1)
+    blurred_mask = cv2.GaussianBlur(eroded_mask, (3, 3), sigmaX=0, sigmaY=0)
+    final_mask = blurred_mask.astype(np.float32) / 255.0
+    final_mask_3ch = np.stack([final_mask]*3, axis=-1)
+    return final_mask_3ch
+def resize_image(image, size_percent):
+  # Convert image to RGB if it's RGBA
+  image = Image.fromarray(image).convert("RGB")
+  width, height = image.size
+  new_width = int(width * size_percent / 100)
+  new_height = int(height * size_percent / 100)
+  # Create new transparent image with original dimensions
+  resized_image = Image.new('RGB', (width, height), (0, 0, 0))
+  # Resize original image
+  scaled_content = image.resize((new_width, new_height))
+  # Calculate position to paste resized content in center
+  x = (width - new_width) // 2
+  y = (height - new_height) // 2
+  # Paste resized content onto transparent background
+  resized_image.paste(scaled_content, (x, y))
+  return resized_image