Spaces:
Running
Running
Merge pull request #1 from AlexanderHMagno/main
Browse files- .gitattributes +37 -0
- README.md +259 -0
- public/Project 4 Report.pdf +0 -0
- public/images/backgrounds/a.png +3 -0
- public/images/backgrounds/b.png +3 -0
- public/images/backgrounds/c.png +3 -0
- public/images/backgrounds/d.png +3 -0
- public/images/backgrounds/e.png +3 -0
- public/images/backgrounds/f.png +3 -0
- public/images/backgrounds/g.png +3 -0
- public/images/documentation/Anaglyph.png +3 -0
- public/images/documentation/Mask.png +3 -0
- public/images/documentation/Stereo.png +3 -0
- public/images/people/a.jpg +3 -0
- public/images/people/b.jpg +3 -0
- public/images/people/c.jpg +3 -0
- public/images/people/d.png +3 -0
- public/images/people/e.jpg +3 -0
- public/images/people/f.jpg +3 -0
- public/images/people/g.jpeg +0 -0
- public/lab_report.txt +131 -0
- requirements.txt +1 -1
- src/app.py +39 -16
- src/testing.py +37 -3
- src/utils.py +58 -0
.gitattributes
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Object Segmentation
|
3 |
+
emoji: 👁
|
4 |
+
colorFrom: gray
|
5 |
+
colorTo: pink
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.22.0
|
8 |
+
app_file: src/app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
# 3D Person Segmentation and Anaglyph Generation
|
13 |
+
|
14 |
+
[](https://huggingface.co/spaces/axelhortua/Object-segmentation)
|
15 |
+
|
16 |
+
## Documentation Examples
|
17 |
+
|
18 |
+
### Input and Segmentation
|
19 |
+

|
20 |
+
*Example of original input image and its segmentation mask*
|
21 |
+
|
22 |
+
### Stereo Processing
|
23 |
+

|
24 |
+
*Demonstration of stereo pair generation with different interaxial distances*
|
25 |
+
|
26 |
+
### Anaglyph Output
|
27 |
+

|
28 |
+
*Final anaglyph output with red-cyan 3D effect*
|
29 |
+
|
30 |
+
### Interface Overview
|
31 |
+

|
32 |
+
*The Gradio web interface with all adjustment controls*
|
33 |
+
|
34 |
+
## Lab Report
|
35 |
+
|
36 |
+
### Introduction
|
37 |
+
This project implements a sophisticated 3D image processing system that combines person segmentation with stereoscopic and anaglyph image generation. The main objectives were to:
|
38 |
+
1. Accurately segment people from images using advanced AI models
|
39 |
+
2. Generate stereoscopic 3D effects from 2D images
|
40 |
+
3. Create red-cyan anaglyph images for 3D viewing
|
41 |
+
4. Provide an interactive web interface for real-time processing
|
42 |
+
5. Handle varying image sizes with intelligent mask alignment
|
43 |
+
|
44 |
+
### Methodology
|
45 |
+
|
46 |
+
#### Tools and Technologies Used
|
47 |
+
- **SegFormer (nvidia/segformer-b0)**: State-of-the-art transformer-based model for semantic segmentation
|
48 |
+
- **PyTorch**: Deep learning framework for running the SegFormer model
|
49 |
+
- **OpenCV**: Image processing operations and mask refinement
|
50 |
+
- **Gradio**: Web interface development
|
51 |
+
- **NumPy**: Efficient array operations for image manipulation
|
52 |
+
- **PIL (Python Imaging Library)**: Image loading and basic transformations
|
53 |
+
|
54 |
+
#### Mask Processing Deep Dive
|
55 |
+
|
56 |
+
The mask processing is a crucial component of our system, designed to handle various challenges in creating high-quality 3D effects:
|
57 |
+
|
58 |
+
1. **Why Mask Resizing is Necessary**
|
59 |
+
- **Input Variability**: User-uploaded images come in different sizes and aspect ratios
|
60 |
+
- **Model Constraints**: SegFormer outputs masks at a fixed resolution (512x512)
|
61 |
+
- **Background Compatibility**: Backgrounds may have different dimensions than person images
|
62 |
+
- **3D Effect Quality**: Proper alignment is crucial for convincing stereoscopic effects
|
63 |
+
|
64 |
+
2. **Mask Processing Pipeline**
|
65 |
+
```
|
66 |
+
Original Image → SegFormer Segmentation → Initial Mask (512x512)
|
67 |
+
↓
|
68 |
+
Resize to Match Background
|
69 |
+
↓
|
70 |
+
Add Transparent Padding
|
71 |
+
↓
|
72 |
+
Center Alignment
|
73 |
+
↓
|
74 |
+
Final Processed Mask
|
75 |
+
```
|
76 |
+
|
77 |
+
3. **Technical Implementation**
|
78 |
+
```python
|
79 |
+
# Pseudocode for mask processing
|
80 |
+
def process_mask(mask, background_size):
|
81 |
+
# Calculate padding dimensions
|
82 |
+
pad_top = (background_height - mask_height) // 2
|
83 |
+
pad_bottom = background_height - mask_height - pad_top
|
84 |
+
pad_left = (background_width - mask_width) // 2
|
85 |
+
pad_right = background_width - mask_width - pad_left
|
86 |
+
|
87 |
+
# Add padding with transparency
|
88 |
+
padded_mask = np.pad(mask,
|
89 |
+
((pad_top, pad_bottom),
|
90 |
+
(pad_left, pad_right),
|
91 |
+
(0,0)),
|
92 |
+
mode='constant')
|
93 |
+
|
94 |
+
return padded_mask
|
95 |
+
```
|
96 |
+
|
97 |
+
#### Visual Process Explanation
|
98 |
+
|
99 |
+
```
|
100 |
+
+----------------+ +----------------+ +----------------+
|
101 |
+
| Original | | Segmented | | Padded |
|
102 |
+
| Image | --> | Mask | --> | Mask |
|
103 |
+
| (Variable) | | (512x512) | | (Background) |
|
104 |
+
+----------------+ +----------------+ +----------------+
|
105 |
+
| |
|
106 |
+
v v
|
107 |
+
+----------------+ +----------------+ +----------------+
|
108 |
+
| Left View | | Stereo Pair | | Anaglyph |
|
109 |
+
| Shifted | --> | Combined | --> | Output |
|
110 |
+
| | | | | |
|
111 |
+
+----------------+ +----------------+ +----------------+
|
112 |
+
```
|
113 |
+
|
114 |
+
**Key Processing Steps Visualization:**
|
115 |
+
|
116 |
+
1. **Mask Generation and Sizing:**
|
117 |
+
```
|
118 |
+
+------------+ +-----------+ +-------------+
|
119 |
+
| Raw Image | | Raw Mask | | Sized Mask |
|
120 |
+
| ****** | -> | ######## | -> | ######## |
|
121 |
+
| *Image * | | #Mask # | | #Mask # |
|
122 |
+
| ****** | | ######## | | ######## |
|
123 |
+
+------------+ +-----------+ +-------------+
|
124 |
+
```
|
125 |
+
|
126 |
+
2. **Transparency Handling:**
|
127 |
+
```
|
128 |
+
Original Padded Final
|
129 |
+
+----+ +------+ +------+
|
130 |
+
|####| | | | ## |
|
131 |
+
|####| -> |#### | -> |######|
|
132 |
+
|####| |#### | | ## |
|
133 |
+
+----+ +------+ +------+
|
134 |
+
```
|
135 |
+
|
136 |
+
#### Implementation Steps
|
137 |
+
|
138 |
+
1. **Person Segmentation**
|
139 |
+
- Utilized SegFormer model fine-tuned on ADE20K dataset
|
140 |
+
- Applied post-processing with erosion and Gaussian blur for mask refinement
|
141 |
+
- Implemented mask scaling and centering for various input sizes
|
142 |
+
- Added transparent padding for proper background integration
|
143 |
+
|
144 |
+
2. **Mask Processing and Alignment**
|
145 |
+
- Implemented dynamic mask resizing to match background dimensions
|
146 |
+
- Added centered padding for smaller masks
|
147 |
+
- Preserved transparency in padded regions
|
148 |
+
- Ensured proper aspect ratio maintenance
|
149 |
+
|
150 |
+
3. **Stereoscopic Processing**
|
151 |
+
- Created depth simulation through horizontal pixel shifting
|
152 |
+
- Implemented parallel view stereo pair generation
|
153 |
+
- Added configurable interaxial distance for 3D effect adjustment
|
154 |
+
- Enhanced alignment between stereo pairs with mask centering
|
155 |
+
|
156 |
+
4. **Anaglyph Generation**
|
157 |
+
- Combined left and right eye views into red-cyan anaglyph
|
158 |
+
- Implemented color channel separation and recombination
|
159 |
+
- Added background image support with proper masking
|
160 |
+
- Improved blending between foreground and background
|
161 |
+
|
162 |
+
5. **User Interface**
|
163 |
+
- Developed interactive web interface using Gradio
|
164 |
+
- Added real-time parameter adjustment capabilities
|
165 |
+
- Implemented support for custom background images
|
166 |
+
- Added size adjustment controls
|
167 |
+
|
168 |
+
### Results
|
169 |
+
|
170 |
+
The system produces three main outputs:
|
171 |
+
1. Segmentation mask showing the isolated person with proper transparency
|
172 |
+
2. Side-by-side stereo pair for parallel viewing with centered alignment
|
173 |
+
3. Red-cyan anaglyph image for 3D glasses viewing
|
174 |
+
|
175 |
+
Key Features:
|
176 |
+
- Adjustable person size (10-200%)
|
177 |
+
- Configurable interaxial distance (0-10 pixels)
|
178 |
+
- Optional custom background support
|
179 |
+
- Real-time processing and preview
|
180 |
+
- Intelligent mask alignment and padding
|
181 |
+
- Transparent background handling
|
182 |
+
|
183 |
+
### Discussion
|
184 |
+
|
185 |
+
#### Technical Challenges
|
186 |
+
1. **Mask Alignment**: Ensuring proper alignment between segmentation masks and background images required careful consideration of image dimensions and aspect ratios.
|
187 |
+
2. **Stereo Effect Quality**: Balancing the interaxial distance for comfortable viewing while maintaining the 3D effect.
|
188 |
+
3. **Performance Optimization**: Efficient processing of large images while maintaining real-time interaction.
|
189 |
+
4. **Transparency Handling**: Implementing proper transparency in padded regions while maintaining mask quality.
|
190 |
+
5. **Size Adaptation**: Managing different input image sizes while preserving aspect ratios and alignment.
|
191 |
+
|
192 |
+
#### Learning Outcomes
|
193 |
+
- Deep understanding of stereoscopic image generation
|
194 |
+
- Experience with state-of-the-art segmentation models
|
195 |
+
- Practical knowledge of image processing techniques
|
196 |
+
- Web interface development for ML applications
|
197 |
+
- Advanced mask manipulation and alignment strategies
|
198 |
+
|
199 |
+
### Conclusion
|
200 |
+
|
201 |
+
This project successfully demonstrates the integration of modern AI-powered segmentation with classical stereoscopic image processing techniques. The system provides an accessible way to create 3D effects from regular 2D images, with robust handling of different image sizes and proper transparency management.
|
202 |
+
|
203 |
+
#### Future Work
|
204 |
+
- Implementation of depth-aware 3D effect generation
|
205 |
+
- Support for video processing
|
206 |
+
- Additional 3D viewing formats (side-by-side, over-under)
|
207 |
+
- Enhanced background replacement options
|
208 |
+
- Mobile device optimization
|
209 |
+
- Advanced depth map generation
|
210 |
+
- Multi-person segmentation support
|
211 |
+
|
212 |
+
## Setup
|
213 |
+
|
214 |
+
```bash
|
215 |
+
pip install -r requirements.txt
|
216 |
+
```
|
217 |
+
|
218 |
+
## Usage
|
219 |
+
|
220 |
+
```bash
|
221 |
+
cd src
|
222 |
+
python app.py
|
223 |
+
```
|
224 |
+
|
225 |
+
## Parameters
|
226 |
+
|
227 |
+
- **Person Image**: Upload an image containing a person
|
228 |
+
- **Background Image**: (Optional) Custom background image
|
229 |
+
- **Interaxial Distance**: Adjust the 3D effect strength (0-10)
|
230 |
+
- **Person Size**: Adjust the size of the person in the output (10-200%)
|
231 |
+
|
232 |
+
## Output Types
|
233 |
+
|
234 |
+
1. **Segmentation Mask**: Shows the isolated person with proper transparency
|
235 |
+
2. **Stereo Pair**: Side-by-side stereo image for parallel viewing
|
236 |
+
3. **Anaglyph**: Red-cyan 3D image viewable with anaglyph glasses
|
237 |
+
|
238 |
+
## Technical Notes
|
239 |
+
|
240 |
+
- **Mask Processing Details**:
|
241 |
+
- Initial mask is generated at 512x512 resolution
|
242 |
+
- Dynamic padding calculation: `pad = (background_size - mask_size) // 2`
|
243 |
+
- Transparency preservation using NumPy's constant padding mode
|
244 |
+
- Aspect ratio maintained through centered scaling
|
245 |
+
- Real-time size adjustments (10-200%) applied before padding
|
246 |
+
|
247 |
+
- **Size Handling Algorithm**:
|
248 |
+
1. Calculate target dimensions based on background
|
249 |
+
2. Resize mask while maintaining aspect ratio
|
250 |
+
3. Add transparent padding to match background
|
251 |
+
4. Center the mask content
|
252 |
+
5. Apply any user-specified size adjustments
|
253 |
+
|
254 |
+
- The system automatically handles different input image sizes
|
255 |
+
- Masks are dynamically padded and centered for optimal alignment
|
256 |
+
- Transparent regions are properly preserved in the final output
|
257 |
+
- Background images are automatically scaled to match the person image
|
258 |
+
- Real-time preview updates as parameters are adjusted
|
259 |
+
|
public/Project 4 Report.pdf
ADDED
Binary file (91.5 kB). View file
|
|
public/images/backgrounds/a.png
ADDED
![]() |
Git LFS Details
|
public/images/backgrounds/b.png
ADDED
![]() |
Git LFS Details
|
public/images/backgrounds/c.png
ADDED
![]() |
Git LFS Details
|
public/images/backgrounds/d.png
ADDED
![]() |
Git LFS Details
|
public/images/backgrounds/e.png
ADDED
![]() |
Git LFS Details
|
public/images/backgrounds/f.png
ADDED
![]() |
Git LFS Details
|
public/images/backgrounds/g.png
ADDED
![]() |
Git LFS Details
|
public/images/documentation/Anaglyph.png
ADDED
![]() |
Git LFS Details
|
public/images/documentation/Mask.png
ADDED
![]() |
Git LFS Details
|
public/images/documentation/Stereo.png
ADDED
![]() |
Git LFS Details
|
public/images/people/a.jpg
ADDED
![]() |
Git LFS Details
|
public/images/people/b.jpg
ADDED
![]() |
Git LFS Details
|
public/images/people/c.jpg
ADDED
![]() |
Git LFS Details
|
public/images/people/d.png
ADDED
![]() |
Git LFS Details
|
public/images/people/e.jpg
ADDED
![]() |
Git LFS Details
|
public/images/people/f.jpg
ADDED
![]() |
Git LFS Details
|
public/images/people/g.jpeg
ADDED
![]() |
public/lab_report.txt
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
3D Person Segmentation and Anaglyph Generation - Lab Report
|
2 |
+
=================================================
|
3 |
+
|
4 |
+
Introduction
|
5 |
+
------------
|
6 |
+
In this project, I developed a sophisticated 3D image processing system that combines modern AI-powered person segmentation with classical stereoscopic image processing. The main objectives were successfully accomplished:
|
7 |
+
|
8 |
+
1. Implementation of accurate person segmentation using SegFormer AI model
|
9 |
+
2. Creation of stereoscopic 3D effects from 2D images
|
10 |
+
3. Generation of red-cyan anaglyph images for 3D viewing
|
11 |
+
4. Development of an interactive web interface
|
12 |
+
5. Implementation of intelligent mask alignment for varying image sizes
|
13 |
+
|
14 |
+
The project is accessible at: https://huggingface.co/spaces/axelhortua/Object-segmentation
|
15 |
+
|
16 |
+
Methodology
|
17 |
+
-----------
|
18 |
+
The implementation followed a systematic approach using various tools and technologies:
|
19 |
+
|
20 |
+
1. Tools Selection:
|
21 |
+
- SegFormer (nvidia/segformer-b0) for semantic segmentation
|
22 |
+
- PyTorch for deep learning implementation
|
23 |
+
- OpenCV for image processing
|
24 |
+
- Gradio for web interface
|
25 |
+
- NumPy for array operations
|
26 |
+
- PIL for image handling
|
27 |
+
|
28 |
+
2. Implementation Process:
|
29 |
+
|
30 |
+
a) Person Segmentation:
|
31 |
+
- Used SegFormer model fine-tuned on ADE20K dataset
|
32 |
+
- Applied post-processing with erosion and Gaussian blur
|
33 |
+
- Implemented dynamic mask scaling and centering
|
34 |
+
|
35 |
+
b) Mask Processing:
|
36 |
+
- Developed dynamic mask resizing system
|
37 |
+
- Implemented transparent padding
|
38 |
+
- Ensured proper aspect ratio maintenance
|
39 |
+
- Created centered alignment algorithm
|
40 |
+
|
41 |
+
c) Stereoscopic Processing:
|
42 |
+
- Implemented horizontal pixel shifting for depth simulation
|
43 |
+
- Created parallel view stereo pair generation
|
44 |
+
- Added configurable interaxial distance
|
45 |
+
- Enhanced stereo pair alignment
|
46 |
+
|
47 |
+
d) Anaglyph Generation:
|
48 |
+
- Implemented color channel separation
|
49 |
+
- Created background integration system
|
50 |
+
- Developed foreground-background blending
|
51 |
+
- Optimized 3D effect quality
|
52 |
+
|
53 |
+
Results
|
54 |
+
-------
|
55 |
+
The system successfully produces three main outputs:
|
56 |
+
|
57 |
+
1. Segmentation Mask:
|
58 |
+
- Clean person isolation
|
59 |
+
- Proper transparency handling
|
60 |
+
- Accurate edge detection
|
61 |
+
- Smooth mask transitions
|
62 |
+
|
63 |
+
2. Stereo Pair:
|
64 |
+
- Side-by-side stereo image
|
65 |
+
- Configurable depth effect
|
66 |
+
- Proper alignment between pairs
|
67 |
+
- Maintained image quality
|
68 |
+
|
69 |
+
3. Anaglyph Output:
|
70 |
+
- Red-cyan 3D image
|
71 |
+
- Adjustable 3D effect strength
|
72 |
+
- Clean color separation
|
73 |
+
- Minimal ghosting artifacts
|
74 |
+
|
75 |
+
Key Features Achieved:
|
76 |
+
- Person size adjustment (10-200%)
|
77 |
+
- Interaxial distance control (0-10 pixels)
|
78 |
+
- Custom background support
|
79 |
+
- Real-time processing and preview
|
80 |
+
- Intelligent mask alignment
|
81 |
+
- Transparent background handling
|
82 |
+
|
83 |
+
Discussion
|
84 |
+
----------
|
85 |
+
Technical Challenges Faced:
|
86 |
+
|
87 |
+
1. Mask Alignment:
|
88 |
+
- Complex handling of different image dimensions
|
89 |
+
- Maintaining proper aspect ratios
|
90 |
+
- Ensuring consistent centering
|
91 |
+
- Handling edge cases
|
92 |
+
|
93 |
+
2. Stereo Effect Quality:
|
94 |
+
- Balancing interaxial distance
|
95 |
+
- Minimizing visual artifacts
|
96 |
+
- Maintaining comfortable viewing experience
|
97 |
+
- Preserving image details
|
98 |
+
|
99 |
+
3. Performance Optimization:
|
100 |
+
- Efficient large image processing
|
101 |
+
- Real-time interface responsiveness
|
102 |
+
- Memory management
|
103 |
+
- Processing speed optimization
|
104 |
+
|
105 |
+
4. Transparency Handling:
|
106 |
+
- Proper alpha channel management
|
107 |
+
- Clean edge preservation
|
108 |
+
- Consistent transparency across operations
|
109 |
+
- Background integration
|
110 |
+
|
111 |
+
Learning Outcomes:
|
112 |
+
- Deep understanding of stereoscopic image generation
|
113 |
+
- Practical experience with AI models
|
114 |
+
- Advanced image processing techniques
|
115 |
+
- Web interface development skills
|
116 |
+
- Complex system integration experience
|
117 |
+
|
118 |
+
Conclusion
|
119 |
+
----------
|
120 |
+
The project successfully demonstrates the integration of AI-powered segmentation with classical stereoscopic techniques. The system provides an accessible way to create 3D effects from regular 2D images, with robust handling of different image sizes and proper transparency management.
|
121 |
+
|
122 |
+
Future Work:
|
123 |
+
1. Implementation of depth-aware 3D effect generation
|
124 |
+
2. Addition of video processing capabilities
|
125 |
+
3. Support for additional 3D viewing formats
|
126 |
+
4. Enhanced background replacement options
|
127 |
+
5. Mobile device optimization
|
128 |
+
6. Advanced depth map generation
|
129 |
+
7. Multi-person segmentation support
|
130 |
+
|
131 |
+
The project has laid a strong foundation for future developments in 3D image processing and demonstrates the potential of combining AI with traditional image processing techniques.
|
requirements.txt
CHANGED
@@ -5,4 +5,4 @@ datasets
|
|
5 |
opencv-python
|
6 |
gradio
|
7 |
numpy
|
8 |
-
|
|
|
5 |
opencv-python
|
6 |
gradio
|
7 |
numpy
|
8 |
+
imagehash
|
src/app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
from PIL import Image
|
4 |
-
from utils import load_model, segment_person, resize_image, split_stereo_image
|
5 |
-
|
6 |
# Load model and processor once
|
7 |
processor, model = load_model()
|
8 |
|
@@ -10,32 +10,55 @@ processor, model = load_model()
|
|
10 |
default_bg = Image.new("RGB", (512, 512), color=(95, 147, 89))
|
11 |
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
def generate_3d_outputs(person_img, background_img=None, shift_pixels=10, person_size=100):
|
17 |
# Resize images to match
|
18 |
-
image = resize_image(person_img, person_size)
|
19 |
-
background_img = background_img if background_img is not None else default_bg
|
20 |
|
|
|
21 |
|
22 |
# Split background image into left and right halves
|
23 |
leftBackground, rightBackground = split_stereo_image(Image.fromarray(background_img))
|
24 |
|
25 |
-
|
|
|
26 |
|
27 |
-
|
28 |
-
image = Image.fromarray(np.array(image)).resize((leftBackground.shape[1], leftBackground.shape[0]))
|
29 |
# Step 1: Segment person
|
30 |
mask = segment_person(image, processor, model)
|
31 |
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
leftBackground_np = np.array(leftBackground)
|
35 |
rightBackground_np = np.array(rightBackground)
|
36 |
|
37 |
-
|
38 |
-
person_only = image_np * mask
|
39 |
leftBackground_only = leftBackground_np * (1 - mask)
|
40 |
rightBackground_only = rightBackground_np * (1 - mask)
|
41 |
|
@@ -43,10 +66,9 @@ def generate_3d_outputs(person_img, background_img=None, shift_pixels=10, perso
|
|
43 |
person_left = np.roll(person_only, shift=-shift_pixels, axis=1)
|
44 |
person_right = np.roll(person_only, shift=shift_pixels, axis=1)
|
45 |
|
46 |
-
|
47 |
left_eye = np.clip(person_right + leftBackground_only, 0, 255).astype(np.uint8)
|
48 |
right_eye = np.clip(person_left + rightBackground_only, 0, 255).astype(np.uint8)
|
49 |
-
person_segmentation = np.clip(person_only, 0, 255).astype(np.uint8)
|
50 |
|
51 |
# --- Combine left and right images side by side ---
|
52 |
stereo_pair = np.concatenate([left_eye, right_eye], axis=1)
|
@@ -71,7 +93,7 @@ demo = gr.Interface(
|
|
71 |
inputs=[
|
72 |
gr.Image(label="Person Image"),
|
73 |
gr.Image(label="Optional Background Image"),
|
74 |
-
gr.Slider(minimum=0, maximum=
|
75 |
gr.Slider(minimum=10, maximum=200, step=10, value=100, label="Person Size %"),
|
76 |
|
77 |
],
|
@@ -80,6 +102,7 @@ demo = gr.Interface(
|
|
80 |
gr.Image(label="Stereo_pair"),
|
81 |
gr.Image(label="3D Anaglyph Image")
|
82 |
],
|
|
|
83 |
title="3D Person Segmentation Viewer",
|
84 |
description="Upload a person photo and optionally a background image. Outputs anaglyph and stereo views."
|
85 |
)
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
from PIL import Image
|
4 |
+
from utils import load_model, segment_person, resize_image, split_stereo_image,resize_image_to_width, resize_mask, resize_images
|
5 |
+
from testing import get_image_names
|
6 |
# Load model and processor once
|
7 |
processor, model = load_model()
|
8 |
|
|
|
10 |
default_bg = Image.new("RGB", (512, 512), color=(95, 147, 89))
|
11 |
|
12 |
|
|
|
|
|
|
|
13 |
def generate_3d_outputs(person_img, background_img=None, shift_pixels=10, person_size=100):
|
14 |
# Resize images to match
|
|
|
|
|
15 |
|
16 |
+
background_img = background_img if background_img is not None else default_bg
|
17 |
|
18 |
# Split background image into left and right halves
|
19 |
leftBackground, rightBackground = split_stereo_image(Image.fromarray(background_img))
|
20 |
|
21 |
+
## Match person image to background image width
|
22 |
+
image = resize_image_to_width(person_img, leftBackground)
|
23 |
|
|
|
|
|
24 |
# Step 1: Segment person
|
25 |
mask = segment_person(image, processor, model)
|
26 |
|
27 |
+
# Resize mask based on person_size percentage
|
28 |
+
mask = resize_mask(person_size, mask)
|
29 |
+
|
30 |
+
# Resize image based on person_size percentage
|
31 |
+
image_np = resize_images(image, person_size)
|
32 |
+
|
33 |
+
# Apply mask to image
|
34 |
+
person_only = image_np * mask
|
35 |
+
person_segmentation = np.clip(person_only, 0, 255).astype(np.uint8)
|
36 |
|
37 |
+
# Resize mask and person_only to match background dimensions while preserving content
|
38 |
+
target_height, target_width = leftBackground.shape[:2]
|
39 |
+
current_height, current_width = mask.shape[:2]
|
40 |
+
|
41 |
+
# Calculate padding
|
42 |
+
pad_top = max(0, (target_height - current_height) // 2)
|
43 |
+
pad_bottom = max(0, target_height - current_height - pad_top)
|
44 |
+
pad_left = max(0, (target_width - current_width) // 2)
|
45 |
+
pad_right = max(0, target_width - current_width - pad_left)
|
46 |
+
|
47 |
+
# Pad mask and person_only arrays
|
48 |
+
mask = np.pad(mask, ((pad_top, pad_bottom), (pad_left, pad_right), (0,0)), mode='constant')
|
49 |
+
person_only = np.pad(person_segmentation, ((pad_top, pad_bottom), (pad_left, pad_right), (0,0)), mode='constant')
|
50 |
+
|
51 |
+
|
52 |
+
# CROP MASK TO MATCH BACKGROUND DIMENSIONS FROM CENTER OF BACKGROUND
|
53 |
+
if(mask.shape[0] > target_height or mask.shape[1] > target_width):
|
54 |
+
mask = mask[mask.shape[0]//2-target_height//2:mask.shape[0]//2+target_height//2, mask.shape[1]//2-target_width//2:mask.shape[1]//2+target_width//2, :]
|
55 |
+
person_only = person_only[person_only.shape[0]//2-target_height//2:person_only.shape[0]//2+target_height//2, person_only.shape[1]//2-target_width//2:person_only.shape[1]//2+target_width//2, :]
|
56 |
+
|
57 |
+
# Convert background images to numpy arrays
|
58 |
leftBackground_np = np.array(leftBackground)
|
59 |
rightBackground_np = np.array(rightBackground)
|
60 |
|
61 |
+
# Apply mask to background images
|
|
|
62 |
leftBackground_only = leftBackground_np * (1 - mask)
|
63 |
rightBackground_only = rightBackground_np * (1 - mask)
|
64 |
|
|
|
66 |
person_left = np.roll(person_only, shift=-shift_pixels, axis=1)
|
67 |
person_right = np.roll(person_only, shift=shift_pixels, axis=1)
|
68 |
|
69 |
+
|
70 |
left_eye = np.clip(person_right + leftBackground_only, 0, 255).astype(np.uint8)
|
71 |
right_eye = np.clip(person_left + rightBackground_only, 0, 255).astype(np.uint8)
|
|
|
72 |
|
73 |
# --- Combine left and right images side by side ---
|
74 |
stereo_pair = np.concatenate([left_eye, right_eye], axis=1)
|
|
|
93 |
inputs=[
|
94 |
gr.Image(label="Person Image"),
|
95 |
gr.Image(label="Optional Background Image"),
|
96 |
+
gr.Slider(minimum=0, maximum=20, step=1, value=10, label="interaxial distance"),
|
97 |
gr.Slider(minimum=10, maximum=200, step=10, value=100, label="Person Size %"),
|
98 |
|
99 |
],
|
|
|
102 |
gr.Image(label="Stereo_pair"),
|
103 |
gr.Image(label="3D Anaglyph Image")
|
104 |
],
|
105 |
+
examples= get_image_names(),
|
106 |
title="3D Person Segmentation Viewer",
|
107 |
description="Upload a person photo and optionally a background image. Outputs anaglyph and stereo views."
|
108 |
)
|
src/testing.py
CHANGED
@@ -1,4 +1,38 @@
|
|
1 |
-
from app import create_anaglyph
|
2 |
-
|
3 |
# Provide paths to your test images
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Provide paths to your test images
|
2 |
+
import os
|
3 |
+
import random
|
4 |
+
|
5 |
+
def get_image_names():
    """Build the Gradio example rows for the demo interface.

    Each row is ``[person_path, background_path, interaxial_distance,
    person_size_percent]``, pairing a sample person photo with a
    background image and the slider defaults that suit that pair.

    Returns:
        list[list]: example rows suitable for ``gr.Interface(examples=...)``.
    """
    people_dir = "public/images/people"
    background_dir = "public/images/backgrounds"

    # (person file, background file, interaxial distance, person size %)
    data = [
        ('e.jpg', 'e.png', 10, 60),
        ('f.jpg', 'f.png', 10, 60),
        ('g.jpeg', 'g.png', 10, 40),
        ('a.jpg', 'a.png', 10, 130),
        ('b.jpg', 'b.png', 10, 40),
        ('c.jpg', 'c.png', 10, 60),
        ('d.png', 'd.png', 10, 50),
    ]

    # Expand the short file names into full paths for each example row.
    return [
        [os.path.join(people_dir, person), os.path.join(background_dir, bg), shift, size]
        for person, bg, shift, size in data
    ]
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
|
src/utils.py
CHANGED
@@ -94,3 +94,61 @@ def split_stereo_image(image):
|
|
94 |
else:
|
95 |
return image, resize_image(image, 99)
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
else:
|
95 |
return image, resize_image(image, 99)
|
96 |
|
97 |
+
def resize_image_to_width(person_img, background_img):
    """Scale the person image to the background's dominant dimension.

    If the background is landscape (wider than tall) the person image is
    scaled so its width matches the background's width; otherwise it is
    scaled so its height matches the background's height. The person
    image's aspect ratio is preserved in both cases.

    Args:
        person_img: person photo as a numpy array or PIL Image.
        background_img: numpy array whose ``.shape`` (H, W, ...) supplies
            the target dimensions; pixel data is not read.

    Returns:
        PIL.Image.Image: the resized person image.
    """
    # Normalize the input once; np.array() accepts both PIL Images and arrays.
    img_array = np.array(person_img)
    src_height, src_width = img_array.shape[:2]

    if background_img.shape[1] > background_img.shape[0]:
        # Landscape background: match widths, derive height from aspect ratio.
        width = background_img.shape[1]
        height = int(width * src_height / src_width)
    else:
        # Portrait/square background: match heights, derive width.
        height = background_img.shape[0]
        width = int(height * src_width / src_height)

    # Original code round-tripped through numpy twice after resizing; the
    # extra conversions were no-ops, so a single resize is equivalent.
    return Image.fromarray(img_array).resize((width, height))
|
116 |
+
|
117 |
+
def resize_mask(person_size, mask):
    """Rescale a segmentation mask by a percentage.

    Args:
        person_size: target size as a percentage of the original
            (100 keeps the mask dimensions unchanged).
        mask: float mask with values in [0, 1], shaped (H, W) or (H, W, C).

    Returns:
        np.ndarray: float32 mask in [0, 1]; a 2-D result is expanded to
        three identical channels so it can multiply RGB images directly.
    """
    factor = person_size / 100.0
    height, width = mask.shape[:2]
    target = (int(width * factor), int(height * factor))

    # PIL does the resampling, so map the [0, 1] mask onto 8-bit first.
    as_image = Image.fromarray((mask * 255).astype(np.uint8))
    scaled = np.array(as_image.resize(target)).astype(np.float32) / 255.0

    # Restore a channel axis if the round-trip collapsed the mask to 2-D.
    if scaled.ndim == 2:
        scaled = np.stack([scaled] * 3, axis=-1)

    return scaled
|
136 |
+
|
137 |
+
def resize_images(image, person_size):
    """Rescale an image by a percentage and return it as a numpy array.

    Args:
        image: input image (PIL Image or numpy array).
        person_size: target size as a percentage of the original
            (100 keeps the dimensions unchanged).

    Returns:
        np.ndarray: the rescaled image.
    """
    arr = np.array(image)
    factor = person_size / 100.0
    height, width = arr.shape[:2]
    target = (int(width * factor), int(height * factor))

    # PIL performs the resampling; hand back a numpy array for the caller.
    rescaled = Image.fromarray(arr).resize(target)
    return np.array(rescaled)
|