Upload 129 files

This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitattributes +15 -0
- SlimFace/.gitignore +197 -0
- SlimFace/.python-version +1 -0
- SlimFace/LICENSE +21 -0
- SlimFace/README.md +93 -0
- SlimFace/apps/gradio_app.py +26 -0
- SlimFace/apps/gradio_app/.gitkeep +0 -0
- SlimFace/apps/gradio_app/__init__.py +0 -0
- SlimFace/apps/gradio_app/inference.py +57 -0
- SlimFace/assets/comparision.md +11 -0
- SlimFace/assets/examples/.gitkeep +0 -0
- SlimFace/ckpts/.gitignore +4 -0
- SlimFace/configs/accelerate_config.yaml +6 -0
- SlimFace/configs/image_classification_models_config.yaml +249 -0
- SlimFace/data/.gitignore +5 -0
- SlimFace/data/raw/.gitignore +4 -0
- SlimFace/docs/README.md +1 -0
- SlimFace/docs/data/data_processing.md +135 -0
- SlimFace/docs/inference/inference_doc.md +15 -0
- SlimFace/docs/test/inference_test_doc.md +96 -0
- SlimFace/docs/test/training_test_doc.md +103 -0
- SlimFace/docs/training/training_doc.md +48 -0
- SlimFace/new_contruct.md +63 -0
- SlimFace/requirements/requirements.txt +15 -0
- SlimFace/requirements/requirements_compatible.txt +15 -0
- SlimFace/requirements/requirements_inference.txt +15 -0
- SlimFace/scripts/download_ckpts.py +104 -0
- SlimFace/scripts/process_dataset.py +242 -0
- SlimFace/scripts/setup_third_party.py +61 -0
- SlimFace/src/slimface/__init__.py +0 -0
- SlimFace/src/slimface/data/data_processing.py +67 -0
- SlimFace/src/slimface/data/process_face.py +64 -0
- SlimFace/src/slimface/inference/__init__.py +0 -0
- SlimFace/src/slimface/inference/end2end_inference.py +143 -0
- SlimFace/src/slimface/inference/inference.py +126 -0
- SlimFace/src/slimface/models/__init__.py +0 -0
- SlimFace/src/slimface/models/classification_models/__init__.py +0 -0
- SlimFace/src/slimface/models/classification_models/alls.py +55 -0
- SlimFace/src/slimface/models/classification_models/efficient_v1.py +0 -0
- SlimFace/src/slimface/models/classification_models/efficient_v2.py +0 -0
- SlimFace/src/slimface/models/classification_models/regnet.py +0 -0
- SlimFace/src/slimface/models/classification_models/vit.py +0 -0
- SlimFace/src/slimface/models/detection_models/align.py +57 -0
- SlimFace/src/slimface/models/detection_models/face_yolo.py +151 -0
- SlimFace/src/slimface/models/detection_models/mtcnn.py +175 -0
- SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/.gitignore +3 -0
- SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/LICENSE +21 -0
- SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/README.md +26 -0
- SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det1.caffemodel +0 -0
- SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det1.prototxt +177 -0
.gitattributes
CHANGED
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det2.caffemodel filter=lfs diff=lfs merge=lfs -text
+SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det3.caffemodel filter=lfs diff=lfs merge=lfs -text
+SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det4.caffemodel filter=lfs diff=lfs merge=lfs -text
+SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/images/example.png filter=lfs diff=lfs merge=lfs -text
+SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/images/office2.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/images/office4.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/Cate[[:space:]]Blanchett.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/Daniel[[:space:]]Day-Lewis.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/dont_know.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/Elon_Musk.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/Gal[[:space:]]Gado.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/Kate[[:space:]]Winslet.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/Tom[[:space:]]Cruise.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/Tom[[:space:]]Hanks.jpg filter=lfs diff=lfs merge=lfs -text
+SlimFace/tests/test_images/Viola[[:space:]]Davis.jpg filter=lfs diff=lfs merge=lfs -text
SlimFace/.gitignore
ADDED
@@ -0,0 +1,197 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore

# lightning_logs
lightning_logs
SlimFace/.python-version
ADDED
@@ -0,0 +1 @@
3.11.11
SlimFace/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Danh Tran

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SlimFace/README.md
ADDED
@@ -0,0 +1,93 @@
# SlimFace: Slim Face Recognition

> ## Credits and Citation
>
> ℹ️ This project is based on the [](https://github.com/otroshi/edgeface) by [](https://github.com/otroshi), and includes our own bug fixes and enhancements.
>
> If this project is helpful for your research, please consider citing the original paper:
>
> **Edgeface: Efficient face recognition model for edge devices**
> *George, Anjith and Ecabert, Christophe and Shahreza, Hatef Otroshi and Kotwal, Ketan and Marcel, Sebastien*
> *IEEE Transactions on Biometrics, Behavior, and Identity Science (2024)*
>
> **If you use this work in your research, please cite the original paper:**
> ```bibtex
> @article{edgeface,
>   title={Edgeface: Efficient face recognition model for edge devices},
>   author={George, Anjith and Ecabert, Christophe and Shahreza, Hatef Otroshi and Kotwal, Ketan and Marcel, Sebastien},
>   journal={IEEE Transactions on Biometrics, Behavior, and Identity Science},
>   year={2024}
> }
> ```

## Usage
### Clone the Repository
```bash
# Clone the repository
git clone https://github.com/danhtran2mind/SlimFace

# Navigate into the newly created 'SlimFace' directory.
cd SlimFace
```
### Install Dependencies
**If OpenCV (cv2) does not work, run the commands below:**
```bash
sudo apt update
sudo apt install -y libglib2.0-0
sudo apt install -y libgl1-mesa-dev
```
### Default Dependencies
```bash
pip install -r requirements/requirements.txt
```
### Other Dependency Options
- For the pinned versions known to be compatible:
```bash
pip install -r requirements/requirements_compatible.txt
```
- For `End2end Inference` only:
```bash
pip install -r requirements/requirements_inference.txt
```
### Download Model Checkpoints
```bash
python scripts/download_ckpts.py
```
### Setup Third Party
```bash
python scripts/setup_third_party.py
```
## Data Preparation

## Pre-trained Model Preparation
For detailed instructions on how to process and manage your data effectively, refer to the [full guide for data processing](./docs/data/data_processing.md).

For quick dataset preparation, run:
```bash
python scripts/process_dataset.py
```
## Training

1. Configure the default settings for Accelerate:
```bash
accelerate config default
```

2. Launch the training script using Accelerate:
```bash
accelerate launch src/slimface/training/accelerate_train.py
```

For additional help, refer to the [Training Documentation](./docs/training/training_doc.md).

## Demonstration
```bash
python apps/gradio_app.py
```

https://huggingface.co/spaces/danhtran2mind/slimface

## Project Description

This repository is trained from [](https://github.com/danhtran2mind/edgeface), a fork of [](https://github.com/otroshi/edgeface), with numerous bug fixes and rewritten code for improved performance and stability.
SlimFace/apps/gradio_app.py
ADDED
@@ -0,0 +1,26 @@
import gradio as gr
from gradio_app.inference import run_inference

def create_gradio_interface():
    return gr.Interface(
        fn=run_inference,
        inputs=[
            gr.Image(type="pil", label="Upload Image"),
            gr.File(label="Reference Dict JSON File"),
            gr.File(label="Index to Class Mapping JSON File"),
            gr.File(label="Classifier Model (.pth) File"),
            gr.Textbox(label="EdgeFace Model Name", value="edgeface_base"),
            gr.Textbox(label="EdgeFace Model Directory", value="ckpts/idiap"),
            gr.Dropdown(choices=["yolo", "mtcnn"], label="Face Detection Algorithm", value="yolo"),
            gr.Dropdown(choices=["auto", "cpu", "gpu"], label="Accelerator", value="auto"),
            gr.Slider(minimum=112, maximum=448, step=1, value=224, label="Resolution"),
            gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.6, label="Similarity Threshold")
        ],
        outputs="text",
        title="Face Classification with EdgeFace Validation",
        description="Upload an image and required files to perform face classification with EdgeFace embedding validation."
    )

if __name__ == "__main__":
    iface = create_gradio_interface()
    iface.launch()
SlimFace/apps/gradio_app/.gitkeep
ADDED
File without changes
SlimFace/apps/gradio_app/__init__.py
ADDED
File without changes
SlimFace/apps/gradio_app/inference.py
ADDED
@@ -0,0 +1,57 @@
import os
import sys
from PIL import Image

# Append the path to the inference script's directory
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src', 'slimface', 'inference')))
from end2end_inference import cinference_and_confirm

def run_inference(image, reference_dict_path, index_to_class_mapping_path, model_path,
                  edgeface_model_name="edgeface_base", edgeface_model_dir="ckpts/idiap",
                  algorithm="yolo", accelerator="auto", resolution=224, similarity_threshold=0.6):
    # Save uploaded image temporarily in apps/gradio_app/
    temp_image_path = os.path.join(os.path.dirname(__file__), "temp_image.jpg")
    image.save(temp_image_path)

    # Create args object to mimic command-line arguments
    class Args:
        def __init__(self):
            self.unknown_image_path = temp_image_path
            self.reference_dict_path = reference_dict_path.name if reference_dict_path else None
            self.index_to_class_mapping_path = index_to_class_mapping_path.name if index_to_class_mapping_path else None
            self.model_path = model_path.name if model_path else None
            self.edgeface_model_name = edgeface_model_name
            self.edgeface_model_dir = edgeface_model_dir
            self.algorithm = algorithm
            self.accelerator = accelerator
            self.resolution = resolution
            self.similarity_threshold = similarity_threshold

    args = Args()

    # Validate inputs
    if not all([args.reference_dict_path, args.index_to_class_mapping_path, args.model_path]):
        return "Error: Please provide all required files (reference dict, index-to-class mapping, and model)."

    try:
        # Call the inference function from end2end_inference.py
        results = cinference_and_confirm(args)

        # Format output
        output = ""
        for result in results:
            output += f"Image: {result['image_path']}\n"
            output += f"Predicted Class: {result['predicted_class']}\n"
            output += f"Confidence: {result['confidence']:.4f}\n"
            output += f"Similarity: {result.get('similarity', 'N/A'):.4f}\n"
            output += f"Confirmed: {result.get('confirmed', 'N/A')}\n\n"

        return output

    except Exception as e:
        return f"Error: {str(e)}"

    finally:
        # Clean up temporary image
        if os.path.exists(temp_image_path):
            os.remove(temp_image_path)
SlimFace/assets/comparision.md
ADDED
@@ -0,0 +1,11 @@
slim_face_vit_b_16
Train loss: 0.0074, Train acc: 0.9980, Val loss: 0.2179, Val acc: 0.9336

efficientnet_b3
Train loss: 0.0014, Train acc: 1.0000, Val loss: 0.1931, Val acc: 0.9427

efficientnet_v2_s
Train loss: 0.0016, Train acc: 1.0000, Val loss: 0.2374, Val acc: 0.9375

regnet_y_800mf
Train loss: 0.0033, Train acc: 0.9997, Val loss: 0.3766, Val acc: 0.8906
SlimFace/assets/examples/.gitkeep
ADDED
File without changes
SlimFace/ckpts/.gitignore
ADDED
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this .gitignore file
!.gitignore
SlimFace/configs/accelerate_config.yaml
ADDED
@@ -0,0 +1,6 @@
compute_environment: LOCAL_MACHINE
distributed_type: FSDP
num_processes: 4
mixed_precision: fp16
fsdp_config:
  fsdp_offload_params: true
SlimFace/configs/image_classification_models_config.yaml
ADDED
@@ -0,0 +1,249 @@
# For more details on models, see https://pytorch.org/vision/main/models.html

# EfficientNet models: Designed for efficiency with compound scaling of depth, width, and resolution.
# These models balance accuracy and computational efficiency, ideal for resource-constrained environments.
efficientnet_b0:
  metrics:
    Acc@1: 77.692  # Top-1 accuracy on ImageNet
    Acc@5: 93.532  # Top-5 accuracy on ImageNet
    GFLOPS: 0.39   # Computational complexity
    Params: 5.3M   # Number of parameters
  model_fn: models.efficientnet_b0
  resolution: 224  # Input image resolution
  weights: models.EfficientNet_B0_Weights.IMAGENET1K_V1  # Pretrained weights on ImageNet

efficientnet_b1:
  metrics:
    Acc@1: 78.642
    Acc@5: 94.186
    GFLOPS: 0.69
    Params: 7.8M
  model_fn: models.efficientnet_b1
  resolution: 240
  weights: models.EfficientNet_B1_Weights.IMAGENET1K_V1

efficientnet_b2:
  metrics:
    Acc@1: 80.608
    Acc@5: 95.31
    GFLOPS: 1.09
    Params: 9.1M
  model_fn: models.efficientnet_b2
  resolution: 260
  weights: models.EfficientNet_B2_Weights.IMAGENET1K_V1

efficientnet_b3:
  metrics:
    Acc@1: 82.008
    Acc@5: 96.054
    GFLOPS: 1.83
    Params: 12.2M
  model_fn: models.efficientnet_b3
  resolution: 300
  weights: models.EfficientNet_B3_Weights.IMAGENET1K_V1

efficientnet_b4:
  metrics:
    Acc@1: 83.384
    Acc@5: 96.594
    GFLOPS: 4.39
    Params: 19.3M
  model_fn: models.efficientnet_b4
  resolution: 380
  weights: models.EfficientNet_B4_Weights.IMAGENET1K_V1

efficientnet_b5:
  metrics:
    Acc@1: 83.444
    Acc@5: 96.628
    GFLOPS: 10.27
    Params: 30.4M
  model_fn: models.efficientnet_b5
  resolution: 456
  weights: models.EfficientNet_B5_Weights.IMAGENET1K_V1

efficientnet_b6:
  metrics:
    Acc@1: 84.008
    Acc@5: 96.916
    GFLOPS: 19.07
    Params: 43.0M
  model_fn: models.efficientnet_b6
  resolution: 528
  weights: models.EfficientNet_B6_Weights.IMAGENET1K_V1

efficientnet_b7:
  metrics:
    Acc@1: 84.122
    Acc@5: 96.908
    GFLOPS: 37.75
    Params: 66.3M
  model_fn: models.efficientnet_b7
  resolution: 600
  weights: models.EfficientNet_B7_Weights.IMAGENET1K_V1

# EfficientNet V2 models: Improved training efficiency and performance over V1.
# These models use progressive learning and optimized scaling for better accuracy.
efficientnet_v2_l:
  metrics:
    Acc@1: 85.808
    Acc@5: 97.788
    GFLOPS: 56.08
    Params: 118.5M
  model_fn: models.efficientnet_v2_l
  resolution: 480
  weights: models.EfficientNet_V2_L_Weights.IMAGENET1K_V1

efficientnet_v2_m:
  metrics:
    Acc@1: 85.112
    Acc@5: 97.156
    GFLOPS: 24.58
    Params: 54.1M
  model_fn: models.efficientnet_v2_m
  resolution: 480
  weights: models.EfficientNet_V2_M_Weights.IMAGENET1K_V1

efficientnet_v2_s:
  metrics:
    Acc@1: 84.228
    Acc@5: 96.878
    GFLOPS: 8.37
    Params: 21.5M
  model_fn: models.efficientnet_v2_s
  resolution: 384
  weights: models.EfficientNet_V2_S_Weights.IMAGENET1K_V1

# RegNet models: Designed for scalability and efficiency with a focus on network design.
# These models optimize for both accuracy and computational efficiency.
regnet_y_128gf:
  metrics:
    Acc@1: 86.068  # High accuracy but computationally expensive
    Acc@5: 97.844
    GFLOPS: 127.52
    Params: 644.8M
  model_fn: models.regnet_y_128gf
  resolution: 224
  weights: models.RegNet_Y_128GF_Weights.IMAGENET1K_SWAG_LINEAR_V1

regnet_y_16gf:
  metrics:
    Acc@1: 82.886
    Acc@5: 96.328
    GFLOPS: 15.91
    Params: 83.6M
  model_fn: models.regnet_y_16gf
  resolution: 224
  weights: models.RegNet_Y_16GF_Weights.IMAGENET1K_V2

regnet_y_1_6gf:
  metrics:
    Acc@1: 80.876
    Acc@5: 95.444
    GFLOPS: 1.61
    Params: 11.2M
  model_fn: models.regnet_y_1_6gf
  resolution: 224
  weights: models.RegNet_Y_1_6GF_Weights.IMAGENET1K_V2

regnet_y_32gf:
  metrics:
    Acc@1: 83.368
    Acc@5: 96.498
    GFLOPS: 32.28
    Params: 145.0M
  model_fn: models.regnet_y_32gf
  resolution: 224
  weights: models.RegNet_Y_32GF_Weights.IMAGENET1K_V2

regnet_y_3_2gf:
  metrics:
    Acc@1: 81.982
    Acc@5: 95.972
    GFLOPS: 3.18
    Params: 19.4M
  model_fn: models.regnet_y_3_2gf
  resolution: 224
  weights: models.RegNet_Y_3_2GF_Weights.IMAGENET1K_V2

regnet_y_400mf:
  metrics:
    Acc@1: 75.804
    Acc@5: 92.742
    GFLOPS: 0.4
    Params: 4.3M
  model_fn: models.regnet_y_400mf
  resolution: 224
  weights: models.RegNet_Y_400MF_Weights.IMAGENET1K_V2

regnet_y_800mf:
  metrics:
    Acc@1: 78.828
    Acc@5: 94.502
    GFLOPS: 0.83
    Params: 6.4M
  model_fn: models.regnet_y_800mf
  resolution: 224
  weights: models.RegNet_Y_800MF_Weights.IMAGENET1K_V2

regnet_y_8gf:
  metrics:
    Acc@1: 82.828
    Acc@5: 96.33
    GFLOPS: 8.47
    Params: 39.4M
  model_fn: models.regnet_y_8gf
  resolution: 224
  weights: models.RegNet_Y_8GF_Weights.IMAGENET1K_V2

# Vision Transformer (ViT) models: Transformer-based architecture for image classification.
# These models excel in capturing long-range dependencies but require significant compute for larger variants.
vit_b_16:
  metrics:
    Acc@1: 81.072  # Base ViT model with balanced accuracy and efficiency
    Acc@5: 95.318
    GFLOPS: 17.56
    Params: 86.6M
  model_fn: models.vit_b_16
  resolution: 224
  weights: models.ViT_B_16_Weights.IMAGENET1K_V1

vit_b_32:
  metrics:
    Acc@1: 75.912  # Larger patch size version of ViT, lower accuracy but fewer computations
    Acc@5: 92.466
    GFLOPS: 4.41
    Params: 88.2M
  model_fn: models.vit_b_32
  resolution: 224
  weights: models.ViT_B_32_Weights.IMAGENET1K_V1

vit_h_14:
  metrics:
    Acc@1: 88.552  # High-performance ViT model with very high accuracy and computational cost
    Acc@5: 98.694
    GFLOPS: 1016.72
    Params: 633.5M
  model_fn: models.vit_h_14
  resolution: 224
  weights: models.ViT_H_14_Weights.IMAGENET1K_SWAG_E2E_V1

vit_l_16:
  metrics:
    Acc@1: 79.662  # Larger ViT model with improved accuracy over base models
    Acc@5: 94.638
    GFLOPS: 61.55
    Params: 304.3M
  model_fn: models.vit_l_16
  resolution: 224
  weights: models.ViT_L_16_Weights.IMAGENET1K_V1

vit_l_32:
  metrics:
    Acc@1: 76.972  # Larger ViT with larger patch size, trading accuracy for reduced compute
    Acc@5: 93.07
    GFLOPS: 15.38
    Params: 306.5M
  model_fn: models.vit_l_32
  resolution: 224
  weights: models.ViT_L_32_Weights.IMAGENET1K_V1
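Each entry above pairs a torchvision constructor (`model_fn`) with its pretrained-weights enum and native input resolution. As a rough illustration of how such an entry could be consumed, the sketch below resolves the `model_fn` and `weights` strings via `getattr` on `torchvision.models`; the helper name and loading logic are assumptions for illustration, not the training code in this commit.

```python
# Hypothetical helper: turn one entry of image_classification_models_config.yaml
# into an instantiated torchvision backbone (illustrative only).
import yaml
import torchvision.models as models

def build_backbone(config_path: str, model_name: str):
    with open(config_path) as f:
        cfg = yaml.safe_load(f)[model_name]
    # "models.efficientnet_b0" -> attribute lookup on torchvision.models
    model_fn = getattr(models, cfg["model_fn"].split(".")[-1])
    # "models.EfficientNet_B0_Weights.IMAGENET1K_V1" -> enum class, then member
    parts = cfg["weights"].split(".")
    weights = getattr(getattr(models, parts[-2]), parts[-1])
    return model_fn(weights=weights), cfg["resolution"]

# Example usage, assuming the repo layout from this commit:
# backbone, resolution = build_backbone(
#     "configs/image_classification_models_config.yaml", "efficientnet_b0")
```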
SlimFace/data/.gitignore
ADDED
@@ -0,0 +1,5 @@
# Ignore everything in this directory
*
# Except this .gitignore file
!.gitignore
!raw
SlimFace/data/raw/.gitignore
ADDED
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this .gitignore file
!.gitignore
SlimFace/docs/README.md
ADDED
@@ -0,0 +1 @@
# This is Docs
SlimFace/docs/data/data_processing.md
ADDED
@@ -0,0 +1,135 @@
# Data Processing for slimface Training 🖼️

## Table of Contents

- [Data Processing for slimface Training 🖼️](#data-processing-for-slimface-training-)
- [Command-Line Arguments](#command-line-arguments)
- [Command-Line Arguments for `process_dataset.py`](#command-line-arguments-for-process_datasetpy)
- [Example Usage](#example-usage)
- [Step-by-step process for handling a dataset](#step-by-step-process-for-handling-a-dataset)
- [Step 1: Clone the Repository](#step-1-clone-the-repository)
- [Step 2: Process the Dataset](#step-2-process-the-dataset)
- [Option 1: Using Dataset from Kaggle](#option-1-using-dataset-from-kaggle)
- [Option 2: Using a Custom Dataset](#option-2-using-a-custom-dataset)

## Command-Line Arguments
### Command-Line Arguments for `process_dataset.py`
When running `python scripts/process_dataset.py`, you can customize the dataset processing with the following command-line arguments:

| Argument | Type | Default | Description |
|----------|------|---------|-------------|
| `--dataset_slug` | `str` | `vasukipatel/face-recognition-dataset` | The Kaggle dataset slug in `username/dataset-name` format. Specifies which dataset to download from Kaggle. |
| `--base_dir` | `str` | `./data` | The base directory where the dataset will be stored and processed. |
| `--augment` | `flag` | `False` | Enables data augmentation (e.g., flipping, rotation) for training images to increase dataset variety. Use `--augment` to enable. |
| `--random_state` | `int` | `42` | Random seed for reproducibility in the train-test split. Ensures consistent splitting across runs. |
| `--test_split_rate` | `float` | `0.2` | Proportion of data to use for validation (between 0 and 1). For example, `0.2` means 20% of the data is used for validation. |
| `--rotation_range` | `int` | `15` | Maximum rotation angle in degrees for data augmentation (if `--augment` is enabled). Images may be rotated randomly within this range. |
| `--source_subdir` | `str` | `Original Images/Original Images` | Subdirectory within `raw_dir` containing the images to process. Used for both Kaggle and custom datasets. |
| `--delete_raw` | `flag` | `False` | Deletes the raw folder after processing to save storage. Use `--delete_raw` to enable. |

### Example Usage
To process a Kaggle dataset with augmentation and a custom validation split:

```bash
python scripts/process_dataset.py \
    --augment \
    --test_split_rate 0.3 \
    --rotation_range 15
```

To process a **custom dataset** with a specific subdirectory and delete the raw folder:

```bash
python scripts/process_dataset.py \
    --source_subdir your_custom_dataset_dir \
    --delete_raw
```
## Step-by-step process for handling a dataset
These options allow flexible dataset processing tailored to your needs. 🚀

### Step 1: Clone the Repository
Ensure the `slimface` project is set up by cloning the repository and navigating to the project directory:

```bash
git clone https://github.com/danhtran2mind/slimface/
cd slimface
```

### Step 2: Process the Dataset

#### Option 1: Using Dataset from Kaggle
To download and process the sample dataset from Kaggle, run:

```bash
python scripts/process_dataset.py
```

This script organizes the dataset into the following structure under `data/`:

```markdown
data/
├── processed_ds/
│   ├── train_data/
│   │   ├── Charlize Theron/
│   │   │   ├── Charlize Theron_70.jpg
│   │   │   ├── Charlize Theron_46.jpg
│   │   │   ...
│   │   ├── Dwayne Johnson/
│   │   │   ├── Dwayne Johnson_58.jpg
│   │   │   ├── Dwayne Johnson_9.jpg
│   │   │   ...
│   └── val_data/
│       ├── Charlize Theron/
│       │   ├── Charlize Theron_60.jpg
│       │   ├── Charlize Theron_45.jpg
│       │   ...
│       ├── Dwayne Johnson/
│       │   ├── Dwayne Johnson_11.jpg
│       │   ├── Dwayne Johnson_46.jpg
│       │   ...
├── raw/
│   ├── Faces/
│   │   ├── Jessica Alba_90.jpg
│   │   ├── Hugh Jackman_70.jpg
│   │   ...
│   ├── Original Images/
│   │   ├── Charlize Theron/
│   │   │   ├── Charlize Theron_60.jpg
│   │   │   ├── Charlize Theron_70.jpg
│   │   │   ...
│   │   ├── Dwayne Johnson/
│   │   │   ├── Dwayne Johnson_11.jpg
│   │   │   ├── Dwayne Johnson_58.jpg
│   │   │   ...
│   ├── dataset.zip
│   └── Dataset.csv
└── .gitignore
```

#### Option 2: Using a Custom Dataset
If you prefer to use your own dataset, place it in `./data/raw/your_custom_dataset_dir/` with the following structure:

```markdown
data/
├── raw/
│   ├── your_custom_dataset_dir/
│   │   ├── Charlize Theron/
│   │   │   ├── Charlize Theron_60.jpg
│   │   │   ├── Charlize Theron_70.jpg
│   │   │   ...
│   │   ├── Dwayne Johnson/
│   │   │   ├── Dwayne Johnson_11.jpg
│   │   │   ├── Dwayne Johnson_58.jpg
│   │   │   ...
```

Your own dataset does not need to contain only cropped human faces, because **we support face extraction using face detection**, and all extracted faces are saved under `data/processed_ds`.

Then, process your custom dataset by specifying the subdirectory:

```bash
python scripts/process_dataset.py \
    --source_subdir your_custom_dataset_dir
```

This ensures your dataset is properly formatted for training. 🚀
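The split behaviour implied by `--test_split_rate` and `--random_state` can be pictured with a per-identity shuffle like the sketch below. This is an illustrative stand-in, not the actual logic of `scripts/process_dataset.py` (which also performs face detection and optional augmentation).

```python
# Illustrative per-identity train/val split (not the repo's actual implementation).
import os
import random
import shutil

def split_identity_folder(src_dir, train_dir, val_dir, test_split_rate=0.2, random_state=42):
    rng = random.Random(random_state)  # reproducible shuffle, mirrors --random_state
    for person in sorted(os.listdir(src_dir)):
        images = sorted(os.listdir(os.path.join(src_dir, person)))
        rng.shuffle(images)
        n_val = int(len(images) * test_split_rate)  # mirrors --test_split_rate
        for i, name in enumerate(images):
            dst_root = val_dir if i < n_val else train_dir
            os.makedirs(os.path.join(dst_root, person), exist_ok=True)
            shutil.copy(os.path.join(src_dir, person, name),
                        os.path.join(dst_root, person, name))
```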
SlimFace/docs/inference/inference_doc.md
ADDED
@@ -0,0 +1,15 @@
```bash
python src/slim_face/inference/inference.py \
    --input_path <image_path> \
    --model_path <model_path> \
    --index_to_class_mapping_path <index_to_class_mapping_json_path>
```

## Example Usage

```bash
python src/slim_face/inference/inference.py \
    --input_path "assets/test_images/Elon_Musk.jpg" \
    --model_path "ckpts/slim_face_regnet_y_800mf_full_model.pth" \
    --index_to_class_mapping_path ckpts/index_to_class_mapping.json
```
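Since the checkpoint above is a full pickled model (`*_full_model.pth`) and classes live in `index_to_class_mapping.json`, a minimal standalone prediction could look like the sketch below. The preprocessing values and the string-keyed mapping are assumptions, loading the pickle requires the model's class definitions to be importable, and the repo's `inference.py` additionally runs face detection/alignment first.

```python
# Minimal sketch of classifying one already-cropped face image (assumptions noted above).
import json
import torch
from PIL import Image
from torchvision import transforms

# weights_only=False is needed on torch>=2.6 to unpickle a full nn.Module
model = torch.load("ckpts/slim_face_regnet_y_800mf_full_model.pth",
                   map_location="cpu", weights_only=False)
model.eval()
with open("ckpts/index_to_class_mapping.json") as f:
    idx_to_class = json.load(f)  # assumed to map "0", "1", ... to person names

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),          # assumed input resolution
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
x = preprocess(Image.open("assets/test_images/Elon_Musk.jpg").convert("RGB")).unsqueeze(0)
with torch.no_grad():
    probs = torch.softmax(model(x), dim=1)
conf, idx = probs.max(dim=1)
print(idx_to_class[str(idx.item())], float(conf))
```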
SlimFace/docs/test/inference_test_doc.md
ADDED
@@ -0,0 +1,96 @@
# Running Inference Test Script

Instructions to run the `tests/inference_test.sh` script in the `tests` folder on Linux, Windows, and macOS.

## Prerequisites
1. Install Python, PyTorch, Accelerate:
   ```bash
   pip install -r requirements/requirements.txt
   ```
2. Create virtual environment:
   ```bash
   python -m venv venv
   source venv/bin/activate  # Linux/macOS
   .\venv\Scripts\activate   # Windows
   ```
3. Make script executable (Linux/macOS):
   ```bash
   chmod +x tests/inference_test.sh
   ```

## Linux
1. Open terminal, go to folder:
   ```bash
   cd tests
   ```
2. Run script:
   ```bash
   ./inference_test.sh
   ```
3. **Fix issues**:
   - Use `bash inference_test.sh` if `./` fails.
   - Fix line endings:
     ```bash
     sudo apt install dos2unix
     dos2unix inference_test.sh
     ```

## Windows (using WSL)
1. Install WSL and Ubuntu from Microsoft Store.
2. Install dependencies:
   ```bash
   sudo apt update
   sudo apt install python3 python3-pip
   pip install -r requirements/requirements.txt
   ```
3. Go to folder:
   ```bash
   cd ./tests
   ```
4. Make executable:
   ```bash
   chmod +x inference_test.sh
   ```
5. Run script:
   ```bash
   ./inference_test.sh
   ```
6. **Fix issues**:
   - Fix line endings:
     ```bash
     sudo apt install dos2unix
     dos2unix inference_test.sh
     ```

## macOS
1. Open Terminal, go to folder:
   ```bash
   cd tests
   ```
2. Install dependencies:
   ```bash
   brew install python
   pip install -r requirements/requirements.txt
   ```
3. Make executable:
   ```bash
   chmod +x inference_test.sh
   ```
4. Run script:
   ```bash
   ./inference_test.sh
   ```
5. **Fix issues**:
   - Fix line endings:
     ```bash
     brew install dos2unix
     dos2unix inference_test.sh
     ```

## Notes
- Ensure GPU support (CUDA for Linux/Windows, MPS for macOS) if needed.
- Check script for extra settings (e.g., `export CUDA_VISIBLE_DEVICES=0`).
- Save output:
  ```bash
  ./inference_test.sh > output.log 2>&1
  ```
SlimFace/docs/test/training_test_doc.md
ADDED
@@ -0,0 +1,103 @@
# Running Training Scripts

Instructions to run these scripts in the `tests` folder on Linux, Windows, and macOS:
- `tests/training_accelerate_efficientnet_b3.sh`
- `tests/training_accelerate_efficientnet_v2_s.sh`
- `tests/training_accelerate_regnet_y_800mf.sh`
- `tests/training_accelerate_vit_b_16_test.sh`

## Prerequisites
1. Install Python, PyTorch, Accelerate:
   ```bash
   pip install -r requirements/requirements.txt
   ```
2. Create virtual environment:
   ```bash
   python -m venv venv
   source venv/bin/activate  # Linux/macOS
   .\venv\Scripts\activate   # Windows
   ```
3. Make scripts executable (Linux/macOS):
   ```bash
   chmod +x tests/*.sh
   ```

## Linux
1. Open terminal, go to folder:
   ```bash
   cd tests
   ```
2. Run scripts:
   ```bash
   ./training_accelerate_efficientnet_b3.sh
   ./training_accelerate_efficientnet_v2_s.sh
   ./training_accelerate_regnet_y_800mf.sh
   ./training_accelerate_vit_b_16_test.sh
   ```
3. **Fix issues**:
   - Use `bash training_accelerate_efficientnet_b3.sh` if `./` fails.
   - Fix line endings:
     ```bash
     sudo apt install dos2unix
     dos2unix training_accelerate_*.sh
     ```

## Windows (using WSL)
1. Install WSL and Ubuntu from Microsoft Store.
2. Install dependencies:
   ```bash
   sudo apt update
   sudo apt install python3 python3-pip
   pip install -r requirements/requirements.txt
   ```
3. Go to folder:
   ```bash
   cd ./tests
   ```
4. Make executable:
   ```bash
   chmod +x training_accelerate_*.sh
   ```
5. Run scripts:
   ```bash
   ./training_accelerate_efficientnet_b3.sh
   ```
6. **Fix issues**:
   - Fix line endings:
     ```bash
     sudo apt install dos2unix
     dos2unix training_accelerate_*.sh
     ```

## macOS
1. Open Terminal, go to folder:
   ```bash
   cd tests
   ```
2. Install dependencies:
   ```bash
   brew install python
   pip install -r requirements/requirements.txt
   ```
3. Make executable:
   ```bash
   chmod +x training_accelerate_*.sh
   ```
4. Run scripts:
   ```bash
   ./training_accelerate_efficientnet_b3.sh
   ```
5. **Fix issues**:
   - Fix line endings:
     ```bash
     brew install dos2unix
     dos2unix training_accelerate_*.sh
     ```

## Notes
- Ensure GPU support (CUDA for Linux/Windows, MPS for macOS) if needed.
- Check scripts for extra settings (e.g., `export CUDA_VISIBLE_DEVICES=0`).
- Save output:
  ```bash
  ./training_accelerate_efficientnet_b3.sh > output.log 2>&1
  ```
SlimFace/docs/training/training_doc.md
ADDED
@@ -0,0 +1,48 @@
# Training Documentation

This document outlines the command-line arguments and a concise overview of the training pipeline for a face classification model using PyTorch Lightning.

## Table of Contents

- [Arguments Table](#arguments-table)
- [Training Pipeline Overview](#training-pipeline-overview)

## Arguments Table

| Argument Name | Type | Description |
|----------------------------------------|-------|-------------------------------------------------------------------------------------------------------------------------------|
| `dataset_dir` | `str` | Path to the dataset directory containing `train_data` and `val_data` subdirectories with preprocessed face images organized by person. |
| `image_classification_models_config_path` | `str` | Path to the YAML configuration file defining model configurations, including model function, resolution, and weights. |
| `batch_size` | `int` | Batch size for training and validation data loaders. Affects memory usage and training speed. |
| `num_epochs` | `int` | Number of epochs for training the model. An epoch is one full pass through the training dataset. |
| `learning_rate` | `float` | Initial learning rate for the Adam optimizer used during training. |
| `max_lr_factor` | `float` | Multiplies the initial learning rate to determine the maximum learning rate during the warmup phase of the scheduler. |
| `accelerator` | `str` | Type of accelerator for training. Options: `cpu`, `gpu`, `tpu`, `auto`. `auto` selects the best available device. |
| `devices` | `int` | Number of devices (e.g., GPUs) to use for training. Relevant for multi-GPU training. |
| `algorithm` | `str` | Face detection algorithm for preprocessing images. Options: `mtcnn`, `yolo`. |
| `warmup_steps` | `float` | Fraction of total training steps for the warmup phase of the learning rate scheduler (e.g., `0.05` means 5% of total steps). |
| `total_steps` | `int` | Total number of training steps. If `0`, calculated as epochs × steps per epoch (based on dataset size and batch size). |
| `classification_model_name` | `str` | Name of the classification model to use, as defined in the YAML configuration file. |

## Training Pipeline Overview

The training pipeline preprocesses face images, fine-tunes a classification head on a pretrained model, and trains using PyTorch Lightning. Key components:

1. **Preprocessing**: Aligns faces using `yolo` or `mtcnn`, caches resized images (`preprocess_and_cache_images`).
2. **Dataset**: `FaceDataset` loads pre-aligned images, applies normalization, and assigns labels by person.
3. **Model**: `FaceClassifier` pairs a frozen pretrained model (e.g., EfficientNet) with a custom classification head.
4. **Training**: `FaceClassifierLightning` manages training with Adam optimizer, cosine annealing scheduler, and logs loss/accuracy.
5. **Configuration**: Loads model details from YAML (`load_model_configs`), uses `DataLoader` with multiprocessing, and saves models via `CustomModelCheckpoint`.
6. **Execution**: `main` orchestrates preprocessing, data loading, model training, and saves full model and classifier head.
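One way to read the `learning_rate`, `max_lr_factor`, `warmup_steps`, and `total_steps` arguments together is a linear warmup toward `learning_rate * max_lr_factor` followed by cosine decay. The sketch below is an assumed interpretation of that schedule, not a copy of `accelerate_train.py`.

```python
# Assumed warmup + cosine schedule matching the argument names above (illustrative only).
import math
import torch

def make_optimizer_and_scheduler(model, learning_rate=1e-3, max_lr_factor=10.0,
                                 warmup_steps=0.05, total_steps=10_000):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    n_warmup = int(warmup_steps * total_steps)  # warmup_steps is a fraction of total_steps

    def lr_lambda(step):
        if step < n_warmup:  # ramp the LR multiplier from 1x up to max_lr_factor x
            return 1.0 + (max_lr_factor - 1.0) * step / max(1, n_warmup)
        progress = (step - n_warmup) / max(1, total_steps - n_warmup)
        return max_lr_factor * 0.5 * (1.0 + math.cos(math.pi * min(1.0, progress)))

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
    return optimizer, scheduler
```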
SlimFace/new_contruct.md
ADDED
@@ -0,0 +1,63 @@
```markdown
slim-face-recognition/
├── src/                              # Source code for the project
│   ├── slim_face/                    # Main package for your project
│   │   ├── __init__.py               # Marks directory as a Python package
│   │   ├── models/                   # Model definitions and architectures
│   │   │   ├── __init__.py
│   │   │   ├── edgeface.py           # Model definitions (e.g., edgeface backbones)
│   │   │   └── ...                   # Other model-related scripts
│   │   ├── data/                     # Data loading and preprocessing
│   │   │   ├── __init__.py
│   │   │   ├── dataset.py            # Custom Dataset classes for DataLoader
│   │   │   ├── align.py              # Face alignment utilities (e.g., from edgeface)
│   │   │   └── ...                   # Other data-related scripts
│   │   ├── training/                 # Training-related scripts and logic
│   │   │   ├── __init__.py
│   │   │   ├── train.py              # Main training script
│   │   │   ├── accelerate_train.py   # Accelerated training script
│   │   │   └── ...                   # Other training utilities
│   │   ├── inference/                # Inference-related scripts and logic
│   │   │   ├── __init__.py           # Marks directory as a Python package
│   │   │   ├── inference.py          # Face recognition inference logic
│   │   ├── utils/                    # Utility functions (e.g., logging, metrics)
│   │   │   ├── __init__.py
│   │   │   ├── helpers.py            # Miscellaneous helper functions
│   │   │   └── ...                   # Other utility scripts
│   │   └── __main__.py               # Entry point for running the package as a module
├── tests/                            # Unit and integration tests
│   ├── __init__.py
│   ├── test_data.py                  # Tests for data loading
│   ├── test_models.py                # Tests for model functionality
│   ├── test_training.py              # Tests for training pipeline
│   ├── test_inference.py             # Tests for inference pipeline
│   ├── test_images/                  # Sample images for testing (e.g., Elon_Musk.jpg)
├── data/                             # Datasets and data-related files
│   ├── raw/                          # Raw, unprocessed data
│   ├── processed/                    # Preprocessed data (e.g., aligned faces)
│   └── external/                     # External datasets (e.g., from Kaggle)
├── scripts/                          # Standalone scripts for tasks like data download
│   ├── download_dataset.py           # Script to download datasets (e.g., Kaggle)
│   └── preprocess.py                 # Data preprocessing scripts
├── notebooks/                        # Jupyter notebooks for exploration and analysis
│   ├── ztest.ipynb                   # Existing notebook for testing/exploration
│   └── ...                           # Other exploratory notebooks
├── ckpts/                            # Model checkpoints and weights
│   ├── edgeface_xs_gamma_06.pt       # Pretrained model weights
│   ├── edgeface_s_gamma_05.pt        # Pretrained model weights
│   └── ...                           # Other checkpoints
├── configs/                          # Configuration files (e.g., YAML, JSON)
│   ├── training.yaml                 # Training hyperparameters
│   └── model.yaml                    # Model configurations
├── docs/                             # Documentation files
│   ├── api.md                        # API documentation
│   └── usage.md                      # Usage instructions
├── requirements.txt                  # Main dependencies
├── requirements_compatible.txt       # Development dependencies (e.g., testing, linting)
├── README.md                         # Project overview and setup instructions
├── LICENSE                           # License file (e.g., MIT, Apache)
├── .gitignore                        # Git ignore file
├── .python-version                   # Python version specification (e.g., for pyenv)
├── setup.py                          # Setup script for packaging the project
└── pyproject.toml                    # Modern Python project configuration (optional)
```
SlimFace/requirements/requirements.txt
ADDED
@@ -0,0 +1,15 @@
torch>=2.6.0
torchvision>=0.21.0
timm>=1.0.15
mxnet>=1.9.1
opencv-python>=4.10.0.84
numpy>=1.26.0,<2.0.0
pytorch-lightning>=2.5.1
tqdm
imgaug
accelerate>=1.6.0
scikit-learn
pillow
requests
ultralytics
huggingface-hub>=0.31.1
SlimFace/requirements/requirements_compatible.txt
ADDED
@@ -0,0 +1,15 @@
torch==2.6.0
torchvision==0.21.0
timm==1.0.15
mxnet==1.9.1
opencv-python==4.11.0.86
numpy==1.26.4
pillow==11.2.1
pytorch-lightning==2.5.1
accelerate==1.6.0
imgaug==0.4.0
scikit-learn==1.6.1
pillow==11.2.1
requests==2.32.4
ultralytics==8.3.160
huggingface-hub==0.31.1
SlimFace/requirements/requirements_inference.txt
ADDED
@@ -0,0 +1,15 @@
torch>=2.6.0
torchvision>=0.21.0
timm>=1.0.15
mxnet>=1.9.1
opencv-python>=4.10.0.84
numpy>=1.26.0,<2.0.0
ultralytics
pytorch-lightning>=2.5.1
tqdm
imgaug
accelerate>=1.6.0
scikit-learn
pillow
requests
huggingface-hub>=0.31.1
SlimFace/scripts/download_ckpts.py
ADDED
@@ -0,0 +1,104 @@
+import os
+import argparse
+from huggingface_hub import snapshot_download
+
+# Model configurations for EdgeFace models
+model_configs = {
+    "edgeface_base": {
+        "repo": "idiap/EdgeFace-Base",
+        "filename": "edgeface_base.pt",
+        "local_dir": "ckpts/idiap"
+    },
+    "edgeface_s_gamma_05": {
+        "repo": "idiap/EdgeFace-S-GAMMA",
+        "filename": "edgeface_s_gamma_05.pt",
+        "local_dir": "ckpts/idiap"
+    },
+    "edgeface_xs_gamma_06": {
+        "repo": "idiap/EdgeFace-XS-GAMMA",
+        "filename": "edgeface_xs_gamma_06.pt",
+        "local_dir": "ckpts/idiap"
+    },
+    "edgeface_xxs": {
+        "repo": "idiap/EdgeFace-XXS",
+        "filename": "edgeface_xxs.pt",
+        "local_dir": "ckpts/idiap"
+    },
+    "SlimFace_efficientnet_b3": {
+        "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+        "filename": "SlimFace_efficientnet_b3_full_model.pth",
+        "local_dir": "ckpts"
+    },
+    "SlimFace_efficientnet_v2_s": {
+        "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+        "filename": "SlimFace_efficientnet_v2_s_full_model.pth",
+        "local_dir": "ckpts"
+    },
+    "SlimFace_regnet_y_800mf": {
+        "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+        "filename": "SlimFace_regnet_y_800mf_full_model.pth",
+        "local_dir": "ckpts"
+    },
+    "SlimFace_vit_b_16": {
+        "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+        "filename": "SlimFace_vit_b_16_full_model.pth",
+        "local_dir": "ckpts"
+    },
+    "SlimFace_mapping": {
+        "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+        "filename": "index_to_class_mapping.json",
+        "local_dir": "ckpts"
+    }
+}
+
+def download_models(model_name=None):
+    """Download specified models from model_configs to their respective local directories.
+
+    Args:
+        model_name (str, optional): Specific model to download. If None, download all models.
+    """
+    # Determine files to download
+    if model_name:
+        if model_name not in model_configs:
+            raise ValueError(f"Model {model_name} not found in available models: {list(model_configs.keys())}")
+        configs_to_download = [model_configs[model_name]]
+    else:
+        configs_to_download = list(model_configs.values())
+
+    for config in configs_to_download:
+        repo_id = config["repo"]
+        filename = config["filename"]
+        local_dir = config["local_dir"]
+
+        # Ensure the local directory exists
+        os.makedirs(local_dir, exist_ok=True)
+
+        try:
+            snapshot_download(
+                repo_id=repo_id,
+                local_dir=local_dir,
+                local_dir_use_symlinks=False,
+                allow_patterns=[filename],
+                cache_dir=None,
+                revision="main"
+            )
+            print(f"Downloaded {filename} to {local_dir}")
+        except Exception as e:
+            print(f"Error downloading {filename}: {e}")
+
+def main():
+    """Parse command-line arguments and initiate model download."""
+    parser = argparse.ArgumentParser(description="Download models from Hugging Face Hub.")
+    parser.add_argument(
+        "--model",
+        type=str,
+        default=None,
+        choices=list(model_configs.keys()),
+        help="Specific model to download. If not provided, all models are downloaded."
+    )
+    args = parser.parse_args()
+
+    download_models(args.model)
+
+if __name__ == "__main__":
+    main()
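
The script above can be driven from the CLI or programmatically. A minimal usage sketch, assuming it is run from the repository root and that `SlimFace/scripts` is importable (the import path is illustrative, not part of the upload):

```python
# CLI equivalents:
#   python SlimFace/scripts/download_ckpts.py                          # all entries in model_configs
#   python SlimFace/scripts/download_ckpts.py --model edgeface_xs_gamma_06
from download_ckpts import download_models  # hypothetical import; assumes SlimFace/scripts is on sys.path

download_models("edgeface_xs_gamma_06")  # -> ckpts/idiap/edgeface_xs_gamma_06.pt
download_models("SlimFace_mapping")      # -> ckpts/index_to_class_mapping.json
download_models()                        # no argument: download every configured checkpoint
```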
SlimFace/scripts/process_dataset.py
ADDED
@@ -0,0 +1,242 @@
+import os
+import zipfile
+import requests
+import json
+from tqdm import tqdm
+from sklearn.model_selection import train_test_split
+import imgaug.augmenters as iaa
+import sys
+import argparse
+import shutil
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+from src.slimface.data.data_processing import process_image
+
+def download_and_split_kaggle_dataset(
+    dataset_slug,
+    base_dir="data",
+    augment=False,
+    random_state=42,
+    test_split_rate=0.2,
+    rotation_range=15,
+    source_subdir="Original Images/Original Images",
+    delete_raw=False
+):
+    """Download a Kaggle dataset, split it into train/validation sets, and process images for face recognition.
+
+    Skips downloading if ZIP exists and unzipping if raw folder contains files.
+    Optionally deletes the raw folder to save storage.
+
+    Args:
+        dataset_slug (str): Dataset slug in 'username/dataset-name' format.
+        base_dir (str): Base directory for storing dataset.
+        augment (bool): Whether to apply data augmentation to training images.
+        random_state (int): Random seed for reproducibility in train-test split.
+        test_split_rate (float): Proportion of data to use for validation (between 0 and 1).
+        rotation_range (int): Maximum rotation angle in degrees for augmentation.
+        source_subdir (str): Subdirectory within raw_dir containing images.
+        delete_raw (bool): Whether to delete the raw folder after processing to save storage.
+
+    Raises:
+        ValueError: If test_split_rate is not between 0 and 1 or dataset_slug is invalid.
+        FileNotFoundError: If source directory is not found.
+        Exception: If dataset download fails or other errors occur.
+    """
+    try:
+        # Validate test_split_rate
+        if not 0 < test_split_rate < 1:
+            raise ValueError("test_split_rate must be between 0 and 1")
+
+        # Set up directories
+        raw_dir = os.path.join(base_dir, "raw")
+        processed_dir = os.path.join(base_dir, "processed_ds")
+        train_dir = os.path.join(processed_dir, "train_data")
+        val_dir = os.path.join(processed_dir, "val_data")
+        zip_path = os.path.join(raw_dir, "dataset.zip")
+
+        os.makedirs(raw_dir, exist_ok=True)
+        os.makedirs(processed_dir, exist_ok=True)
+
+        # Check if ZIP file already exists
+        if os.path.exists(zip_path):
+            print(f"ZIP file already exists at {zip_path}, skipping download.")
+        else:
+            # Download dataset with progress bar
+            username, dataset_name = dataset_slug.split("/")
+            if not (username and dataset_name):
+                raise ValueError("Invalid dataset slug format. Expected 'username/dataset-name'")
+
+            dataset_url = f"https://www.kaggle.com/api/v1/datasets/download/{username}/{dataset_name}"
+            print(f"Downloading dataset {dataset_slug}...")
+            response = requests.get(dataset_url, stream=True)
+            if response.status_code != 200:
+                raise Exception(f"Failed to download dataset: {response.status_code}")
+
+            total_size = int(response.headers.get("content-length", 0))
+            with open(zip_path, "wb") as file, tqdm(
+                desc="Downloading dataset",
+                total=total_size,
+                unit="B",
+                unit_scale=True,
+                unit_divisor=1024,
+            ) as pbar:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        file.write(chunk)
+                        pbar.update(len(chunk))
+
+        # # Check if raw directory contains files, excluding the ZIP file
+        # zip_filename = os.path.basename(zip_path)
+        # if os.path.exists(raw_dir) and any(file != zip_filename for file in os.listdir(raw_dir)):
+        #     print(f"Raw directory {raw_dir} already contains files, skipping extraction.")
+        # else:
+        # Extract dataset
+        print("Extracting dataset...")
+        with zipfile.ZipFile(zip_path, "r") as zip_ref:
+            zip_ref.extractall(raw_dir)
+
+        # Define source directory
+        source_dir = os.path.join(raw_dir, source_subdir)
+        if not os.path.exists(source_dir):
+            raise FileNotFoundError(f"Source directory {source_dir} not found")
+
+        # Group files by person (subfolder names)
+        person_files = {}
+        for person in os.listdir(source_dir):
+            person_dir = os.path.join(source_dir, person)
+            if os.path.isdir(person_dir):
+                person_files[person] = [
+                    f for f in os.listdir(person_dir)
+                    if os.path.isfile(os.path.join(person_dir, f))
+                    and f.lower().endswith((".png", ".jpg", ".jpeg"))
+                ]
+
+        # Define augmentation pipeline
+        if augment:
+            aug = iaa.Sequential([
+                iaa.Fliplr(p=1.0),
+                iaa.Sometimes(
+                    0.5,
+                    iaa.Affine(rotate=(-rotation_range, rotation_range))
+                ),
+            ])
+        else:
+            aug = None
+
+        # Process and split files with progress bar
+        total_files = sum(len(images) for images in person_files.values())
+        with tqdm(total=total_files, desc="Processing and copying files", unit="file") as pbar:
+            for person, images in person_files.items():
+                # Set up directories for this person
+                train_person_dir = os.path.join(train_dir, person)
+                val_person_dir = os.path.join(val_dir, person)
+                temp_dir = os.path.join(processed_dir, "temp")
+                os.makedirs(train_person_dir, exist_ok=True)
+                os.makedirs(val_person_dir, exist_ok=True)
+                os.makedirs(temp_dir, exist_ok=True)
+
+                all_image_filenames = []
+
+                # Process images and create augmentations before splitting
+                for img in images:
+                    src_path = os.path.join(source_dir, person, img)
+                    saved_images = process_image(src_path, temp_dir, aug if augment else None)
+                    all_image_filenames.extend(saved_images)
+                    pbar.update(1)
+
+                # Split all images (original and augmented) for this person
+                train_images_filenames, val_images_filenames = train_test_split(
+                    all_image_filenames,
+                    test_size=test_split_rate,
+                    random_state=random_state,
+                )
+
+                # Move images to final train/val directories
+                for img in all_image_filenames:
+                    src = os.path.join(temp_dir, img)
+                    if not os.path.exists(src):
+                        print(f"Warning: File {src} not found, skipping.")
+                        continue
+                    if img in train_images_filenames:
+                        dst = os.path.join(train_person_dir, img)
+                    else:
+                        dst = os.path.join(val_person_dir, img)
+                    os.rename(src, dst)
+
+                # Clean up temporary directory for this person
+                shutil.rmtree(temp_dir, ignore_errors=True)
+                print(f"\nCleaned up temp directory for {person}")
+
+        # Optionally delete raw folder to save storage
+        if delete_raw:
+            print(f"Deleting raw folder {raw_dir} to save storage...")
+            shutil.rmtree(raw_dir, ignore_errors=True)
+            print(f"Raw folder {raw_dir} deleted.")
+
+        print(f"Dataset {dataset_slug} downloaded, extracted, processed, and split successfully!")
+
+    except Exception as e:
+        print(f"Error processing dataset: {e}")
+        raise
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Download and process a Kaggle dataset for face recognition.")
+    parser.add_argument(
+        "--dataset_slug",
+        type=str,
+        default="vasukipatel/face-recognition-dataset",
+        help="Kaggle dataset slug in 'username/dataset-name' format"
+    )
+    parser.add_argument(
+        "--base_dir",
+        type=str,
+        default="./data",
+        help="Base directory for storing dataset"
+    )
+    parser.add_argument(
+        "--augment",
+        action="store_true",
+        help="Enable data augmentation"
+    )
+    parser.add_argument(
+        "--random_state",
+        type=int,
+        default=42,
+        help="Random seed for train-test split reproducibility"
+    )
+    parser.add_argument(
+        "--test_split_rate",
+        type=float,
+        default=0.2,
+        help="Proportion of data for validation (between 0 and 1)"
+    )
+    parser.add_argument(
+        "--rotation_range",
+        type=int,
+        default=15,
+        help="Maximum rotation angle in degrees for augmentation"
+    )
+    parser.add_argument(
+        "--source_subdir",
+        type=str,
+        default="Original Images/Original Images",
+        help="Subdirectory within raw_dir containing images"
+    )
+    parser.add_argument(
+        "--delete_raw",
+        action="store_true",
+        help="Delete the raw folder after processing to save storage"
+    )
+
+    args = parser.parse_args()
+
+    download_and_split_kaggle_dataset(
+        dataset_slug=args.dataset_slug,
+        base_dir=args.base_dir,
+        augment=args.augment,
+        random_state=args.random_state,
+        test_split_rate=args.test_split_rate,
+        rotation_range=args.rotation_range,
+        source_subdir=args.source_subdir,
+        delete_raw=args.delete_raw
+    )
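
For reference, a minimal invocation sketch of the dataset pipeline above, reusing its own argparse defaults (the Kaggle slug, split rate, and subdirectory come from the script itself; only the import path is assumed):

```python
# CLI equivalent: python SlimFace/scripts/process_dataset.py --augment --delete_raw
from process_dataset import download_and_split_kaggle_dataset  # hypothetical import path

download_and_split_kaggle_dataset(
    dataset_slug="vasukipatel/face-recognition-dataset",  # script default
    base_dir="./data",
    augment=True,           # horizontal flip + up to ±15° rotation on roughly half the images
    test_split_rate=0.2,    # per-person 80/20 train/validation split
    delete_raw=True,        # remove data/raw afterwards to save storage
)
```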
SlimFace/scripts/setup_third_party.py
ADDED
@@ -0,0 +1,61 @@
+import os
+import subprocess
+import sys
+import argparse
+
+def run_command(command, error_message):
+    """Run a shell command and handle errors."""
+    try:
+        subprocess.check_call(command, shell=True)
+    except subprocess.CalledProcessError:
+        print(f"Error: {error_message}")
+        sys.exit(1)
+
+def setup_edgeface(repo_url, third_party_dir, branch=None):
+    """Set up edgeface as a third-party dependency in the specified directory."""
+    edgeface_dir = os.path.join(third_party_dir, "edgeface")
+
+    # Create third_party directory if it doesn't exist
+    if not os.path.exists(third_party_dir):
+        os.makedirs(third_party_dir)
+        print(f"Created directory: {third_party_dir}")
+
+    # Clone edgeface if not already present
+    if not os.path.exists(edgeface_dir):
+        print(f"Cloning edgeface into {edgeface_dir}...")
+        clone_command = f"git clone {repo_url} {edgeface_dir}"
+        if branch:
+            clone_command = f"git clone -b {branch} {repo_url} {edgeface_dir}"
+        run_command(
+            clone_command,
+            f"Failed to clone edgeface from {repo_url}"
+        )
+    else:
+        print(f"edgeface already exists at {edgeface_dir}")
+
+    # Verify edgeface directory contains expected files
+    if os.path.exists(edgeface_dir) and os.listdir(edgeface_dir):
+        print(f"edgeface setup completed successfully at {edgeface_dir}")
+    else:
+        print(f"Error: edgeface directory is empty or invalid")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Set up edgeface as a third-party dependency.")
+    parser.add_argument(
+        "--repo-url",
+        default="https://github.com/danhtran2mind/edgeface.git",
+        help="Git repository URL for edgeface (default: %(default)s)"
+    )
+    parser.add_argument(
+        "--third-party-dir",
+        default=os.path.join("src", "third_party"),
+        help="Directory to store third-party dependencies (default: %(default)s)"
+    )
+    parser.add_argument(
+        "--branch",
+        help="Git branch to clone (optional)"
+    )
+    args = parser.parse_args()
+
+    setup_edgeface(args.repo_url, args.third_party_dir, args.branch)
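
A usage sketch for the third-party setup helper above; the values mirror the argparse defaults, and the import path is an assumption:

```python
# CLI equivalent: python SlimFace/scripts/setup_third_party.py
from setup_third_party import setup_edgeface  # hypothetical import path

# Clones EdgeFace into src/third_party/edgeface unless it is already present.
setup_edgeface(
    repo_url="https://github.com/danhtran2mind/edgeface.git",
    third_party_dir="src/third_party",
    branch=None,
)
```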
SlimFace/src/slimface/__init__.py
ADDED
File without changes
SlimFace/src/slimface/data/data_processing.py
ADDED
@@ -0,0 +1,67 @@
+from PIL import Image
+import numpy as np
+import os
+import imgaug.augmenters as iaa
+import random
+import uuid
+
+RANDOM_RATIO = 0.5  # 0.5
+# TARGET_SIZE = (224, 224)  # Standard size for face recognition models
+
+def process_image(src_path, dest_dir, aug=None):
+    """
+    Process an image by resizing, normalizing, and optionally augmenting it.
+    Saves both raw and augmented versions of the image.
+
+    Args:
+        src_path (str): Path to the source image
+        dest_dir (str): Destination directory for the raw and augmented images
+        aug (iaa.Sequential, optional): Augmentation pipeline
+    Returns:
+        list: List of saved image filenames (raw and optionally augmented)
+    """
+    saved_images = []
+    try:
+        # Open and process image
+        img = Image.open(src_path).convert('RGB')
+
+        # Resize image
+        # img = img.resize(TARGET_SIZE, Image.Resampling.LANCZOS)
+
+        # Convert to numpy array and normalize
+        img_array = np.array(img) / 255.0
+
+        # Save raw processed image
+        raw_filename = os.path.basename(src_path)
+        base, ext = os.path.splitext(raw_filename)
+        raw_dest_path = os.path.join(dest_dir, raw_filename)
+        counter = 1
+        while os.path.exists(raw_dest_path):
+            raw_filename = f"{base}_{counter}{ext}"
+            raw_dest_path = os.path.join(dest_dir, raw_filename)
+            counter += 1
+        raw_img = Image.fromarray((img_array * 255).astype(np.uint8))
+        raw_img.save(raw_dest_path, quality=100)
+        saved_images.append(raw_filename)
+
+        # Apply augmentation if specified and save augmented image
+        if aug and random.random() <= RANDOM_RATIO:
+            img_array_aug = aug.augment_image(img_array)
+            # Clip values to ensure valid range after augmentation
+            img_array_aug = np.clip(img_array_aug, 0, 1)
+            # Convert back to image
+            aug_img = Image.fromarray((img_array_aug * 255).astype(np.uint8))
+            # Save augmented image with unique suffix
+            aug_filename = f"aug_{base}_{uuid.uuid4().hex[:8]}{ext}"
+            aug_dest_path = os.path.join(dest_dir, aug_filename)
+            aug_img.save(aug_dest_path, quality=100)
+            saved_images.append(aug_filename)
+
+    except Image.UnidentifiedImageError:
+        print(f"Error: Cannot identify image file {src_path}")
+    except OSError as e:
+        print(f"Error processing image {src_path}: {e}")
+    except Exception as e:
+        print(f"Unexpected error processing image {src_path}: {e}")
+
+    return saved_images
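
A small sketch of how `process_image` is typically driven; the augmentation pipeline mirrors the one built in `process_dataset.py`, while the file paths and import path are placeholders:

```python
import os
import imgaug.augmenters as iaa
from data_processing import process_image  # hypothetical import; module lives in src/slimface/data/

# Same pipeline as process_dataset.py: always flip, rotate up to ±15° half the time.
aug = iaa.Sequential([
    iaa.Fliplr(p=1.0),
    iaa.Sometimes(0.5, iaa.Affine(rotate=(-15, 15))),
])

os.makedirs("data/tmp_out", exist_ok=True)
# Returns the saved filenames: the raw copy plus, with probability RANDOM_RATIO, one augmented copy.
saved = process_image("data/raw/person_a/img_001.jpg", "data/tmp_out", aug)
print(saved)
```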
SlimFace/src/slimface/data/process_face.py
ADDED
@@ -0,0 +1,64 @@
+import os
+import sys
+from PIL import Image
+from tqdm import tqdm
+import warnings
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from models.detection_models import align  # Assuming this is available in your project
+
+def extract_and_save_faces(input_dir, output_dir, algorithm='yolo', resolution=224):
+    """Preprocess images using face alignment and cache them with specified resolution."""
+    if align is None:
+        raise ImportError("face_alignment package is required for preprocessing.")
+    os.makedirs(output_dir, exist_ok=True)
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=FutureWarning, message=".*rcond.*")
+        for person in sorted(os.listdir(input_dir)):
+            person_path = os.path.join(input_dir, person)
+            if not os.path.isdir(person_path):
+                continue
+            output_person_path = os.path.join(output_dir, person)
+            os.makedirs(output_person_path, exist_ok=True)
+            skipped_count = 0
+            for img_name in tqdm(os.listdir(person_path), desc=f"Processing {person}"):
+                if not img_name.endswith(('.jpg', '.jpeg', '.png')):
+                    continue
+                img_path = os.path.join(person_path, img_name)
+                output_img_path = os.path.join(output_person_path, img_name)
+                if os.path.exists(output_img_path):
+                    skipped_count += 1
+                    continue
+                try:
+                    aligned_result = align.get_aligned_face([img_path], algorithm=algorithm)
+                    aligned_image = aligned_result[0][1] if aligned_result and len(aligned_result) > 0 else None
+                    if aligned_image is None:
+                        print(f"Face detection failed for {img_path}, using resized original image")
+                        aligned_image = Image.open(img_path).convert('RGB')
+                    aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
+                    aligned_image.save(output_img_path, quality=100)
+                except Exception as e:
+                    print(f"Error processing {img_path}: {e}")
+                    aligned_image = Image.open(img_path).convert('RGB')
+                    aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
+                    aligned_image.save(output_img_path, quality=100)
+            if skipped_count > 0:
+                print(f"Skipped {skipped_count} images for {person} that were already processed.")
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Preprocess and cache images with face alignment.")
+    parser.add_argument('--input_dir', type=str, required=True, help='Directory containing raw images.')
+    parser.add_argument('--output_dir', type=str, required=True, help='Directory to save preprocessed images.')
+    parser.add_argument('--algorithm', type=str, default='yolo', choices=['yolo', 'mtcnn'], help='Face detection algorithm to use.')
+    parser.add_argument('--resolution', type=int, default=224, help='Resolution for the output images.')
+
+    args = parser.parse_args()
+    extract_and_save_faces(args.input_dir, args.output_dir, args.algorithm, args.resolution)
+
+# python src/slimface/data/process_face.py \
+#     --input_dir "data/raw/Original Images/Original Images" \
+#     --output_dir "data/processed/Aligned Images" \
+#     --algorithm "yolo" \
+#     --resolution 224
SlimFace/src/slimface/inference/__init__.py
ADDED
File without changes
SlimFace/src/slimface/inference/end2end_inference.py
ADDED
@@ -0,0 +1,143 @@
+import os
+import sys
+import torch
+import torchvision.transforms as transforms
+from PIL import Image
+import argparse
+import warnings
+import json
+
+# Append necessary paths
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "third_party")))
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+from edgeface.face_alignment import align as edgeface_align
+from edgeface.backbones import get_model
+from models.detection_models import align as align_classifier
+
+def preprocess_image(image_path, algorithm='yolo', resolution=224):
+    try:
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=FutureWarning, message=".*rcond.*")
+            aligned_result = align_classifier.get_aligned_face([image_path], algorithm=algorithm)
+            aligned_image = aligned_result[0][1] if aligned_result and len(aligned_result) > 0 else Image.open(image_path).convert('RGB')
+            aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
+    except Exception as e:
+        print(f"Error processing {image_path}: {e}")
+        aligned_image = Image.open(image_path).convert('RGB').resize((resolution, resolution), Image.Resampling.LANCZOS)
+
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    return transform(aligned_image).unsqueeze(0)
+
+def load_model(model_path):
+    try:
+        model = torch.jit.load(model_path, map_location=torch.device('cpu'))
+        model.eval()
+        return model
+    except Exception as e:
+        raise RuntimeError(f"Failed to load model from {model_path}: {e}")
+
+def load_class_mapping(index_to_class_mapping_path):
+    try:
+        with open(index_to_class_mapping_path, 'r') as f:
+            idx_to_class = json.load(f)
+        return {int(k): v for k, v in idx_to_class.items()}
+    except Exception as e:
+        raise ValueError(f"Error loading class mapping: {e}")
+
+def get_edgeface_embeddings(image_path, model_name="edgeface_base", model_dir="ckpts/idiap"):
+    model = get_model(model_name)
+    model.load_state_dict(torch.load(f'{model_dir}/{model_name}.pt', map_location='cpu'))
+    model.eval()
+
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+    ])
+
+    aligned_result = edgeface_align.get_aligned_face(image_path, algorithm='yolo')
+    if not aligned_result:
+        raise ValueError(f"Face alignment failed for {image_path}")
+
+    with torch.no_grad():
+        return model(transform(aligned_result[0][1]).unsqueeze(0))
+
+def inference_and_confirm(args):
+    idx_to_class = load_class_mapping(args.index_to_class_mapping_path)
+    classifier_model = load_model(args.model_path)
+    device = torch.device('cuda' if torch.cuda.is_available() and args.accelerator == 'gpu' else 'cpu')
+    classifier_model = classifier_model.to(device)
+
+    # Load reference images mapping from JSON file
+    try:
+        with open(args.reference_dict_path, 'r') as f:
+            reference_images = json.load(f)
+    except Exception as e:
+        raise ValueError(f"Error loading reference images from {args.reference_dict_path}: {e}")
+
+    # Handle single image or directory
+    image_paths = [args.unknown_image_path] if args.unknown_image_path.endswith(('.jpg', '.jpeg', '.png')) else [
+        os.path.join(args.unknown_image_path, img) for img in os.listdir(args.unknown_image_path)
+        if img.endswith(('.jpg', '.jpeg', '.png'))
+    ]
+
+    results = []
+    with torch.no_grad():
+        for image_path in image_paths:
+            image_tensor = preprocess_image(image_path, args.algorithm, args.resolution).to(device)
+            output = classifier_model(image_tensor)
+            probabilities = torch.softmax(output, dim=1)
+            confidence, predicted = torch.max(probabilities, 1)
+            predicted_class = idx_to_class.get(predicted.item(), "Unknown")
+
+            result = {'image_path': image_path, 'predicted_class': predicted_class, 'confidence': confidence.item()}
+
+            # Validate with EdgeFace embeddings if reference image exists
+            reference_image_path = reference_images.get(predicted_class)
+            if reference_image_path and os.path.exists(reference_image_path):
+                unknown_embedding = get_edgeface_embeddings(image_path, args.edgeface_model_name, args.edgeface_model_dir)
+                reference_embedding = get_edgeface_embeddings(reference_image_path, args.edgeface_model_name, args.edgeface_model_dir)
+                similarity = torch.nn.functional.cosine_similarity(unknown_embedding, reference_embedding).item()
+                result['similarity'] = similarity
+                result['confirmed'] = similarity >= args.similarity_threshold
+
+            results.append(result)
+
+    # {'image_path': 'tests/test_images/dont_know.jpg', 'predicted_class': 'Robert Downey Jr',
+    #  'confidence': 0.9292604923248291, 'similarity': 0.603316068649292, 'confirmed': True}
+
+    return results
+
+def main(args):
+    results = inference_and_confirm(args)
+    for result in results:
+        similarity = result.get('similarity')
+        similarity_str = f"{similarity:.4f}" if similarity is not None else "N/A"
+        print(f"Image: {result['image_path']}, Predicted Class: {result['predicted_class']}, "
+              f"Confidence: {result['confidence']:.4f}, Similarity: {similarity_str}, "
+              f"Confirmed: {result.get('confirmed', 'N/A')}")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Face classification with EdgeFace embedding validation.')
+    parser.add_argument('--unknown_image_path', type=str, required=True, help='Path to image or directory.')
+    parser.add_argument('--reference_dict_path', type=str, required=True, help='Path to JSON file mapping classes to reference image paths.')
+    parser.add_argument('--index_to_class_mapping_path', type=str, required=True, help='Path to index-to-class JSON.')
+    parser.add_argument('--model_path', type=str, required=True, help='Path to classifier model (.pth).')
+    parser.add_argument('--edgeface_model_name', type=str, default='edgeface_base', help='EdgeFace model name.')
+    parser.add_argument('--edgeface_model_dir', type=str, default='ckpts/idiap', help='EdgeFace model directory.')
+    parser.add_argument('--algorithm', type=str, default='yolo', choices=['mtcnn', 'yolo'], help='Face detection algorithm.')
+    parser.add_argument('--accelerator', type=str, default='auto', choices=['cpu', 'gpu', 'auto'], help='Accelerator type.')
+    parser.add_argument('--resolution', type=int, default=224, help='Input image resolution.')
+    parser.add_argument('--similarity_threshold', type=float, default=0.6, help='Cosine similarity threshold.')
+
+    args = parser.parse_args()
+    main(args)
+
+# python src/slimface/inference/end2end_inference.py \
+#     --unknown_image_path tests/test_images/dont_know.jpg \
+#     --reference_dict_path tests/reference_image_data.json \
+#     --index_to_class_mapping_path /content/SlimFace/ckpts/index_to_class_mapping.json \
+#     --model_path /content/SlimFace/ckpts/SlimFace_efficientnet_b3_full_model.pth \
+#     --edgeface_model_name edgeface_base \
+#     --similarity_threshold 0.6
SlimFace/src/slimface/inference/inference.py
ADDED
@@ -0,0 +1,126 @@
+import os
+import sys
+import torch
+import torchvision.transforms as transforms
+from PIL import Image
+import argparse
+import warnings
+import json
+
+# Append the parent directory's 'models/edgeface' folder to the system path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from models.detection_models import align
+
+def preprocess_image(image_path, algorithm='yolo', resolution=224):
+    """Preprocess a single image using face alignment and specified resolution."""
+    if align is None:
+        raise ImportError("face_alignment package is required for preprocessing.")
+    try:
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=FutureWarning, message=".*rcond.*")
+            aligned_result = align.get_aligned_face([image_path], algorithm=algorithm)
+            aligned_image = aligned_result[0][1] if aligned_result and len(aligned_result) > 0 else None
+            if aligned_image is None:
+                print(f"Face detection failed for {image_path}, using resized original image")
+                aligned_image = Image.open(image_path).convert('RGB')
+            aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
+    except Exception as e:
+        print(f"Error processing {image_path}: {e}")
+        aligned_image = Image.open(image_path).convert('RGB')
+        aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
+
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    image_tensor = transform(aligned_image).unsqueeze(0)  # Add batch dimension
+    return image_tensor
+
+def load_model(model_path):
+    """Load the trained model in TorchScript format."""
+    try:
+        model = torch.jit.load(model_path, map_location=torch.device('cpu'))
+        model.eval()
+        return model
+    except Exception as e:
+        raise RuntimeError(f"Failed to load TorchScript model from {model_path}: {e}")
+
+def load_class_mapping(index_to_class_mapping_path):
+    """Load class-to-index mapping from the JSON file."""
+    try:
+        with open(index_to_class_mapping_path, 'r') as f:
+            idx_to_class = json.load(f)
+        # Convert string keys (from JSON) to integers
+        idx_to_class = {int(k): v for k, v in idx_to_class.items()}
+        return idx_to_class
+    except FileNotFoundError:
+        raise FileNotFoundError(f"Index to class mapping file {index_to_class_mapping_path} not found.")
+    except Exception as e:
+        raise ValueError(f"Error loading index to class mapping: {e}")
+
+def inference(args):
+    # Load class mapping from JSON file
+    idx_to_class = load_class_mapping(args.index_to_class_mapping_path)
+
+    # Load model
+    model = load_model(args.model_path)
+
+    # Process input images
+    device = torch.device('cuda' if torch.cuda.is_available() and args.accelerator == 'gpu' else 'cpu')
+    model = model.to(device)
+
+    image_paths = []
+    if os.path.isdir(args.input_path):
+        for img_name in os.listdir(args.input_path):
+            if img_name.endswith(('.jpg', '.jpeg', '.png')):
+                image_paths.append(os.path.join(args.input_path, img_name))
+    else:
+        if args.input_path.endswith(('.jpg', '.jpeg', '.png')):
+            image_paths.append(args.input_path)
+        else:
+            raise ValueError("Input path must be a directory or a valid image file.")
+
+    # Perform inference
+    results = []
+    with torch.no_grad():
+        for image_path in image_paths:
+            image_tensor = preprocess_image(image_path, algorithm=args.algorithm, resolution=args.resolution)
+            image_tensor = image_tensor.to(device)
+            output = model(image_tensor)
+            probabilities = torch.softmax(output, dim=1)
+            confidence, predicted = torch.max(probabilities, 1)
+            predicted_class = idx_to_class.get(predicted.item(), "Unknown")
+            results.append({
+                'image_path': image_path,
+                'predicted_class': predicted_class,
+                'confidence': confidence.item()
+            })
+    return results
+
+def main(args):
+    results = inference(args)
+    # Output results
+    for result in results:
+        print(f"Image: {result['image_path']}")
+        print(f"Predicted Class: {result['predicted_class']}")
+        print(f"Confidence: {result['confidence']:.4f}")
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Perform inference with a trained face classification model.')
+    parser.add_argument('--input_path', type=str, required=True,
+                        help='Path to an image or directory of images for inference.')
+    parser.add_argument('--index_to_class_mapping_path', type=str, required=True,
+                        help='Path to the JSON file containing index to class mapping.')
+    parser.add_argument('--model_path', type=str, required=True,
+                        help='Path to the trained full model in TorchScript format (.pth file).')
+    parser.add_argument('--algorithm', type=str, default='yolo',
+                        choices=['mtcnn', 'yolo'],
+                        help='Face detection algorithm to use (mtcnn or yolo).')
+    parser.add_argument('--accelerator', type=str, default='auto',
+                        choices=['cpu', 'gpu', 'auto'],
+                        help='Accelerator type for inference.')
+    parser.add_argument('--resolution', type=int, default=224,
+                        help='Resolution for input images (default: 224).')
+
+    args = parser.parse_args()
+    main(args)
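
A programmatic sketch of the classifier-only entry point above, filling the same fields the CLI would (the checkpoint and mapping paths are placeholders, and the import path is assumed):

```python
from argparse import Namespace
from inference import inference  # hypothetical import; module lives in src/slimface/inference/

args = Namespace(
    input_path="tests/test_images",                                  # image file or directory
    index_to_class_mapping_path="ckpts/index_to_class_mapping.json",
    model_path="ckpts/SlimFace_efficientnet_b3_full_model.pth",      # TorchScript model
    algorithm="yolo",
    accelerator="auto",
    resolution=224,
)
for r in inference(args):  # list of {'image_path', 'predicted_class', 'confidence'} dicts
    print(r["image_path"], r["predicted_class"], f"{r['confidence']:.4f}")
```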
SlimFace/src/slimface/models/__init__.py
ADDED
File without changes
SlimFace/src/slimface/models/classification_models/__init__.py
ADDED
File without changes
SlimFace/src/slimface/models/classification_models/alls.py
ADDED
@@ -0,0 +1,55 @@
+import torch
+import torch.nn as nn
+
+class FaceClassifier(nn.Module):
+    """Face classification model with a configurable head."""
+    def __init__(self, base_model, num_classes, model_name, model_configs):
+        super(FaceClassifier, self).__init__()
+        self.base_model = base_model
+        self.model_name = model_name
+
+        # Determine the feature extraction method and output shape
+        with torch.no_grad():
+            dummy_input = torch.zeros(1, 3, model_configs[model_name]['resolution'], model_configs[model_name]['resolution'])
+            features = base_model(dummy_input)
+            if len(features.shape) == 4:  # Spatial feature map (batch, channels, height, width)
+                in_channels = features.shape[1]
+                self.feature_type = 'spatial'
+                self.feature_dim = in_channels
+            elif len(features.shape) == 2:  # Flattened feature vector (batch, features)
+                in_channels = features.shape[1]
+                self.feature_type = 'flat'
+                self.feature_dim = in_channels
+            else:
+                raise ValueError(f"Unexpected feature shape from base model {model_name}: {features.shape}")
+
+        # Define the classifier head based on feature type
+        if self.feature_type == 'flat' or 'vit' in model_name:
+            self.conv_head = nn.Sequential(
+                nn.Linear(self.feature_dim, 512),
+                nn.BatchNorm1d(512),
+                nn.ReLU(),
+                nn.Dropout(0.5),
+                nn.Linear(512, 256),
+                nn.BatchNorm1d(256),
+                nn.ReLU(),
+                nn.Linear(256, num_classes)
+            )
+        else:
+            self.conv_head = nn.Sequential(
+                nn.Conv2d(self.feature_dim, 512, kernel_size=3, padding=1),
+                nn.BatchNorm2d(512),
+                nn.ReLU(),
+                nn.Dropout2d(0.5),
+                nn.Conv2d(512, 256, kernel_size=3, padding=1),
+                nn.BatchNorm2d(256),
+                nn.ReLU(),
+                nn.AdaptiveAvgPool2d(1),
+                nn.Flatten(),
+                nn.Linear(256, num_classes)
+            )
+
+    def forward(self, x):
+        features = self.base_model(x)
+        output = self.conv_head(features)
+        return output
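
A construction sketch for `FaceClassifier`: it accepts any backbone that maps an image batch to either a spatial feature map or a flat feature vector, plus a config dict carrying the input resolution. The torchvision backbone, class count, and config values below are illustrative, not taken from the upload:

```python
import torch
from torchvision import models
from alls import FaceClassifier  # hypothetical import path

model_configs = {"efficientnet_b3": {"resolution": 300}}  # only 'resolution' is read here

backbone = models.efficientnet_b3(weights=None).features  # spatial (B, 1536, H, W) features
clf = FaceClassifier(backbone, num_classes=31, model_name="efficientnet_b3", model_configs=model_configs)

x = torch.randn(2, 3, 300, 300)
print(clf(x).shape)  # torch.Size([2, 31]) -- spatial branch, so the convolutional head is used
```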
SlimFace/src/slimface/models/classification_models/efficient_v1.py
ADDED
File without changes
SlimFace/src/slimface/models/classification_models/efficient_v2.py
ADDED
File without changes
SlimFace/src/slimface/models/classification_models/regnet.py
ADDED
File without changes
SlimFace/src/slimface/models/classification_models/vit.py
ADDED
File without changes
SlimFace/src/slimface/models/detection_models/align.py
ADDED
@@ -0,0 +1,57 @@
+import torch
+from PIL import Image
+from typing import Union, List, Tuple
+from . import mtcnn
+from .face_yolo import face_yolo_detection
+
+# Device configuration
+DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+# Initialize MTCNN model
+MTCNN_MODEL = mtcnn.MTCNN(device=DEVICE, crop_size=(112, 112))
+
+def add_image_padding(pil_img: Image.Image, top: int, right: int, bottom: int, left: int,
+                      color: Tuple[int, int, int] = (0, 0, 0)) -> Image.Image:
+    """Add padding to a PIL image."""
+    width, height = pil_img.size
+    new_width, new_height = width + right + left, height + top + bottom
+    padded_img = Image.new(pil_img.mode, (new_width, new_height), color)
+    padded_img.paste(pil_img, (left, top))
+    return padded_img
+
+def detect_faces_mtcnn(image: Union[str, Image.Image]) -> Tuple[Union[list, None], Union[Image.Image, None]]:
+    """Detect and align faces using MTCNN model."""
+    if isinstance(image, str):
+        image = Image.open(image).convert('RGB')
+
+    if not isinstance(image, Image.Image):
+        raise TypeError("Input must be a PIL Image or path to an image")
+
+    try:
+        bboxes, faces = MTCNN_MODEL.align_multi(image, limit=1)
+        return bboxes[0] if bboxes else None, faces[0] if faces else None
+    except Exception as e:
+        print(f"MTCNN face detection failed: {e}")
+        return None, None
+
+def get_aligned_face(image_input: Union[str, List[str]],
+                     algorithm: str = 'mtcnn') -> List[Tuple[Union[list, None], Union[Image.Image, None]]]:
+    """Get aligned faces from image(s) using specified algorithm."""
+    if algorithm not in ['mtcnn', 'yolo']:
+        raise ValueError("Algorithm must be 'mtcnn' or 'yolo'")
+
+    # Convert single image path to list for consistent processing
+    image_paths = [image_input] if isinstance(image_input, str) else image_input
+    if not isinstance(image_paths, list):
+        raise TypeError("Input must be a string or list of strings")
+
+    if algorithm == 'mtcnn':
+        return [detect_faces_mtcnn(path) for path in image_paths]
+
+    # YOLO detection
+    results = face_yolo_detection(
+        image_paths,
+        use_batch=True,
+        device=DEVICE
+    )
+    return list(results)
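
A usage sketch for the alignment helper above; it returns one `(bounding_box, aligned_face)` pair per input path. Note that importing the module instantiates the MTCNN detector, so the mtcnn_pytorch weights (and the YOLO checkpoint, if that backend is used) must already be in place; the image paths below are placeholders:

```python
from models.detection_models import align  # assumes src/slimface is on sys.path, as in inference.py

results = align.get_aligned_face(
    ["tests/test_images/person_a.jpg", "tests/test_images/person_b.jpg"],
    algorithm="yolo",  # or "mtcnn"
)
for bbox, face in results:
    # face is a 112x112 PIL crop when a face was found, otherwise None / an empty list
    if face:
        print(bbox, face.size)
```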
SlimFace/src/slimface/models/detection_models/face_yolo.py
ADDED
@@ -0,0 +1,151 @@
+from ultralytics import YOLO
+import cv2
+import os
+from PIL import Image
+import numpy as np
+import glob
+import sys
+import argparse
+import torch
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
+
+from utils import download_yolo_face_detection
+
+def initialize_yolo_model(yolo_model_path):
+    """Initialize YOLO model with specified device."""
+    # if device.startswith('cuda') and not torch.cuda.is_available():
+    #     print("Warning: CUDA not available, falling back to CPU.")
+    #     device = 'cpu'
+    if not os.path.exists(yolo_model_path):
+        download_yolo_face_detection.download_yolo_face_detection_model()
+    return YOLO(yolo_model_path)
+
+def process_image_results(image, image_rgb, boxes):
+    """Process bounding boxes and crop faces for a single image."""
+    bounding_boxes, cropped_faces = [], []
+    for box in boxes:
+        x1, y1, x2, y2 = map(int, box)
+        if x2 > x1 and y2 > y1 and x1 >= 0 and y1 >= 0 and x2 <= image.shape[1] and y2 <= image.shape[0]:
+            bounding_boxes.append([x1, y1, x2, y2])
+            cropped_face = image_rgb[y1:y2, x1:x2]
+            if cropped_face.size > 0:
+                pil_image = Image.fromarray(cropped_face).resize((112, 112), Image.Resampling.BILINEAR)
+                cropped_faces.append(pil_image)
+    return np.array(bounding_boxes, dtype=np.int32) if bounding_boxes else np.empty((0, 4), dtype=np.int32), cropped_faces
+
+def process_batch(model, image_paths, all_bounding_boxes, all_cropped_faces, device):
+    """Process images in batch mode using list comprehensions for efficiency."""
+    # Validate and load images, filter out invalid ones
+    valid_data = [(cv2.imread(path), path) for path in image_paths if os.path.exists(path)]
+    valid_images, valid_image_paths = zip(*[(img, path) for img, path in valid_data if img is not None]) if valid_data else ([], [])
+
+    # Append empty results for invalid images
+    for path in image_paths:
+        if not os.path.exists(path) or cv2.imread(path) is None:
+            all_bounding_boxes.append(np.empty((0, 4), dtype=np.int32))
+            all_cropped_faces.append([])
+            print(f"Warning: {'not found' if not os.path.exists(path) else 'failed to load'} {path}. Skipping.")
+
+    # Process valid images
+    if valid_images:
+        images_rgb = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in valid_images]
+        results = model.predict(source=valid_image_paths, conf=0.25, iou=0.45, verbose=False, device=device)
+
+        # Process results with comprehension
+        for img, rgb, result in zip(valid_images, images_rgb, results):
+            bboxes, faces = process_image_results(img, rgb, result.boxes.xyxy.cpu().numpy())
+            all_bounding_boxes.append(bboxes)
+            all_cropped_faces.append(faces[0] if faces else [])
+
+def process_individual(model, image_paths, all_bounding_boxes, all_cropped_faces, device):
+    """Process images individually."""
+    for image_path in image_paths:
+        if not os.path.exists(image_path):
+            print(f"Warning: {image_path} not found. Skipping.")
+            all_bounding_boxes.append(np.empty((0, 4), dtype=np.int32))
+            all_cropped_faces.append([])
+            continue
+
+        image = cv2.imread(image_path)
+        if image is None:
+            print(f"Warning: Failed to load {image_path}. Skipping.")
+            all_bounding_boxes.append(np.empty((0, 4), dtype=np.int32))
+            all_cropped_faces.append([])
+            continue
+
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        results = model(image_path, conf=0.25, iou=0.45, verbose=False, device=device)
+
+        for result in results:
+            boxes = result.boxes.xyxy.cpu().numpy()
+            bboxes, faces = process_image_results(image, image_rgb, boxes)
+            all_bounding_boxes.append(bboxes)
+            all_cropped_faces.append(faces[0] if faces else [])
+
+def face_yolo_detection(image_paths,
+                        yolo_model_path="./ckpts/yolo_face_detection/model.pt",
+                        use_batch=True, device='cuda'):
+    """Perform face detection using YOLOv11 with batch or individual processing on specified device."""
+    model = initialize_yolo_model(yolo_model_path)
+    all_bounding_boxes, all_cropped_faces = [], []
+
+    if use_batch:
+        process_batch(model, image_paths, all_bounding_boxes, all_cropped_faces, device)
+    else:
+        process_individual(model, image_paths, all_bounding_boxes, all_cropped_faces, device)
+
+    return zip(all_bounding_boxes, all_cropped_faces)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="YOLOv11 face detection")
+    parser.add_argument("--use-batch", action="store_true", default=True, help="Use batch processing (default: True)")
+    parser.add_argument("--image-dir", type=str, default="test/test_images", help="Input image directory")
+    parser.add_argument("--yolo-model-path", type=str, default="checkpoints/yolo11_face_detection/model.pt", help="YOLO model path")
+    parser.add_argument("--device", type=str, default="cuda", help="Device to run the model (e.g., 'cuda', 'cpu', 'cuda:0')")
+
+    args = parser.parse_args()
+
+    image_paths = (glob.glob(os.path.join(args.image_dir, "*.[jJ][pP][gG]")) +
+                   glob.glob(os.path.join(args.image_dir, "*.[pP][nN][gG]")))
+
+    if args.yolo_model_path:
+        yolo_model_path = args.yolo_model_path
+    else:
+        yolo_model_path = os.path.join("checkpoints", "yolo11_face_detection", "model.pt")
+
+    import time
+    t1 = time.time()
+    results = face_yolo_detection(image_paths, yolo_model_path, args.use_batch, args.device)
+    print("Time taken:", time.time() - t1)
+
+    # Optional: Save or process results
+    # for i, (bboxes, faces) in enumerate(results):
+    #     print(f"Image {i}: Bounding Boxes: {bboxes}")
+    #     for j, face in enumerate(faces):
+    #         face.save(f"face_{i}_{j}.png")
+
+    # Benchmarking (uncomment to use)
+    # import time
+    # num_runs = 50
+    # batch_times, individual_times = [], []
+
+    # # Benchmark batch processing
+    # for _ in range(num_runs):
+    #     t1 = time.time()
+    #     face_yolo_detection(image_paths, yolo_model_path, use_batch=True, device=args.device)
+    #     batch_times.append(time.time() - t1)
+
+    # # Benchmark individual processing
+    # for _ in range(num_runs):
+    #     t1 = time.time()
+    #     face_yolo_detection(image_paths, yolo_model_path, use_batch=False, device=args.device)
+    #     individual_times.append(time.time() - t1)
+
+    # # Calculate and print average times
+    # avg_batch_time = sum(batch_times) / num_runs
+    # avg_individual_time = sum(individual_times) / num_runs
+
+    # print(f"\nBenchmark Results (over {num_runs} runs):")
+    # print(f"Average Batch Processing Time: {avg_batch_time:.4f} seconds")
+    # print(f"Average Individual Processing Time: {avg_individual_time:.4f} seconds")
SlimFace/src/slimface/models/detection_models/mtcnn.py
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+from typing import Tuple
+import numpy as np
+import torch
+from PIL import Image
+from torch.autograd import Variable
+
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
+from mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
+from mtcnn_pytorch.src.first_stage import run_first_stage
+from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face
+
+
+class MTCNN():
+    def __init__(self, device: str = 'cuda:0', crop_size: Tuple[int, int] = (112, 112)):
+
+        assert device in ['cuda:0', 'cpu']
+        self.device = torch.device(device)
+        assert crop_size in [(112, 112), (96, 112)]
+        self.crop_size = crop_size
+
+        # change working dir to this file location to load npz files, then switch back
+        cwd = os.getcwd()
+        os.chdir(os.path.dirname(__file__))
+
+        self.pnet = PNet().to(self.device)
+        self.rnet = RNet().to(self.device)
+        self.onet = ONet().to(self.device)
+        self.pnet.eval()
+        self.rnet.eval()
+        self.onet.eval()
+        self.refrence = get_reference_facial_points(default_square=crop_size[0] == crop_size[1])
+
+        self.min_face_size = 20
+        self.thresholds = [0.6, 0.7, 0.9]
+        self.nms_thresholds = [0.7, 0.7, 0.7]
+        self.factor = 0.85
+
+        os.chdir(cwd)
+
+    def align(self, img):
+        _, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
+        facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
+        warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=self.crop_size)
+        return Image.fromarray(warped_face)
+
+    def align_multi(self, img, limit=None):
+        boxes, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
+        if limit:
+            boxes = boxes[:limit]
+            landmarks = landmarks[:limit]
+        faces = []
+        for landmark in landmarks:
+            facial5points = [[landmark[j], landmark[j + 5]] for j in range(5)]
+            warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=self.crop_size)
+            faces.append(Image.fromarray(warped_face))
+        return boxes, faces
+
+    def detect_faces(self, image, min_face_size, thresholds, nms_thresholds, factor):
+        """
+        Arguments:
+            image: an instance of PIL.Image.
+            min_face_size: a float number.
+            thresholds: a list of length 3.
+            nms_thresholds: a list of length 3.
+            factor: a float number, the pyramid downscaling factor.
+
+        Returns:
+            two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
+            bounding boxes and facial landmarks.
+        """
+
+        # BUILD AN IMAGE PYRAMID
+        width, height = image.size
+        min_length = min(height, width)
+
+        min_detection_size = 12
+        # factor = 0.707  # sqrt(0.5)
+
+        # scales for scaling the image
+        scales = []
+
+        # scale the image so that the minimum size we can detect
+        # equals the minimum face size we want to detect
+        m = min_detection_size / min_face_size
+        min_length *= m
+
+        factor_count = 0
+        while min_length > min_detection_size:
+            scales.append(m * factor**factor_count)
+            min_length *= factor
+            factor_count += 1
+
+        # STAGE 1
+
+        # it will be returned
+        bounding_boxes = []
+
+        with torch.no_grad():
+            # run P-Net on different scales
+            for s in scales:
+                boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
+                bounding_boxes.append(boxes)
+
+            # collect boxes (and offsets, and scores) from different scales
+            bounding_boxes = [i for i in bounding_boxes if i is not None]
+            if len(bounding_boxes) == 0:
+                return [], []
+            bounding_boxes = np.vstack(bounding_boxes)
+
+            keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
+            bounding_boxes = bounding_boxes[keep]
+
+            # use offsets predicted by pnet to transform bounding boxes
+            bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
+            # shape [n_boxes, 5]
+
+            bounding_boxes = convert_to_square(bounding_boxes)
+            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
+
+            # STAGE 2
+
+            img_boxes = get_image_boxes(bounding_boxes, image, size=24)
+            img_boxes = torch.FloatTensor(img_boxes).to(self.device)
+
+            output = self.rnet(img_boxes)
+            offsets = output[0].cpu().data.numpy()  # shape [n_boxes, 4]
+            probs = output[1].cpu().data.numpy()  # shape [n_boxes, 2]
+
+            keep = np.where(probs[:, 1] > thresholds[1])[0]
+            bounding_boxes = bounding_boxes[keep]
+            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
+            offsets = offsets[keep]
+
+            keep = nms(bounding_boxes, nms_thresholds[1])
+            bounding_boxes = bounding_boxes[keep]
+            bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
+            bounding_boxes = convert_to_square(bounding_boxes)
+            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
+
+            # STAGE 3
+
+            img_boxes = get_image_boxes(bounding_boxes, image, size=48)
+            if len(img_boxes) == 0:
+                return [], []
+            img_boxes = torch.FloatTensor(img_boxes).to(self.device)
+            output = self.onet(img_boxes)
+            landmarks = output[0].cpu().data.numpy()  # shape [n_boxes, 10]
+            offsets = output[1].cpu().data.numpy()  # shape [n_boxes, 4]
+            probs = output[2].cpu().data.numpy()  # shape [n_boxes, 2]
+
+            keep = np.where(probs[:, 1] > thresholds[2])[0]
+            bounding_boxes = bounding_boxes[keep]
+            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
+            offsets = offsets[keep]
+            landmarks = landmarks[keep]
+
+            # compute landmark points
+            width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
+            height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
+            xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
+            landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
+            landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]
+
+            bounding_boxes = calibrate_box(bounding_boxes, offsets)
+            keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
+            bounding_boxes = bounding_boxes[keep]
+            landmarks = landmarks[keep]
+
+        return bounding_boxes, landmarks
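To make the wrapper above easier to try out, here is a minimal usage sketch. The import path and the image filename are assumptions based on the `src/slimface/...` layout of this upload rather than part of the diff itself, and the image is assumed to contain at least one face.

```python
# Minimal sketch (assumed import path and filename; adjust to your setup).
from PIL import Image

from slimface.models.detection_models.mtcnn import MTCNN

detector = MTCNN(device='cpu', crop_size=(112, 112))  # or 'cuda:0' with a GPU

img = Image.open('face.jpg').convert('RGB')

# Single-face alignment: a 112x112 crop warped to the reference landmarks.
aligned = detector.align(img)
aligned.save('face_aligned.jpg')

# Multi-face variant: raw bounding boxes plus one aligned crop per face.
boxes, faces = detector.align_multi(img, limit=4)
print(f'{len(faces)} face(s) aligned')
```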
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/.gitignore
ADDED
@@ -0,0 +1,3 @@
+.ipynb_checkpoints
+__pycache__
+
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Dan Antoshchenko
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/README.md
ADDED
@@ -0,0 +1,26 @@
+# MTCNN
+
+A `pytorch` implementation of the **inference stage** of the face detection algorithm described in
+[Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878).
+
+## Example
+
+
+## How to use it
+Just download the repository and then do this:
+```python
+from src import detect_faces
+from PIL import Image
+
+image = Image.open('image.jpg')
+bounding_boxes, landmarks = detect_faces(image)
+```
+For examples see `test_on_images.ipynb`.
+
+## Requirements
+* pytorch 0.2
+* Pillow, numpy
+
+## Credit
+This implementation is heavily inspired by:
+* [pangyupo/mxnet_mtcnn_face_detection](https://github.com/pangyupo/mxnet_mtcnn_face_detection)
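A step in `detect_faces` (in `mtcnn.py` above) that is easy to gloss over is the image-pyramid construction: the image is first rescaled so that the smallest face of interest maps onto P-Net's 12-pixel input, then repeatedly shrunk by `factor` until it falls below that size. The standalone sketch below replays that loop for a hypothetical 640x480 image with the defaults used by the wrapper (`min_face_size=20`, `factor=0.85`); the image size is illustrative only.

```python
# Standalone sketch of the image-pyramid scale computation used by
# detect_faces() in mtcnn.py above (hypothetical image size).
min_detection_size = 12      # P-Net's input resolution
min_face_size = 20           # smallest face we want to detect (pixels)
factor = 0.85                # downscaling step between pyramid levels
width, height = 640, 480     # hypothetical input image

m = min_detection_size / min_face_size   # 0.6: maps a 20 px face onto 12 px
min_length = min(width, height) * m

scales = []
factor_count = 0
while min_length > min_detection_size:
    scales.append(m * factor ** factor_count)
    min_length *= factor
    factor_count += 1

print(len(scales), [round(s, 3) for s in scales[:4]])
# e.g. ~20 pyramid levels for a 480 px short side, starting at scale 0.6
```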
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det1.caffemodel
ADDED
Binary file (28.2 kB).
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det1.prototxt
ADDED
@@ -0,0 +1,177 @@
+name: "PNet"
+input: "data"
+input_dim: 1
+input_dim: 3
+input_dim: 12
+input_dim: 12
+
+layer {
+  name: "conv1"
+  type: "Convolution"
+  bottom: "data"
+  top: "conv1"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 10
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "PReLU1"
+  type: "PReLU"
+  bottom: "conv1"
+  top: "conv1"
+}
+layer {
+  name: "pool1"
+  type: "Pooling"
+  bottom: "conv1"
+  top: "pool1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+
+layer {
+  name: "conv2"
+  type: "Convolution"
+  bottom: "pool1"
+  top: "conv2"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 16
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "PReLU2"
+  type: "PReLU"
+  bottom: "conv2"
+  top: "conv2"
+}
+
+layer {
+  name: "conv3"
+  type: "Convolution"
+  bottom: "conv2"
+  top: "conv3"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 32
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "PReLU3"
+  type: "PReLU"
+  bottom: "conv3"
+  top: "conv3"
+}
+
+
+layer {
+  name: "conv4-1"
+  type: "Convolution"
+  bottom: "conv3"
+  top: "conv4-1"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 2
+    kernel_size: 1
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+
+layer {
+  name: "conv4-2"
+  type: "Convolution"
+  bottom: "conv3"
+  top: "conv4-2"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 4
+    kernel_size: 1
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "prob1"
+  type: "Softmax"
+  bottom: "conv4-1"
+  top: "prob1"
+}
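For readers more at home in PyTorch than Caffe, the sketch below is a rough PyTorch rendering of the P-Net topology defined by the prototxt above: three 3x3 convolutions with PReLU activations and a single 2x2 max-pool, followed by 1x1 heads for the face/non-face score and the bounding-box offsets. It is only an architectural illustration with random weights; the network actually used at inference time is constructed in `mtcnn_pytorch/src/get_nets.py`, which loads the pretrained weights.

```python
import torch
import torch.nn as nn

class PNetSketch(nn.Module):
    """Illustrative PyTorch equivalent of the PNet prototxt above (untrained)."""
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 10, kernel_size=3), nn.PReLU(10),          # conv1 + PReLU1
            nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),  # pool1
            nn.Conv2d(10, 16, kernel_size=3), nn.PReLU(16),         # conv2 + PReLU2
            nn.Conv2d(16, 32, kernel_size=3), nn.PReLU(32),         # conv3 + PReLU3
        )
        self.score = nn.Conv2d(32, 2, kernel_size=1)    # conv4-1: face / non-face
        self.offsets = nn.Conv2d(32, 4, kernel_size=1)  # conv4-2: box regression

    def forward(self, x):
        x = self.features(x)
        probs = torch.softmax(self.score(x), dim=1)     # prob1
        return self.offsets(x), probs

# On the canonical 12x12 input the network is fully convolutional,
# so larger images yield a score map instead of a single prediction.
out_offsets, out_probs = PNetSketch()(torch.randn(1, 3, 12, 12))
print(out_probs.shape)  # torch.Size([1, 2, 1, 1])
```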