danhtran2mind committed
Commit c6146cb · verified · 1 Parent(s): 6658077

Upload 129 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the complete changeset.
Files changed (50):
  1. .gitattributes +15 -0
  2. SlimFace/.gitignore +197 -0
  3. SlimFace/.python-version +1 -0
  4. SlimFace/LICENSE +21 -0
  5. SlimFace/README.md +93 -0
  6. SlimFace/apps/gradio_app.py +26 -0
  7. SlimFace/apps/gradio_app/.gitkeep +0 -0
  8. SlimFace/apps/gradio_app/__init__.py +0 -0
  9. SlimFace/apps/gradio_app/inference.py +57 -0
  10. SlimFace/assets/comparision.md +11 -0
  11. SlimFace/assets/examples/.gitkeep +0 -0
  12. SlimFace/ckpts/.gitignore +4 -0
  13. SlimFace/configs/accelerate_config.yaml +6 -0
  14. SlimFace/configs/image_classification_models_config.yaml +249 -0
  15. SlimFace/data/.gitignore +5 -0
  16. SlimFace/data/raw/.gitignore +4 -0
  17. SlimFace/docs/README.md +1 -0
  18. SlimFace/docs/data/data_processing.md +135 -0
  19. SlimFace/docs/inference/inference_doc.md +15 -0
  20. SlimFace/docs/test/inference_test_doc.md +96 -0
  21. SlimFace/docs/test/training_test_doc.md +103 -0
  22. SlimFace/docs/training/training_doc.md +48 -0
  23. SlimFace/new_contruct.md +63 -0
  24. SlimFace/requirements/requirements.txt +15 -0
  25. SlimFace/requirements/requirements_compatible.txt +15 -0
  26. SlimFace/requirements/requirements_inference.txt +15 -0
  27. SlimFace/scripts/download_ckpts.py +104 -0
  28. SlimFace/scripts/process_dataset.py +242 -0
  29. SlimFace/scripts/setup_third_party.py +61 -0
  30. SlimFace/src/slimface/__init__.py +0 -0
  31. SlimFace/src/slimface/data/data_processing.py +67 -0
  32. SlimFace/src/slimface/data/process_face.py +64 -0
  33. SlimFace/src/slimface/inference/__init__.py +0 -0
  34. SlimFace/src/slimface/inference/end2end_inference.py +143 -0
  35. SlimFace/src/slimface/inference/inference.py +126 -0
  36. SlimFace/src/slimface/models/__init__.py +0 -0
  37. SlimFace/src/slimface/models/classification_models/__init__.py +0 -0
  38. SlimFace/src/slimface/models/classification_models/alls.py +55 -0
  39. SlimFace/src/slimface/models/classification_models/efficient_v1.py +0 -0
  40. SlimFace/src/slimface/models/classification_models/efficient_v2.py +0 -0
  41. SlimFace/src/slimface/models/classification_models/regnet.py +0 -0
  42. SlimFace/src/slimface/models/classification_models/vit.py +0 -0
  43. SlimFace/src/slimface/models/detection_models/align.py +57 -0
  44. SlimFace/src/slimface/models/detection_models/face_yolo.py +151 -0
  45. SlimFace/src/slimface/models/detection_models/mtcnn.py +175 -0
  46. SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/.gitignore +3 -0
  47. SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/LICENSE +21 -0
  48. SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/README.md +26 -0
  49. SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det1.caffemodel +0 -0
  50. SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det1.prototxt +177 -0
.gitattributes CHANGED
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det2.caffemodel filter=lfs diff=lfs merge=lfs -text
+ SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det3.caffemodel filter=lfs diff=lfs merge=lfs -text
+ SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det4.caffemodel filter=lfs diff=lfs merge=lfs -text
+ SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/images/example.png filter=lfs diff=lfs merge=lfs -text
+ SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/images/office2.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/images/office4.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/Cate[[:space:]]Blanchett.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/Daniel[[:space:]]Day-Lewis.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/dont_know.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/Elon_Musk.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/Gal[[:space:]]Gado.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/Kate[[:space:]]Winslet.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/Tom[[:space:]]Cruise.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/Tom[[:space:]]Hanks.jpg filter=lfs diff=lfs merge=lfs -text
+ SlimFace/tests/test_images/Viola[[:space:]]Davis.jpg filter=lfs diff=lfs merge=lfs -text
SlimFace/.gitignore ADDED
@@ -0,0 +1,197 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Cursor
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+ # refer to https://docs.cursor.com/context/ignore-files
+ .cursorignore
+ .cursorindexingignore
+
+ # lightning_logs
+ lightning_logs
SlimFace/.python-version ADDED
@@ -0,0 +1 @@
+ 3.11.11
SlimFace/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Danh Tran
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
SlimFace/README.md ADDED
@@ -0,0 +1,93 @@
+ # SlimFace: Slim Face Recognition
+
+ > ## Credits and Citation
+ >
+ > ℹ️ This project is based on [![Built on edgeface](https://img.shields.io/badge/Built%20on-otroshi%2Fedgeface-blue?style=flat&logo=github)](https://github.com/otroshi/edgeface) by [![Hatef Otroshi](https://img.shields.io/badge/GitHub-Hatef_Otroshi-blue?style=flat&logo=github)](https://github.com/otroshi), and includes our own bug fixes and enhancements.
+ >
+ > If this project is helpful for your research, please consider citing the original paper:
+ >
+ > **Edgeface: Efficient face recognition model for edge devices**
+ > *George, Anjith and Ecabert, Christophe and Shahreza, Hatef Otroshi and Kotwal, Ketan and Marcel, Sebastien*
+ > *IEEE Transactions on Biometrics, Behavior, and Identity Science (2024)*
+ >
+ > ```bibtex
+ > @article{edgeface,
+ >   title={Edgeface: Efficient face recognition model for edge devices},
+ >   author={George, Anjith and Ecabert, Christophe and Shahreza, Hatef Otroshi and Kotwal, Ketan and Marcel, Sebastien},
+ >   journal={IEEE Transactions on Biometrics, Behavior, and Identity Science},
+ >   year={2024}
+ > }
+ > ```
+
+ ## Usage
+ ### Clone the Repository
+ ```bash
+ # Clone the repository
+ git clone https://github.com/danhtran2mind/SlimFace
+
+ # Navigate into the newly created 'SlimFace' directory.
+ cd SlimFace
+ ```
+ ### Install Dependencies
+ **If OpenCV (cv2) does not work, run the following commands first:**
+ ```bash
+ sudo apt update
+ sudo apt install -y libglib2.0-0
+ sudo apt install -y libgl1-mesa-dev
+ ```
+ ### Default Installation
+ ```bash
+ pip install -r requirements/requirements.txt
+ ```
+ ### Alternative Installations
+ - For the exact package versions the author tested against:
+ ```bash
+ pip install -r requirements/requirements_compatible.txt
+ ```
+ - For end-to-end inference only:
+ ```bash
+ pip install -r requirements/requirements_inference.txt
+ ```
+ ### Download Model Checkpoints
+ ```bash
+ python scripts/download_ckpts.py
+ ```
+ ### Set Up Third-Party Components
+ ```bash
+ python scripts/setup_third_party.py
+ ```
+ ## Data Preparation
+
+ For detailed instructions on how to process and manage your data, refer to the [full guide for data processing](./docs/data/data_processing.md).
+
+ For a quick start, prepare the sample dataset with:
+ ```bash
+ python scripts/process_dataset.py
+ ```
+ ## Pre-trained Model Preparation
+
+ Pre-trained checkpoints are fetched by the [Download Model Checkpoints](#download-model-checkpoints) step above.
+
+ ## Training
+
+ 1. Configure the default settings for Accelerate:
+ ```bash
+ accelerate config default
+ ```
+
+ 2. Launch the training script using Accelerate:
+ ```bash
+ accelerate launch src/slimface/training/accelerate_train.py
+ ```
+
+ For additional help, refer to the [Training Documentation](./docs/training/training_doc.md).
+
+ ## Demonstration
+ ```bash
+ python apps/gradio_app.py
+ ```
+
+ A hosted demo is available at https://huggingface.co/spaces/danhtran2mind/slimface
+
+ ## Project Description
+
+ This repository builds on [![GitHub Repo](https://img.shields.io/badge/GitHub-danhtran2mind%2Fedgeface-blue?style=flat)](https://github.com/danhtran2mind/edgeface), a fork of [![GitHub Repo](https://img.shields.io/badge/GitHub-otroshi%2Fedgeface-blue?style=flat)](https://github.com/otroshi/edgeface), with numerous bug fixes and rewritten code for improved performance and stability.
SlimFace/apps/gradio_app.py ADDED
@@ -0,0 +1,26 @@
+ import gradio as gr
+ from gradio_app.inference import run_inference
+
+ def create_gradio_interface():
+     return gr.Interface(
+         fn=run_inference,
+         inputs=[
+             gr.Image(type="pil", label="Upload Image"),
+             gr.File(label="Reference Dict JSON File"),
+             gr.File(label="Index to Class Mapping JSON File"),
+             gr.File(label="Classifier Model (.pth) File"),
+             gr.Textbox(label="EdgeFace Model Name", value="edgeface_base"),
+             gr.Textbox(label="EdgeFace Model Directory", value="ckpts/idiap"),
+             gr.Dropdown(choices=["yolo", "mtcnn"], label="Face Detection Algorithm", value="yolo"),
+             gr.Dropdown(choices=["auto", "cpu", "gpu"], label="Accelerator", value="auto"),
+             gr.Slider(minimum=112, maximum=448, step=1, value=224, label="Resolution"),
+             gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.6, label="Similarity Threshold")
+         ],
+         outputs="text",
+         title="Face Classification with EdgeFace Validation",
+         description="Upload an image and required files to perform face classification with EdgeFace embedding validation."
+     )
+
+ if __name__ == "__main__":
+     iface = create_gradio_interface()
+     iface.launch()
SlimFace/apps/gradio_app/.gitkeep ADDED
File without changes
SlimFace/apps/gradio_app/__init__.py ADDED
File without changes
SlimFace/apps/gradio_app/inference.py ADDED
@@ -0,0 +1,57 @@
+ import os
+ import sys
+ from PIL import Image
+
+ # Append the path to the inference script's directory
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'src', 'slimface', 'inference')))
+ from end2end_inference import cinference_and_confirm
+
+ def run_inference(image, reference_dict_path, index_to_class_mapping_path, model_path,
+                   edgeface_model_name="edgeface_base", edgeface_model_dir="ckpts/idiap",
+                   algorithm="yolo", accelerator="auto", resolution=224, similarity_threshold=0.6):
+     # Save uploaded image temporarily in apps/gradio_app/
+     temp_image_path = os.path.join(os.path.dirname(__file__), "temp_image.jpg")
+     image.save(temp_image_path)
+
+     # Create args object to mimic command-line arguments
+     class Args:
+         def __init__(self):
+             self.unknown_image_path = temp_image_path
+             self.reference_dict_path = reference_dict_path.name if reference_dict_path else None
+             self.index_to_class_mapping_path = index_to_class_mapping_path.name if index_to_class_mapping_path else None
+             self.model_path = model_path.name if model_path else None
+             self.edgeface_model_name = edgeface_model_name
+             self.edgeface_model_dir = edgeface_model_dir
+             self.algorithm = algorithm
+             self.accelerator = accelerator
+             self.resolution = resolution
+             self.similarity_threshold = similarity_threshold
+
+     args = Args()
+
+     # Validate inputs
+     if not all([args.reference_dict_path, args.index_to_class_mapping_path, args.model_path]):
+         return "Error: Please provide all required files (reference dict, index-to-class mapping, and model)."
+
+     try:
+         # Call the inference function from end2end_inference.py
+         results = cinference_and_confirm(args)
+
+         # Format output
+         output = ""
+         for result in results:
+             output += f"Image: {result['image_path']}\n"
+             output += f"Predicted Class: {result['predicted_class']}\n"
+             output += f"Confidence: {result['confidence']:.4f}\n"
+             # Guard the optional field: applying ':.4f' to the 'N/A' fallback string would raise a ValueError.
+             similarity = result.get('similarity')
+             output += f"Similarity: {similarity:.4f}\n" if similarity is not None else "Similarity: N/A\n"
+             output += f"Confirmed: {result.get('confirmed', 'N/A')}\n\n"
+
+         return output
+
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+     finally:
+         # Clean up temporary image
+         if os.path.exists(temp_image_path):
+             os.remove(temp_image_path)
SlimFace/assets/comparision.md ADDED
@@ -0,0 +1,11 @@
+ slim_face_vit_b_16
+ Train loss: 0.0074, Train acc: 0.9980, Val loss: 0.2179, Val acc: 0.9336
+
+ efficientnet_b3
+ Train loss: 0.0014, Train acc: 1.0000, Val loss: 0.1931, Val acc: 0.9427
+
+ efficientnet_v2_s
+ Train loss: 0.0016, Train acc: 1.0000, Val loss: 0.2374, Val acc: 0.9375
+
+ regnet_y_800mf
+ Train loss: 0.0033, Train acc: 0.9997, Val loss: 0.3766, Val acc: 0.8906
SlimFace/assets/examples/.gitkeep ADDED
File without changes
SlimFace/ckpts/.gitignore ADDED
@@ -0,0 +1,4 @@
+ # Ignore everything in this directory
+ *
+ # Except this .gitignore file
+ !.gitignore
SlimFace/configs/accelerate_config.yaml ADDED
@@ -0,0 +1,6 @@
+ compute_environment: LOCAL_MACHINE
+ distributed_type: FSDP
+ num_processes: 4
+ mixed_precision: fp16
+ fsdp_config:
+   fsdp_offload_params: true
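A brief usage note: the README configures Accelerate with `accelerate config default`, but this committed file can also be passed explicitly. A minimal sketch, assuming you run it from the `SlimFace/` root and use the training entry point referenced in the README (the training script itself is not among the 50 files shown in this view):

```bash
# Launch training with the committed FSDP settings (4 processes, fp16, parameter offloading).
accelerate launch \
    --config_file configs/accelerate_config.yaml \
    src/slimface/training/accelerate_train.py
```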
SlimFace/configs/image_classification_models_config.yaml ADDED
@@ -0,0 +1,249 @@
+ # For more details on models, see https://pytorch.org/vision/main/models.html
+
+ # EfficientNet models: Designed for efficiency with compound scaling of depth, width, and resolution.
+ # These models balance accuracy and computational efficiency, ideal for resource-constrained environments.
+ efficientnet_b0:
+   metrics:
+     Acc@1: 77.692 # Top-1 accuracy on ImageNet
+     Acc@5: 93.532 # Top-5 accuracy on ImageNet
+     GFLOPS: 0.39 # Computational complexity
+     Params: 5.3M # Number of parameters
+   model_fn: models.efficientnet_b0
+   resolution: 224 # Input image resolution
+   weights: models.EfficientNet_B0_Weights.IMAGENET1K_V1 # Pretrained weights on ImageNet
+
+ efficientnet_b1:
+   metrics:
+     Acc@1: 78.642
+     Acc@5: 94.186
+     GFLOPS: 0.69
+     Params: 7.8M
+   model_fn: models.efficientnet_b1
+   resolution: 240
+   weights: models.EfficientNet_B1_Weights.IMAGENET1K_V1
+
+ efficientnet_b2:
+   metrics:
+     Acc@1: 80.608
+     Acc@5: 95.31
+     GFLOPS: 1.09
+     Params: 9.1M
+   model_fn: models.efficientnet_b2
+   resolution: 260
+   weights: models.EfficientNet_B2_Weights.IMAGENET1K_V1
+
+ efficientnet_b3:
+   metrics:
+     Acc@1: 82.008
+     Acc@5: 96.054
+     GFLOPS: 1.83
+     Params: 12.2M
+   model_fn: models.efficientnet_b3
+   resolution: 300
+   weights: models.EfficientNet_B3_Weights.IMAGENET1K_V1
+
+ efficientnet_b4:
+   metrics:
+     Acc@1: 83.384
+     Acc@5: 96.594
+     GFLOPS: 4.39
+     Params: 19.3M
+   model_fn: models.efficientnet_b4
+   resolution: 380
+   weights: models.EfficientNet_B4_Weights.IMAGENET1K_V1
+
+ efficientnet_b5:
+   metrics:
+     Acc@1: 83.444
+     Acc@5: 96.628
+     GFLOPS: 10.27
+     Params: 30.4M
+   model_fn: models.efficientnet_b5
+   resolution: 456
+   weights: models.EfficientNet_B5_Weights.IMAGENET1K_V1
+
+ efficientnet_b6:
+   metrics:
+     Acc@1: 84.008
+     Acc@5: 96.916
+     GFLOPS: 19.07
+     Params: 43.0M
+   model_fn: models.efficientnet_b6
+   resolution: 528
+   weights: models.EfficientNet_B6_Weights.IMAGENET1K_V1
+
+ efficientnet_b7:
+   metrics:
+     Acc@1: 84.122
+     Acc@5: 96.908
+     GFLOPS: 37.75
+     Params: 66.3M
+   model_fn: models.efficientnet_b7
+   resolution: 600
+   weights: models.EfficientNet_B7_Weights.IMAGENET1K_V1
+
+ # EfficientNet V2 models: Improved training efficiency and performance over V1.
+ # These models use progressive learning and optimized scaling for better accuracy.
+ efficientnet_v2_l:
+   metrics:
+     Acc@1: 85.808
+     Acc@5: 97.788
+     GFLOPS: 56.08
+     Params: 118.5M
+   model_fn: models.efficientnet_v2_l
+   resolution: 480
+   weights: models.EfficientNet_V2_L_Weights.IMAGENET1K_V1
+
+ efficientnet_v2_m:
+   metrics:
+     Acc@1: 85.112
+     Acc@5: 97.156
+     GFLOPS: 24.58
+     Params: 54.1M
+   model_fn: models.efficientnet_v2_m
+   resolution: 480
+   weights: models.EfficientNet_V2_M_Weights.IMAGENET1K_V1
+
+ efficientnet_v2_s:
+   metrics:
+     Acc@1: 84.228
+     Acc@5: 96.878
+     GFLOPS: 8.37
+     Params: 21.5M
+   model_fn: models.efficientnet_v2_s
+   resolution: 384
+   weights: models.EfficientNet_V2_S_Weights.IMAGENET1K_V1
+
+ # RegNet models: Designed for scalability and efficiency with a focus on network design.
+ # These models optimize for both accuracy and computational efficiency.
+ regnet_y_128gf:
+   metrics:
+     Acc@1: 86.068 # High accuracy but computationally expensive
+     Acc@5: 97.844
+     GFLOPS: 127.52
+     Params: 644.8M
+   model_fn: models.regnet_y_128gf
+   resolution: 224
+   weights: models.RegNet_Y_128GF_Weights.IMAGENET1K_SWAG_LINEAR_V1
+
+ regnet_y_16gf:
+   metrics:
+     Acc@1: 82.886
+     Acc@5: 96.328
+     GFLOPS: 15.91
+     Params: 83.6M
+   model_fn: models.regnet_y_16gf
+   resolution: 224
+   weights: models.RegNet_Y_16GF_Weights.IMAGENET1K_V2
+
+ regnet_y_1_6gf:
+   metrics:
+     Acc@1: 80.876
+     Acc@5: 95.444
+     GFLOPS: 1.61
+     Params: 11.2M
+   model_fn: models.regnet_y_1_6gf
+   resolution: 224
+   weights: models.RegNet_Y_1_6GF_Weights.IMAGENET1K_V2
+
+ regnet_y_32gf:
+   metrics:
+     Acc@1: 83.368
+     Acc@5: 96.498
+     GFLOPS: 32.28
+     Params: 145.0M
+   model_fn: models.regnet_y_32gf
+   resolution: 224
+   weights: models.RegNet_Y_32GF_Weights.IMAGENET1K_V2
+
+ regnet_y_3_2gf:
+   metrics:
+     Acc@1: 81.982
+     Acc@5: 95.972
+     GFLOPS: 3.18
+     Params: 19.4M
+   model_fn: models.regnet_y_3_2gf
+   resolution: 224
+   weights: models.RegNet_Y_3_2GF_Weights.IMAGENET1K_V2
+
+ regnet_y_400mf:
+   metrics:
+     Acc@1: 75.804
+     Acc@5: 92.742
+     GFLOPS: 0.4
+     Params: 4.3M
+   model_fn: models.regnet_y_400mf
+   resolution: 224
+   weights: models.RegNet_Y_400MF_Weights.IMAGENET1K_V2
+
+ regnet_y_800mf:
+   metrics:
+     Acc@1: 78.828
+     Acc@5: 94.502
+     GFLOPS: 0.83
+     Params: 6.4M
+   model_fn: models.regnet_y_800mf
+   resolution: 224
+   weights: models.RegNet_Y_800MF_Weights.IMAGENET1K_V2
+
+ regnet_y_8gf:
+   metrics:
+     Acc@1: 82.828
+     Acc@5: 96.33
+     GFLOPS: 8.47
+     Params: 39.4M
+   model_fn: models.regnet_y_8gf
+   resolution: 224
+   weights: models.RegNet_Y_8GF_Weights.IMAGENET1K_V2
+
+ # Vision Transformer (ViT) models: Transformer-based architecture for image classification.
+ # These models excel in capturing long-range dependencies but require significant compute for larger variants.
+ vit_b_16:
+   metrics:
+     Acc@1: 81.072 # Base ViT model with balanced accuracy and efficiency
+     Acc@5: 95.318
+     GFLOPS: 17.56
+     Params: 86.6M
+   model_fn: models.vit_b_16
+   resolution: 224
+   weights: models.ViT_B_16_Weights.IMAGENET1K_V1
+
+ vit_b_32:
+   metrics:
+     Acc@1: 75.912 # Larger patch size version of ViT, lower accuracy but fewer computations
+     Acc@5: 92.466
+     GFLOPS: 4.41
+     Params: 88.2M
+   model_fn: models.vit_b_32
+   resolution: 224
+   weights: models.ViT_B_32_Weights.IMAGENET1K_V1
+
+ vit_h_14:
+   metrics:
+     Acc@1: 88.552 # High-performance ViT model with very high accuracy and computational cost
+     Acc@5: 98.694
+     GFLOPS: 1016.72
+     Params: 633.5M
+   model_fn: models.vit_h_14
+   resolution: 224
+   weights: models.ViT_H_14_Weights.IMAGENET1K_SWAG_E2E_V1
+
+ vit_l_16:
+   metrics:
+     Acc@1: 79.662 # Larger ViT model with improved accuracy over base models
+     Acc@5: 94.638
+     GFLOPS: 61.55
+     Params: 304.3M
+   model_fn: models.vit_l_16
+   resolution: 224
+   weights: models.ViT_L_16_Weights.IMAGENET1K_V1
+
+ vit_l_32:
+   metrics:
+     Acc@1: 76.972 # Larger ViT with larger patch size, trading accuracy for reduced compute
+     Acc@5: 93.07
+     GFLOPS: 15.38
+     Params: 306.5M
+   model_fn: models.vit_l_32
+   resolution: 224
+   weights: models.ViT_L_32_Weights.IMAGENET1K_V1
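Each entry above names a torchvision constructor, its pretrained weights enum, and the expected input resolution. As an illustration only (the repository's own loader, `load_model_configs`, is referenced in the training docs but is not among the 50 files shown here), a minimal sketch of resolving one entry with plain torchvision and PyYAML could look like this:

```python
# Hypothetical helper: resolve one entry of the YAML above into a torchvision model.
# Assumes torchvision >= 0.13 and pyyaml are installed; not the repository's actual loader.
import yaml
from torchvision import models

def build_backbone(config_path: str, name: str):
    with open(config_path) as f:
        cfg = yaml.safe_load(f)[name]

    def resolve(dotted: str):
        # Entries look like "models.efficientnet_b0" or
        # "models.EfficientNet_B0_Weights.IMAGENET1K_V1"; walk the attributes
        # on torchvision.models after dropping the leading "models" token.
        obj = models
        for part in dotted.split(".")[1:]:
            obj = getattr(obj, part)
        return obj

    model_fn = resolve(cfg["model_fn"])
    weights = resolve(cfg["weights"])
    return model_fn(weights=weights), cfg["resolution"]

model, resolution = build_backbone("configs/image_classification_models_config.yaml", "efficientnet_b0")
```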
SlimFace/data/.gitignore ADDED
@@ -0,0 +1,5 @@
+ # Ignore everything in this directory
+ *
+ # Except this .gitignore file
+ !.gitignore
+ !raw
SlimFace/data/raw/.gitignore ADDED
@@ -0,0 +1,4 @@
+ # Ignore everything in this directory
+ *
+ # Except this .gitignore file
+ !.gitignore
SlimFace/docs/README.md ADDED
@@ -0,0 +1 @@
+ # This is Docs
SlimFace/docs/data/data_processing.md ADDED
@@ -0,0 +1,135 @@
+ # Data Processing for SlimFace Training 🖼️
+
+ ## Table of Contents
+
+ - [Data Processing for SlimFace Training 🖼️](#data-processing-for-slimface-training-)
+   - [Command-Line Arguments](#command-line-arguments)
+     - [Command-Line Arguments for `process_dataset.py`](#command-line-arguments-for-process_datasetpy)
+     - [Example Usage](#example-usage)
+   - [Step-by-step process for handling a dataset](#step-by-step-process-for-handling-a-dataset)
+     - [Step 1: Clone the Repository](#step-1-clone-the-repository)
+     - [Step 2: Process the Dataset](#step-2-process-the-dataset)
+       - [Option 1: Using Dataset from Kaggle](#option-1-using-dataset-from-kaggle)
+       - [Option 2: Using a Custom Dataset](#option-2-using-a-custom-dataset)
+
+ ## Command-Line Arguments
+ ### Command-Line Arguments for `process_dataset.py`
+ When running `python scripts/process_dataset.py`, you can customize the dataset processing with the following command-line arguments:
+
+ | Argument | Type | Default | Description |
+ |----------|------|---------|-------------|
+ | `--dataset_slug` | `str` | `vasukipatel/face-recognition-dataset` | The Kaggle dataset slug in `username/dataset-name` format. Specifies which dataset to download from Kaggle. |
+ | `--base_dir` | `str` | `./data` | The base directory where the dataset will be stored and processed. |
+ | `--augment` | `flag` | `False` | Enables data augmentation (e.g., flipping, rotation) for training images to increase dataset variety. Use `--augment` to enable. |
+ | `--random_state` | `int` | `42` | Random seed for reproducibility in the train-test split. Ensures consistent splitting across runs. |
+ | `--test_split_rate` | `float` | `0.2` | Proportion of data to use for validation (between 0 and 1). For example, `0.2` means 20% of the data is used for validation. |
+ | `--rotation_range` | `int` | `15` | Maximum rotation angle in degrees for data augmentation (if `--augment` is enabled). Images may be rotated randomly within this range. |
+ | `--source_subdir` | `str` | `Original Images/Original Images` | Subdirectory within `raw_dir` containing the images to process. Used for both Kaggle and custom datasets. |
+ | `--delete_raw` | `flag` | `False` | Deletes the raw folder after processing to save storage. Use `--delete_raw` to enable. |
+
+ ### Example Usage
+ To process a Kaggle dataset with augmentation and a custom validation split:
+
+ ```bash
+ python scripts/process_dataset.py \
+     --augment \
+     --test_split_rate 0.3 \
+     --rotation_range 15
+ ```
+
+ To process a **custom dataset** with a specific subdirectory and delete the raw folder:
+
+ ```bash
+ python scripts/process_dataset.py \
+     --source_subdir your_custom_dataset_dir \
+     --delete_raw
+ ```
+ ## Step-by-step process for handling a dataset
+ These options allow flexible dataset processing tailored to your needs. 🚀
+
+ ### Step 1: Clone the Repository
+ Ensure the `SlimFace` project is set up by cloning the repository and navigating to the project directory:
+
+ ```bash
+ git clone https://github.com/danhtran2mind/slimface/
+ cd slimface
+ ```
+
+ ### Step 2: Process the Dataset
+
+ #### Option 1: Using Dataset from Kaggle
+ To download and process the sample dataset from Kaggle, run:
+
+ ```bash
+ python scripts/process_dataset.py
+ ```
+
+ This script organizes the dataset into the following structure under `data/`:
+
+ ```markdown
+ data/
+ ├── processed_ds/
+ │   ├── train_data/
+ │   │   ├── Charlize Theron/
+ │   │   │   ├── Charlize Theron_70.jpg
+ │   │   │   ├── Charlize Theron_46.jpg
+ │   │   │   ...
+ │   │   ├── Dwayne Johnson/
+ │   │   │   ├── Dwayne Johnson_58.jpg
+ │   │   │   ├── Dwayne Johnson_9.jpg
+ │   │   │   ...
+ │   └── val_data/
+ │       ├── Charlize Theron/
+ │       │   ├── Charlize Theron_60.jpg
+ │       │   ├── Charlize Theron_45.jpg
+ │       │   ...
+ │       ├── Dwayne Johnson/
+ │       │   ├── Dwayne Johnson_11.jpg
+ │       │   ├── Dwayne Johnson_46.jpg
+ │       │   ...
+ ├── raw/
+ │   ├── Faces/
+ │   │   ├── Jessica Alba_90.jpg
+ │   │   ├── Hugh Jackman_70.jpg
+ │   │   ...
+ │   ├── Original Images/
+ │   │   ├── Charlize Theron/
+ │   │   │   ├── Charlize Theron_60.jpg
+ │   │   │   ├── Charlize Theron_70.jpg
+ │   │   │   ...
+ │   │   ├── Dwayne Johnson/
+ │   │   │   ├── Dwayne Johnson_11.jpg
+ │   │   │   ├── Dwayne Johnson_58.jpg
+ │   │   │   ...
+ │   ├── dataset.zip
+ │   └── Dataset.csv
+ └── .gitignore
+ ```
+
+ #### Option 2: Using a Custom Dataset
+ If you prefer to use your own dataset, place it in `./data/raw/your_custom_dataset_dir/` with the following structure:
+
+ ```markdown
+ data/
+ ├── raw/
+ │   ├── your_custom_dataset_dir/
+ │   │   ├── Charlize Theron/
+ │   │   │   ├── Charlize Theron_60.jpg
+ │   │   │   ├── Charlize Theron_70.jpg
+ │   │   │   ...
+ │   │   ├── Dwayne Johnson/
+ │   │   │   ├── Dwayne Johnson_11.jpg
+ │   │   │   ├── Dwayne Johnson_58.jpg
+ │   │   │   ...
+ ```
+
+ Your dataset does not need to contain only cropped human faces: **face extraction using face detection is supported**, and all extracted faces are saved to `data/processed_ds`.
+
+ Then, process your custom dataset by specifying the subdirectory:
+
+ ```bash
+ python scripts/process_dataset.py \
+     --source_subdir your_custom_dataset_dir
+ ```
+
+ This ensures your dataset is properly formatted for training. 🚀
SlimFace/docs/inference/inference_doc.md ADDED
@@ -0,0 +1,15 @@
+ ```bash
+ python src/slimface/inference/inference.py \
+     --input_path <image_path> \
+     --model_path <model_path> \
+     --index_to_class_mapping_path <index_to_class_mapping_json_path>
+ ```
+
+ ## Example Usage
+
+ ```bash
+ python src/slimface/inference/inference.py \
+     --input_path "tests/test_images/Elon_Musk.jpg" \
+     --model_path "ckpts/SlimFace_regnet_y_800mf_full_model.pth" \
+     --index_to_class_mapping_path ckpts/index_to_class_mapping.json
+ ```
SlimFace/docs/test/inference_test_doc.md ADDED
@@ -0,0 +1,96 @@
+ # Running Inference Test Script
+
+ Instructions to run the `tests/inference_test.sh` script in the `tests` folder on Linux, Windows, and macOS.
+
+ ## Prerequisites
+ 1. Install Python, PyTorch, Accelerate:
+ ```bash
+ pip install -r requirements/requirements.txt
+ ```
+ 2. Create virtual environment:
+ ```bash
+ python -m venv venv
+ source venv/bin/activate  # Linux/macOS
+ .\venv\Scripts\activate   # Windows
+ ```
+ 3. Make script executable (Linux/macOS):
+ ```bash
+ chmod +x tests/inference_test.sh
+ ```
+
+ ## Linux
+ 1. Open terminal, go to folder:
+ ```bash
+ cd tests
+ ```
+ 2. Run script:
+ ```bash
+ ./inference_test.sh
+ ```
+ 3. **Fix issues**:
+ - Use `bash inference_test.sh` if `./` fails.
+ - Fix line endings:
+ ```bash
+ sudo apt install dos2unix
+ dos2unix inference_test.sh
+ ```
+
+ ## Windows (using WSL)
+ 1. Install WSL and Ubuntu from Microsoft Store.
+ 2. Install dependencies:
+ ```bash
+ sudo apt update
+ sudo apt install python3 python3-pip
+ pip install -r requirements/requirements.txt
+ ```
+ 3. Go to folder:
+ ```bash
+ cd ./tests
+ ```
+ 4. Make executable:
+ ```bash
+ chmod +x inference_test.sh
+ ```
+ 5. Run script:
+ ```bash
+ ./inference_test.sh
+ ```
+ 6. **Fix issues**:
+ - Fix line endings:
+ ```bash
+ sudo apt install dos2unix
+ dos2unix inference_test.sh
+ ```
+
+ ## macOS
+ 1. Open Terminal, go to folder:
+ ```bash
+ cd tests
+ ```
+ 2. Install dependencies:
+ ```bash
+ brew install python
+ pip install -r requirements/requirements.txt
+ ```
+ 3. Make executable:
+ ```bash
+ chmod +x inference_test.sh
+ ```
+ 4. Run script:
+ ```bash
+ ./inference_test.sh
+ ```
+ 5. **Fix issues**:
+ - Fix line endings:
+ ```bash
+ brew install dos2unix
+ dos2unix inference_test.sh
+ ```
+
+ ## Notes
+ - Ensure GPU support (CUDA for Linux/Windows, MPS for macOS) if needed.
+ - Check script for extra settings (e.g., `export CUDA_VISIBLE_DEVICES=0`).
+ - Save output:
+ ```bash
+ ./inference_test.sh > output.log 2>&1
+ ```
SlimFace/docs/test/training_test_doc.md ADDED
@@ -0,0 +1,103 @@
+ # Running Training Scripts
+
+ Instructions to run these scripts in the `tests` folder on Linux, Windows, and macOS:
+ - `tests/training_accelerate_efficientnet_b3.sh`
+ - `tests/training_accelerate_efficientnet_v2_s.sh`
+ - `tests/training_accelerate_regnet_y_800mf.sh`
+ - `tests/training_accelerate_vit_b_16_test.sh`
+
+ ## Prerequisites
+ 1. Install Python, PyTorch, Accelerate:
+ ```bash
+ pip install -r requirements/requirements.txt
+ ```
+ 2. Create virtual environment:
+ ```bash
+ python -m venv venv
+ source venv/bin/activate  # Linux/macOS
+ .\venv\Scripts\activate   # Windows
+ ```
+ 3. Make scripts executable (Linux/macOS):
+ ```bash
+ chmod +x tests/*.sh
+ ```
+
+ ## Linux
+ 1. Open terminal, go to folder:
+ ```bash
+ cd tests
+ ```
+ 2. Run scripts:
+ ```bash
+ ./training_accelerate_efficientnet_b3.sh
+ ./training_accelerate_efficientnet_v2_s.sh
+ ./training_accelerate_regnet_y_800mf.sh
+ ./training_accelerate_vit_b_16_test.sh
+ ```
+ 3. **Fix issues**:
+ - Use `bash training_accelerate_efficientnet_b3.sh` if `./` fails.
+ - Fix line endings:
+ ```bash
+ sudo apt install dos2unix
+ dos2unix training_accelerate_*.sh
+ ```
+
+ ## Windows (using WSL)
+ 1. Install WSL and Ubuntu from Microsoft Store.
+ 2. Install dependencies:
+ ```bash
+ sudo apt update
+ sudo apt install python3 python3-pip
+ pip install -r requirements/requirements.txt
+ ```
+ 3. Go to folder:
+ ```bash
+ cd ./tests
+ ```
+ 4. Make executable:
+ ```bash
+ chmod +x training_accelerate_*.sh
+ ```
+ 5. Run scripts:
+ ```bash
+ ./training_accelerate_efficientnet_b3.sh
+ ```
+ 6. **Fix issues**:
+ - Fix line endings:
+ ```bash
+ sudo apt install dos2unix
+ dos2unix training_accelerate_*.sh
+ ```
+
+ ## macOS
+ 1. Open Terminal, go to folder:
+ ```bash
+ cd tests
+ ```
+ 2. Install dependencies:
+ ```bash
+ brew install python
+ pip install -r requirements/requirements.txt
+ ```
+ 3. Make executable:
+ ```bash
+ chmod +x training_accelerate_*.sh
+ ```
+ 4. Run scripts:
+ ```bash
+ ./training_accelerate_efficientnet_b3.sh
+ ```
+ 5. **Fix issues**:
+ - Fix line endings:
+ ```bash
+ brew install dos2unix
+ dos2unix training_accelerate_*.sh
+ ```
+
+ ## Notes
+ - Ensure GPU support (CUDA for Linux/Windows, MPS for macOS) if needed.
+ - Check scripts for extra settings (e.g., `export CUDA_VISIBLE_DEVICES=0`).
+ - Save output:
+ ```bash
+ ./training_accelerate_efficientnet_b3.sh > output.log 2>&1
+ ```
SlimFace/docs/training/training_doc.md ADDED
@@ -0,0 +1,48 @@
+ # Training Documentation
+
+ This document outlines the command-line arguments and a concise overview of the training pipeline for a face classification model using PyTorch Lightning.
+
+ ## Table of Contents
+
+ - [Arguments Table](#arguments-table)
+ - [Training Pipeline Overview](#training-pipeline-overview)
+
+ ## Arguments Table
+
+ | Argument Name | Type | Description |
+ |----------------------------------------|-------|-------------------------------------------------------------------------------------------------------------------------------|
+ | `dataset_dir` | `str` | Path to the dataset directory containing `train_data` and `val_data` subdirectories with preprocessed face images organized by person. |
+ | `image_classification_models_config_path` | `str` | Path to the YAML configuration file defining model configurations, including model function, resolution, and weights. |
+ | `batch_size` | `int` | Batch size for training and validation data loaders. Affects memory usage and training speed. |
+ | `num_epochs` | `int` | Number of epochs for training the model. An epoch is one full pass through the training dataset. |
+ | `learning_rate` | `float` | Initial learning rate for the Adam optimizer used during training. |
+ | `max_lr_factor` | `float` | Multiplies the initial learning rate to determine the maximum learning rate during the warmup phase of the scheduler. |
+ | `accelerator` | `str` | Type of accelerator for training. Options: `cpu`, `gpu`, `tpu`, `auto`. `auto` selects the best available device. |
+ | `devices` | `int` | Number of devices (e.g., GPUs) to use for training. Relevant for multi-GPU training. |
+ | `algorithm` | `str` | Face detection algorithm for preprocessing images. Options: `mtcnn`, `yolo`. |
+ | `warmup_steps` | `float` | Fraction of total training steps for the warmup phase of the learning rate scheduler (e.g., `0.05` means 5% of total steps). |
+ | `total_steps` | `int` | Total number of training steps. If `0`, calculated as epochs × steps per epoch (based on dataset size and batch size). |
+ | `classification_model_name` | `str` | Name of the classification model to use, as defined in the YAML configuration file. |
+
+ ## Training Pipeline Overview
+
+ The training pipeline preprocesses face images, fine-tunes a classification head on a pretrained model, and trains using PyTorch Lightning. Key components:
+
+ 1. **Preprocessing**: Aligns faces using `yolo` or `mtcnn`, caches resized images (`preprocess_and_cache_images`).
+ 2. **Dataset**: `FaceDataset` loads pre-aligned images, applies normalization, and assigns labels by person.
+ 3. **Model**: `FaceClassifier` pairs a frozen pretrained model (e.g., EfficientNet) with a custom classification head.
+ 4. **Training**: `FaceClassifierLightning` manages training with Adam optimizer, cosine annealing scheduler, and logs loss/accuracy.
+ 5. **Configuration**: Loads model details from YAML (`load_model_configs`), uses `DataLoader` with multiprocessing, and saves models via `CustomModelCheckpoint`.
+ 6. **Execution**: `main` orchestrates preprocessing, data loading, model training, and saves full model and classifier head.
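The frozen-backbone design described in step 3 of the overview can be pictured with a short, hypothetical sketch; the repository's actual `FaceClassifier` and `FaceClassifierLightning` classes live in the training code, which is not part of the 50 files shown here, so class and attribute names below are illustrative only:

```python
# Minimal sketch of a frozen pretrained backbone with a trainable classification head.
import torch
import torch.nn as nn
from torchvision import models

class FaceClassifierSketch(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        # Pretrained backbone with its original ImageNet classifier removed.
        backbone = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.IMAGENET1K_V1)
        in_features = backbone.classifier[1].in_features
        backbone.classifier = nn.Identity()
        for p in backbone.parameters():
            p.requires_grad = False  # freeze the pretrained weights
        self.backbone = backbone
        # Only this head receives gradients during training.
        self.head = nn.Sequential(nn.Dropout(0.3), nn.Linear(in_features, num_classes))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        with torch.no_grad():
            features = self.backbone(x)
        return self.head(features)
```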
SlimFace/new_contruct.md ADDED
@@ -0,0 +1,63 @@
+ ```markdown
+ slim-face-recognition/
+ ├── src/ # Source code for the project
+ │   ├── slim_face/ # Main package for your project
+ │   │   ├── __init__.py # Marks directory as a Python package
+ │   │   ├── models/ # Model definitions and architectures
+ │   │   │   ├── __init__.py
+ │   │   │   ├── edgeface.py # Model definitions (e.g., edgeface backbones)
+ │   │   │   └── ... # Other model-related scripts
+ │   │   ├── data/ # Data loading and preprocessing
+ │   │   │   ├── __init__.py
+ │   │   │   ├── dataset.py # Custom Dataset classes for DataLoader
+ │   │   │   ├── align.py # Face alignment utilities (e.g., from edgeface)
+ │   │   │   └── ... # Other data-related scripts
+ │   │   ├── training/ # Training-related scripts and logic
+ │   │   │   ├── __init__.py
+ │   │   │   ├── train.py # Main training script
+ │   │   │   ├── accelerate_train.py # Accelerated training script
+ │   │   │   └── ... # Other training utilities
+ │   │   ├── inference/ # Inference-related scripts and logic
+ │   │   │   ├── __init__.py # Marks directory as a Python package
+ │   │   │   ├── inference.py # Face recognition inference logic
+ │   │   ├── utils/ # Utility functions (e.g., logging, metrics)
+ │   │   │   ├── __init__.py
+ │   │   │   ├── helpers.py # Miscellaneous helper functions
+ │   │   │   └── ... # Other utility scripts
+ │   │   └── __main__.py # Entry point for running the package as a module
+ ├── tests/ # Unit and integration tests
+ │   ├── __init__.py
+ │   ├── test_data.py # Tests for data loading
+ │   ├── test_models.py # Tests for model functionality
+ │   ├── test_training.py # Tests for training pipeline
+ │   ├── test_inference.py # Tests for inference pipeline
+ │   ├── test_images/ # Sample images for testing (e.g., Elon_Musk.jpg)
+ ├── data/ # Datasets and data-related files
+ │   ├── raw/ # Raw, unprocessed data
+ │   ├── processed/ # Preprocessed data (e.g., aligned faces)
+ │   └── external/ # External datasets (e.g., from Kaggle)
+ ├── scripts/ # Standalone scripts for tasks like data download
+ │   ├── download_dataset.py # Script to download datasets (e.g., Kaggle)
+ │   └── preprocess.py # Data preprocessing scripts
+ ├── notebooks/ # Jupyter notebooks for exploration and analysis
+ │   ├── ztest.ipynb # Existing notebook for testing/exploration
+ │   └── ... # Other exploratory notebooks
+ ├── ckpts/ # Model checkpoints and weights
+ │   ├── edgeface_xs_gamma_06.pt # Pretrained model weights
+ │   ├── edgeface_s_gamma_05.pt # Pretrained model weights
+ │   └── ... # Other checkpoints
+ ├── configs/ # Configuration files (e.g., YAML, JSON)
+ │   ├── training.yaml # Training hyperparameters
+ │   └── model.yaml # Model configurations
+ ├── docs/ # Documentation files
+ │   ├── api.md # API documentation
+ │   └── usage.md # Usage instructions
+ ├── requirements.txt # Main dependencies
+ ├── requirements_compatible.txt # Development dependencies (e.g., testing, linting)
+ ├── README.md # Project overview and setup instructions
+ ├── LICENSE # License file (e.g., MIT, Apache)
+ ├── .gitignore # Git ignore file
+ ├── .python-version # Python version specification (e.g., for pyenv)
+ ├── setup.py # Setup script for packaging the project
+ └── pyproject.toml # Modern Python project configuration (optional)
+ ```
SlimFace/requirements/requirements.txt ADDED
@@ -0,0 +1,15 @@
+ torch>=2.6.0
+ torchvision>=0.21.0
+ timm>=1.0.15
+ mxnet>=1.9.1
+ opencv-python>=4.10.0.84
+ numpy>=1.26.0,<2.0.0
+ pytorch-lightning>=2.5.1
+ tqdm
+ imgaug
+ accelerate>=1.6.0
+ scikit-learn
+ pillow
+ requests
+ ultralytics
+ huggingface-hub>=0.31.1
SlimFace/requirements/requirements_compatible.txt ADDED
@@ -0,0 +1,15 @@
+ torch==2.6.0
+ torchvision==0.21.0
+ timm==1.0.15
+ mxnet==1.9.1
+ opencv-python==4.11.0.86
+ numpy==1.26.4
+ pillow==11.2.1
+ pytorch-lightning==2.5.1
+ accelerate==1.6.0
+ imgaug==0.4.0
+ scikit-learn==1.6.1
+ pillow==11.2.1
+ requests==2.32.4
+ ultralytics==8.3.160
+ huggingface-hub==0.31.1
SlimFace/requirements/requirements_inference.txt ADDED
@@ -0,0 +1,15 @@
+ torch>=2.6.0
+ torchvision>=0.21.0
+ timm>=1.0.15
+ mxnet>=1.9.1
+ opencv-python>=4.10.0.84
+ numpy>=1.26.0,<2.0.0
+ ultralytics
+ pytorch-lightning>=2.5.1
+ tqdm
+ imgaug
+ accelerate>=1.6.0
+ scikit-learn
+ pillow
+ requests
+ huggingface-hub>=0.31.1
SlimFace/scripts/download_ckpts.py ADDED
@@ -0,0 +1,104 @@
+ import os
+ import argparse
+ from huggingface_hub import snapshot_download
+
+ # Model configurations for EdgeFace models
+ model_configs = {
+     "edgeface_base": {
+         "repo": "idiap/EdgeFace-Base",
+         "filename": "edgeface_base.pt",
+         "local_dir": "ckpts/idiap"
+     },
+     "edgeface_s_gamma_05": {
+         "repo": "idiap/EdgeFace-S-GAMMA",
+         "filename": "edgeface_s_gamma_05.pt",
+         "local_dir": "ckpts/idiap"
+     },
+     "edgeface_xs_gamma_06": {
+         "repo": "idiap/EdgeFace-XS-GAMMA",
+         "filename": "edgeface_xs_gamma_06.pt",
+         "local_dir": "ckpts/idiap"
+     },
+     "edgeface_xxs": {
+         "repo": "idiap/EdgeFace-XXS",
+         "filename": "edgeface_xxs.pt",
+         "local_dir": "ckpts/idiap"
+     },
+     "SlimFace_efficientnet_b3": {
+         "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+         "filename": "SlimFace_efficientnet_b3_full_model.pth",
+         "local_dir": "ckpts"
+     },
+     "SlimFace_efficientnet_v2_s": {
+         "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+         "filename": "SlimFace_efficientnet_v2_s_full_model.pth",
+         "local_dir": "ckpts"
+     },
+     "SlimFace_regnet_y_800mf": {
+         "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+         "filename": "SlimFace_regnet_y_800mf_full_model.pth",
+         "local_dir": "ckpts"
+     },
+     "SlimFace_vit_b_16": {
+         "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+         "filename": "SlimFace_vit_b_16_full_model.pth",
+         "local_dir": "ckpts"
+     },
+     "SlimFace_mapping": {
+         "repo": "danhtran2mind/SlimFace-sample-checkpoints",
+         "filename": "index_to_class_mapping.json",
+         "local_dir": "ckpts"
+     }
+ }
+
+ def download_models(model_name=None):
+     """Download specified models from model_configs to their respective local directories.
+
+     Args:
+         model_name (str, optional): Specific model to download. If None, download all models.
+     """
+     # Determine files to download
+     if model_name:
+         if model_name not in model_configs:
+             raise ValueError(f"Model {model_name} not found in available models: {list(model_configs.keys())}")
+         configs_to_download = [model_configs[model_name]]
+     else:
+         configs_to_download = list(model_configs.values())
+
+     for config in configs_to_download:
+         repo_id = config["repo"]
+         filename = config["filename"]
+         local_dir = config["local_dir"]
+
+         # Ensure the local directory exists
+         os.makedirs(local_dir, exist_ok=True)
+
+         try:
+             snapshot_download(
+                 repo_id=repo_id,
+                 local_dir=local_dir,
+                 local_dir_use_symlinks=False,
+                 allow_patterns=[filename],
+                 cache_dir=None,
+                 revision="main"
+             )
+             print(f"Downloaded {filename} to {local_dir}")
+         except Exception as e:
+             print(f"Error downloading {filename}: {e}")
+
+ def main():
+     """Parse command-line arguments and initiate model download."""
+     parser = argparse.ArgumentParser(description="Download models from Hugging Face Hub.")
+     parser.add_argument(
+         "--model",
+         type=str,
+         default=None,
+         choices=list(model_configs.keys()),
+         help="Specific model to download. If not provided, all models are downloaded."
+     )
+     args = parser.parse_args()
+
+     download_models(args.model)
+
+ if __name__ == "__main__":
+     main()
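Per the argparse definition above, `--model` is optional and restricted to the keys of `model_configs`; omitting it downloads every checkpoint. For example:

```bash
# Download all checkpoints (default behaviour), or a single one by name.
python scripts/download_ckpts.py
python scripts/download_ckpts.py --model edgeface_base
```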
SlimFace/scripts/process_dataset.py ADDED
@@ -0,0 +1,242 @@
+ import os
+ import zipfile
+ import requests
+ import json
+ from tqdm import tqdm
+ from sklearn.model_selection import train_test_split
+ import imgaug.augmenters as iaa
+ import sys
+ import argparse
+ import shutil
+
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+ from src.slimface.data.data_processing import process_image
+
+ def download_and_split_kaggle_dataset(
+     dataset_slug,
+     base_dir="data",
+     augment=False,
+     random_state=42,
+     test_split_rate=0.2,
+     rotation_range=15,
+     source_subdir="Original Images/Original Images",
+     delete_raw=False
+ ):
+     """Download a Kaggle dataset, split it into train/validation sets, and process images for face recognition.
+
+     Skips downloading if ZIP exists and unzipping if raw folder contains files.
+     Optionally deletes the raw folder to save storage.
+
+     Args:
+         dataset_slug (str): Dataset slug in 'username/dataset-name' format.
+         base_dir (str): Base directory for storing dataset.
+         augment (bool): Whether to apply data augmentation to training images.
+         random_state (int): Random seed for reproducibility in train-test split.
+         test_split_rate (float): Proportion of data to use for validation (between 0 and 1).
+         rotation_range (int): Maximum rotation angle in degrees for augmentation.
+         source_subdir (str): Subdirectory within raw_dir containing images.
+         delete_raw (bool): Whether to delete the raw folder after processing to save storage.
+
+     Raises:
+         ValueError: If test_split_rate is not between 0 and 1 or dataset_slug is invalid.
+         FileNotFoundError: If source directory is not found.
+         Exception: If dataset download fails or other errors occur.
+     """
+     try:
+         # Validate test_split_rate
+         if not 0 < test_split_rate < 1:
+             raise ValueError("test_split_rate must be between 0 and 1")
+
+         # Set up directories
+         raw_dir = os.path.join(base_dir, "raw")
+         processed_dir = os.path.join(base_dir, "processed_ds")
+         train_dir = os.path.join(processed_dir, "train_data")
+         val_dir = os.path.join(processed_dir, "val_data")
+         zip_path = os.path.join(raw_dir, "dataset.zip")
+
+         os.makedirs(raw_dir, exist_ok=True)
+         os.makedirs(processed_dir, exist_ok=True)
+
+         # Check if ZIP file already exists
+         if os.path.exists(zip_path):
+             print(f"ZIP file already exists at {zip_path}, skipping download.")
+         else:
+             # Download dataset with progress bar
+             username, dataset_name = dataset_slug.split("/")
+             if not (username and dataset_name):
+                 raise ValueError("Invalid dataset slug format. Expected 'username/dataset-name'")
+
+             dataset_url = f"https://www.kaggle.com/api/v1/datasets/download/{username}/{dataset_name}"
+             print(f"Downloading dataset {dataset_slug}...")
+             response = requests.get(dataset_url, stream=True)
+             if response.status_code != 200:
+                 raise Exception(f"Failed to download dataset: {response.status_code}")
+
+             total_size = int(response.headers.get("content-length", 0))
+             with open(zip_path, "wb") as file, tqdm(
+                 desc="Downloading dataset",
+                 total=total_size,
+                 unit="B",
+                 unit_scale=True,
+                 unit_divisor=1024,
+             ) as pbar:
+                 for chunk in response.iter_content(chunk_size=8192):
+                     if chunk:
+                         file.write(chunk)
+                         pbar.update(len(chunk))
+
+         # # Check if raw directory contains files, excluding the ZIP file
+         # zip_filename = os.path.basename(zip_path)
+         # if os.path.exists(raw_dir) and any(file != zip_filename for file in os.listdir(raw_dir)):
+         #     print(f"Raw directory {raw_dir} already contains files, skipping extraction.")
+         # else:
+         # Extract dataset
+         print("Extracting dataset...")
+         with zipfile.ZipFile(zip_path, "r") as zip_ref:
+             zip_ref.extractall(raw_dir)
+
+         # Define source directory
+         source_dir = os.path.join(raw_dir, source_subdir)
+         if not os.path.exists(source_dir):
+             raise FileNotFoundError(f"Source directory {source_dir} not found")
+
+         # Group files by person (subfolder names)
+         person_files = {}
+         for person in os.listdir(source_dir):
+             person_dir = os.path.join(source_dir, person)
+             if os.path.isdir(person_dir):
+                 person_files[person] = [
+                     f for f in os.listdir(person_dir)
+                     if os.path.isfile(os.path.join(person_dir, f))
+                     and f.lower().endswith((".png", ".jpg", ".jpeg"))
+                 ]
+
+         # Define augmentation pipeline
+         if augment:
+             aug = iaa.Sequential([
+                 iaa.Fliplr(p=1.0),
+                 iaa.Sometimes(
+                     0.5,
+                     iaa.Affine(rotate=(-rotation_range, rotation_range))
+                 ),
+             ])
+         else:
+             aug = None
+
+         # Process and split files with progress bar
+         total_files = sum(len(images) for images in person_files.values())
+         with tqdm(total=total_files, desc="Processing and copying files", unit="file") as pbar:
+             for person, images in person_files.items():
+                 # Set up directories for this person
+                 train_person_dir = os.path.join(train_dir, person)
+                 val_person_dir = os.path.join(val_dir, person)
+                 temp_dir = os.path.join(processed_dir, "temp")
+                 os.makedirs(train_person_dir, exist_ok=True)
+                 os.makedirs(val_person_dir, exist_ok=True)
+                 os.makedirs(temp_dir, exist_ok=True)
+
+                 all_image_filenames = []
+
+                 # Process images and create augmentations before splitting
+                 for img in images:
+                     src_path = os.path.join(source_dir, person, img)
+                     saved_images = process_image(src_path, temp_dir, aug if augment else None)
+                     all_image_filenames.extend(saved_images)
+                     pbar.update(1)
+
+                 # Split all images (original and augmented) for this person
+                 train_images_filenames, val_images_filenames = train_test_split(
+                     all_image_filenames,
+                     test_size=test_split_rate,
+                     random_state=random_state,
+                 )
+
+                 # Move images to final train/val directories
+                 for img in all_image_filenames:
+                     src = os.path.join(temp_dir, img)
+                     if not os.path.exists(src):
+                         print(f"Warning: File {src} not found, skipping.")
+                         continue
+                     if img in train_images_filenames:
+                         dst = os.path.join(train_person_dir, img)
+                     else:
+                         dst = os.path.join(val_person_dir, img)
+                     os.rename(src, dst)
+
+                 # Clean up temporary directory for this person
+                 shutil.rmtree(temp_dir, ignore_errors=True)
+                 print(f"\nCleaned up temp directory for {person}")
+
+         # Optionally delete raw folder to save storage
+         if delete_raw:
+             print(f"Deleting raw folder {raw_dir} to save storage...")
+             shutil.rmtree(raw_dir, ignore_errors=True)
+             print(f"Raw folder {raw_dir} deleted.")
+
+         print(f"Dataset {dataset_slug} downloaded, extracted, processed, and split successfully!")
+
+     except Exception as e:
+         print(f"Error processing dataset: {e}")
+         raise
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Download and process a Kaggle dataset for face recognition.")
+ parser = argparse.ArgumentParser(description="Download and process a Kaggle dataset for face recognition.")
184
+ parser.add_argument(
185
+ "--dataset_slug",
186
+ type=str,
187
+ default="vasukipatel/face-recognition-dataset",
188
+ help="Kaggle dataset slug in 'username/dataset-name' format"
189
+ )
190
+ parser.add_argument(
191
+ "--base_dir",
192
+ type=str,
193
+ default="./data",
194
+ help="Base directory for storing dataset"
195
+ )
196
+ parser.add_argument(
197
+ "--augment",
198
+ action="store_true",
199
+ help="Enable data augmentation"
200
+ )
201
+ parser.add_argument(
202
+ "--random_state",
203
+ type=int,
204
+ default=42,
205
+ help="Random seed for train-test split reproducibility"
206
+ )
207
+ parser.add_argument(
208
+ "--test_split_rate",
209
+ type=float,
210
+ default=0.2,
211
+ help="Proportion of data for validation (between 0 and 1)"
212
+ )
213
+ parser.add_argument(
214
+ "--rotation_range",
215
+ type=int,
216
+ default=15,
217
+ help="Maximum rotation angle in degrees for augmentation"
218
+ )
219
+ parser.add_argument(
220
+ "--source_subdir",
221
+ type=str,
222
+ default="Original Images/Original Images",
223
+ help="Subdirectory within raw_dir containing images"
224
+ )
225
+ parser.add_argument(
226
+ "--delete_raw",
227
+ action="store_true",
228
+ help="Delete the raw folder after processing to save storage"
229
+ )
230
+
231
+ args = parser.parse_args()
232
+
233
+ download_and_split_kaggle_dataset(
234
+ dataset_slug=args.dataset_slug,
235
+ base_dir=args.base_dir,
236
+ augment=args.augment,
237
+ random_state=args.random_state,
238
+ test_split_rate=args.test_split_rate,
239
+ rotation_range=args.rotation_range,
240
+ source_subdir=args.source_subdir,
241
+ delete_raw=args.delete_raw
242
+ )
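+
+ # Example invocation (a sketch, assuming the command is run from the SlimFace repo root;
+ # the flags mirror the argparse options above and the values are illustrative):
+ # python scripts/process_dataset.py \
+ # --dataset_slug vasukipatel/face-recognition-dataset \
+ # --base_dir ./data \
+ # --augment \
+ # --test_split_rate 0.2 \
+ # --delete_raw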
SlimFace/scripts/setup_third_party.py ADDED
@@ -0,0 +1,61 @@
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ import argparse
5
+
6
+ def run_command(command, error_message):
7
+ """Run a shell command and handle errors."""
8
+ try:
9
+ subprocess.check_call(command, shell=True)
10
+ except subprocess.CalledProcessError:
11
+ print(f"Error: {error_message}")
12
+ sys.exit(1)
13
+
14
+ def setup_edgeface(repo_url, third_party_dir, branch=None):
15
+ """Set up edgeface as a third-party dependency in the specified directory."""
16
+ edgeface_dir = os.path.join(third_party_dir, "edgeface")
17
+
18
+ # Create third_party directory if it doesn't exist
19
+ if not os.path.exists(third_party_dir):
20
+ os.makedirs(third_party_dir)
21
+ print(f"Created directory: {third_party_dir}")
22
+
23
+ # Clone edgeface if not already present
24
+ if not os.path.exists(edgeface_dir):
25
+ print(f"Cloning edgeface into {edgeface_dir}...")
26
+ clone_command = f"git clone {repo_url} {edgeface_dir}"
27
+ if branch:
28
+ clone_command = f"git clone -b {branch} {repo_url} {edgeface_dir}"
29
+ run_command(
30
+ clone_command,
31
+ f"Failed to clone edgeface from {repo_url}"
32
+ )
33
+ else:
34
+ print(f"edgeface already exists at {edgeface_dir}")
35
+
36
+ # Verify edgeface directory contains expected files
37
+ if os.path.exists(edgeface_dir) and os.listdir(edgeface_dir):
38
+ print(f"edgeface setup completed successfully at {edgeface_dir}")
39
+ else:
40
+ print(f"Error: edgeface directory is empty or invalid")
41
+ sys.exit(1)
42
+
43
+ if __name__ == "__main__":
44
+ parser = argparse.ArgumentParser(description="Set up edgeface as a third-party dependency.")
45
+ parser.add_argument(
46
+ "--repo-url",
47
+ default="https://github.com/danhtran2mind/edgeface.git",
48
+ help="Git repository URL for edgeface (default: %(default)s)"
49
+ )
50
+ parser.add_argument(
51
+ "--third-party-dir",
52
+ default=os.path.join("src", "third_party"),
53
+ help="Directory to store third-party dependencies (default: %(default)s)"
54
+ )
55
+ parser.add_argument(
56
+ "--branch",
57
+ help="Git branch to clone (optional)"
58
+ )
59
+ args = parser.parse_args()
60
+
61
+ setup_edgeface(args.repo_url, args.third_party_dir, args.branch)
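+
+ # Example invocation (a sketch; the values below are simply the argparse defaults above,
+ # and --branch may be added to clone a specific branch):
+ # python scripts/setup_third_party.py \
+ # --repo-url https://github.com/danhtran2mind/edgeface.git \
+ # --third-party-dir src/third_party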
SlimFace/src/slimface/__init__.py ADDED
File without changes
SlimFace/src/slimface/data/data_processing.py ADDED
@@ -0,0 +1,67 @@
1
+ from PIL import Image
2
+ import numpy as np
3
+ import os
4
+ import imgaug.augmenters as iaa
5
+ import random
6
+ import uuid
7
+
8
+ RANDOM_RATIO = 0.5  # probability of also writing an augmented copy of each processed image
9
+ # TARGET_SIZE = (224, 224) # Standard size for face recognition models
10
+
11
+ def process_image(src_path, dest_dir, aug=None):
12
+ """
13
+ Process an image by resizing, normalizing, and optionally augmenting it.
14
+ Saves both raw and augmented versions of the image.
15
+
16
+ Args:
17
+ src_path (str): Path to the source image
18
+ dest_dir (str): Destination directory for the raw and augmented images
19
+ aug (iaa.Sequential, optional): Augmentation pipeline
20
+ Returns:
21
+ list: List of saved image filenames (raw and optionally augmented)
22
+ """
23
+ saved_images = []
24
+ try:
25
+ # Open and process image
26
+ img = Image.open(src_path).convert('RGB')
27
+
28
+ # Resize image
29
+ # img = img.resize(TARGET_SIZE, Image.Resampling.LANCZOS)
30
+
31
+ # Convert to numpy array and normalize
32
+ img_array = np.array(img) / 255.0
33
+
34
+ # Save raw processed image
35
+ raw_filename = os.path.basename(src_path)
36
+ base, ext = os.path.splitext(raw_filename)
37
+ raw_dest_path = os.path.join(dest_dir, raw_filename)
38
+ counter = 1
39
+ while os.path.exists(raw_dest_path):
40
+ raw_filename = f"{base}_{counter}{ext}"
41
+ raw_dest_path = os.path.join(dest_dir, raw_filename)
42
+ counter += 1
43
+ raw_img = Image.fromarray((img_array * 255).astype(np.uint8))
44
+ raw_img.save(raw_dest_path, quality=100)
45
+ saved_images.append(raw_filename)
46
+
47
+ # Apply augmentation if specified and save augmented image
48
+ if aug and random.random() <= RANDOM_RATIO:
49
+ img_array_aug = aug.augment_image(img_array)
50
+ # Clip values to ensure valid range after augmentation
51
+ img_array_aug = np.clip(img_array_aug, 0, 1)
52
+ # Convert back to image
53
+ aug_img = Image.fromarray((img_array_aug * 255).astype(np.uint8))
54
+ # Save augmented image with unique suffix
55
+ aug_filename = f"aug_{base}_{uuid.uuid4().hex[:8]}{ext}"
56
+ aug_dest_path = os.path.join(dest_dir, aug_filename)
57
+ aug_img.save(aug_dest_path, quality=100)
58
+ saved_images.append(aug_filename)
59
+
60
+ except Image.UnidentifiedImageError:
61
+ print(f"Error: Cannot identify image file {src_path}")
62
+ except OSError as e:
63
+ print(f"Error processing image {src_path}: {e}")
64
+ except Exception as e:
65
+ print(f"Unexpected error processing image {src_path}: {e}")
66
+
67
+ return saved_images
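+
+ # Minimal usage sketch (assumes a local "face.jpg" and an existing "output_dir"; note that
+ # even when a pipeline is passed, the augmented copy is only written with probability RANDOM_RATIO):
+ # aug = iaa.Sequential([iaa.Fliplr(p=1.0), iaa.Affine(rotate=(-15, 15))])
+ # saved = process_image("face.jpg", "output_dir", aug=aug)
+ # print(saved)  # e.g. ["face.jpg"] or ["face.jpg", "aug_face_1a2b3c4d.jpg"]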
SlimFace/src/slimface/data/process_face.py ADDED
@@ -0,0 +1,64 @@
1
+ import os
2
+ import sys
3
+ from PIL import Image
4
+ from tqdm import tqdm
5
+ import warnings
6
+
7
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
8
+
9
+ from models.detection_models import align # Assuming this is available in your project
10
+
11
+ def extract_and_save_faces(input_dir, output_dir, algorithm='yolo', resolution=224):
12
+ """Preprocess images using face alignment and cache them with specified resolution."""
13
+ if align is None:
14
+ raise ImportError("face_alignment package is required for preprocessing.")
15
+ os.makedirs(output_dir, exist_ok=True)
16
+ with warnings.catch_warnings():
17
+ warnings.filterwarnings("ignore", category=FutureWarning, message=".*rcond.*")
18
+ for person in sorted(os.listdir(input_dir)):
19
+ person_path = os.path.join(input_dir, person)
20
+ if not os.path.isdir(person_path):
21
+ continue
22
+ output_person_path = os.path.join(output_dir, person)
23
+ os.makedirs(output_person_path, exist_ok=True)
24
+ skipped_count = 0
25
+ for img_name in tqdm(os.listdir(person_path), desc=f"Processing {person}"):
26
+ if not img_name.endswith(('.jpg', '.jpeg', '.png')):
27
+ continue
28
+ img_path = os.path.join(person_path, img_name)
29
+ output_img_path = os.path.join(output_person_path, img_name)
30
+ if os.path.exists(output_img_path):
31
+ skipped_count += 1
32
+ continue
33
+ try:
34
+ aligned_result = align.get_aligned_face([img_path], algorithm=algorithm)
35
+ aligned_image = aligned_result[0][1] if aligned_result and len(aligned_result) > 0 else None
36
+ if aligned_image is None:
37
+ print(f"Face detection failed for {img_path}, using resized original image")
38
+ aligned_image = Image.open(img_path).convert('RGB')
39
+ aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
40
+ aligned_image.save(output_img_path, quality=100)
41
+ except Exception as e:
42
+ print(f"Error processing {img_path}: {e}")
43
+ aligned_image = Image.open(img_path).convert('RGB')
44
+ aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
45
+ aligned_image.save(output_img_path, quality=100)
46
+ if skipped_count > 0:
47
+ print(f"Skipped {skipped_count} images for {person} that were already processed.")
48
+
49
+ if __name__ == "__main__":
50
+ import argparse
51
+ parser = argparse.ArgumentParser(description="Preprocess and cache images with face alignment.")
52
+ parser.add_argument('--input_dir', type=str, required=True, help='Directory containing raw images.')
53
+ parser.add_argument('--output_dir', type=str, required=True, help='Directory to save preprocessed images.')
54
+ parser.add_argument('--algorithm', type=str, default='yolo', choices=['yolo', 'dlib'], help='Face detection algorithm to use.')
55
+ parser.add_argument('--resolution', type=int, default=224, help='Resolution for the output images.')
56
+
57
+ args = parser.parse_args()
58
+ extract_and_save_faces(args.input_dir, args.output_dir, args.algorithm, args.resolution)
59
+
60
+ # python src/slimface/data/preprocess.py \
61
+ # --input_dir "data/raw/Original Images/Original Images" \
62
+ # --output_dir "data/processed/Aligned Images" \
63
+ # --algorithm "yolo" \
64
+ # --resolution 224
SlimFace/src/slimface/inference/__init__.py ADDED
File without changes
SlimFace/src/slimface/inference/end2end_inference.py ADDED
@@ -0,0 +1,143 @@
1
+ import os
2
+ import sys
3
+ import torch
4
+ import torchvision.transforms as transforms
5
+ from PIL import Image
6
+ import argparse
7
+ import warnings
8
+ import json
9
+
10
+ # Append necessary paths
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "third_party")))
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+
14
+ from edgeface.face_alignment import align as edgeface_align
15
+ from edgeface.backbones import get_model
16
+ from models.detection_models import align as align_classifier
17
+
18
+ def preprocess_image(image_path, algorithm='yolo', resolution=224):
19
+ try:
20
+ with warnings.catch_warnings():
21
+ warnings.filterwarnings("ignore", category=FutureWarning, message=".*rcond.*")
22
+ aligned_result = align_classifier.get_aligned_face([image_path], algorithm=algorithm)
23
+ aligned_image = aligned_result[0][1] if aligned_result and len(aligned_result) > 0 else Image.open(image_path).convert('RGB')
24
+ aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
25
+ except Exception as e:
26
+ print(f"Error processing {image_path}: {e}")
27
+ aligned_image = Image.open(image_path).convert('RGB').resize((resolution, resolution), Image.Resampling.LANCZOS)
28
+
29
+ transform = transforms.Compose([
30
+ transforms.ToTensor(),
31
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
32
+ ])
33
+ return transform(aligned_image).unsqueeze(0)
34
+
35
+ def load_model(model_path):
36
+ try:
37
+ model = torch.jit.load(model_path, map_location=torch.device('cpu'))
38
+ model.eval()
39
+ return model
40
+ except Exception as e:
41
+ raise RuntimeError(f"Failed to load model from {model_path}: {e}")
42
+
43
+ def load_class_mapping(index_to_class_mapping_path):
44
+ try:
45
+ with open(index_to_class_mapping_path, 'r') as f:
46
+ idx_to_class = json.load(f)
47
+ return {int(k): v for k, v in idx_to_class.items()}
48
+ except Exception as e:
49
+ raise ValueError(f"Error loading class mapping: {e}")
50
+
51
+ def get_edgeface_embeddings(image_path, model_name="edgeface_base", model_dir="ckpts/idiap"):
52
+ model = get_model(model_name)
53
+ model.load_state_dict(torch.load(f'{model_dir}/{model_name}.pt', map_location='cpu'))
54
+ model.eval()
55
+
56
+ transform = transforms.Compose([
57
+ transforms.ToTensor(),
58
+ transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
59
+ ])
60
+
61
+ aligned_result = edgeface_align.get_aligned_face(image_path, algorithm='yolo')
62
+ if not aligned_result:
63
+ raise ValueError(f"Face alignment failed for {image_path}")
64
+
65
+ with torch.no_grad():
66
+ return model(transform(aligned_result[0][1]).unsqueeze(0))
67
+
68
+ def inference_and_confirm(args):
69
+ idx_to_class = load_class_mapping(args.index_to_class_mapping_path)
70
+ classifier_model = load_model(args.model_path)
71
+ device = torch.device('cuda' if torch.cuda.is_available() and args.accelerator == 'gpu' else 'cpu')
72
+ classifier_model = classifier_model.to(device)
73
+
74
+ # Load reference images mapping from JSON file
75
+ try:
76
+ with open(args.reference_dict_path, 'r') as f:
77
+ reference_images = json.load(f)
78
+ except Exception as e:
79
+ raise ValueError(f"Error loading reference images from {args.reference_dict_path}: {e}")
80
+
81
+ # Handle single image or directory
82
+ image_paths = [args.unknown_image_path] if args.unknown_image_path.endswith(('.jpg', '.jpeg', '.png')) else [
83
+ os.path.join(args.unknown_image_path, img) for img in os.listdir(args.unknown_image_path)
84
+ if img.endswith(('.jpg', '.jpeg', '.png'))
85
+ ]
86
+
87
+ results = []
88
+ with torch.no_grad():
89
+ for image_path in image_paths:
90
+ image_tensor = preprocess_image(image_path, args.algorithm, args.resolution).to(device)
91
+ output = classifier_model(image_tensor)
92
+ probabilities = torch.softmax(output, dim=1)
93
+ confidence, predicted = torch.max(probabilities, 1)
94
+ predicted_class = idx_to_class.get(predicted.item(), "Unknown")
95
+
96
+ result = {'image_path': image_path, 'predicted_class': predicted_class, 'confidence': confidence.item()}
97
+
98
+ # Validate with EdgeFace embeddings if reference image exists
99
+ reference_image_path = reference_images.get(predicted_class)
100
+ if reference_image_path and os.path.exists(reference_image_path):
101
+ unknown_embedding = get_edgeface_embeddings(image_path, args.edgeface_model_name, args.edgeface_model_dir)
102
+ reference_embedding = get_edgeface_embeddings(reference_image_path, args.edgeface_model_name, args.edgeface_model_dir)
103
+ similarity = torch.nn.functional.cosine_similarity(unknown_embedding, reference_embedding).item()
104
+ result['similarity'] = similarity
105
+ result['confirmed'] = similarity >= args.similarity_threshold
106
+
107
+ results.append(result)
108
+
109
+ # {'image_path': 'tests/test_images/dont_know.jpg', 'predicted_class': 'Robert Downey Jr',
110
+ # 'confidence': 0.9292604923248291, 'similarity': 0.603316068649292, 'confirmed': True}
111
+
112
+ return results
113
+
114
+ def main(args):
115
+ results = inference_and_confirm(args)
116
+ for result in results:
117
+ print(f"Image: {result['image_path']}, Predicted Class: {result['predicted_class']}, "
118
+ f"Confidence: {result['confidence']:.4f}, Similarity: {result.get('similarity', 'N/A'):.4f}, "
119
+ f"Confirmed: {result.get('confirmed', 'N/A')}")
120
+
121
+ if __name__ == "__main__":
122
+ parser = argparse.ArgumentParser(description='Face classification with EdgeFace embedding validation.')
123
+ parser.add_argument('--unknown_image_path', type=str, required=True, help='Path to image or directory.')
124
+ parser.add_argument('--reference_dict_path', type=str, required=True, help='Path to JSON file mapping classes to reference image paths.')
125
+ parser.add_argument('--index_to_class_mapping_path', type=str, required=True, help='Path to index-to-class JSON.')
126
+ parser.add_argument('--model_path', type=str, required=True, help='Path to classifier model (.pth).')
127
+ parser.add_argument('--edgeface_model_name', type=str, default='edgeface_base', help='EdgeFace model name.')
128
+ parser.add_argument('--edgeface_model_dir', type=str, default='ckpts/idiap', help='EdgeFace model directory.')
129
+ parser.add_argument('--algorithm', type=str, default='yolo', choices=['mtcnn', 'yolo'], help='Face detection algorithm.')
130
+ parser.add_argument('--accelerator', type=str, default='auto', choices=['cpu', 'gpu', 'auto'], help='Accelerator type.')
131
+ parser.add_argument('--resolution', type=int, default=224, help='Input image resolution.')
132
+ parser.add_argument('--similarity_threshold', type=float, default=0.6, help='Cosine similarity threshold.')
133
+
134
+ args = parser.parse_args()
135
+ main(args)
136
+
137
+ # python src/slimface/inference/end2end_inference.py \
138
+ # --unknown_image_path tests/test_images/dont_know.jpg \
139
+ # --reference_dict_path tests/reference_image_data.json \
140
+ # --index_to_class_mapping_path /content/SlimFace/ckpts/index_to_class_mapping.json \
141
+ # --model_path /content/SlimFace/ckpts/SlimFace_efficientnet_b3_full_model.pth \
142
+ # --edgeface_model_name edgeface_base \
143
+ # --similarity_threshold 0.6
SlimFace/src/slimface/inference/inference.py ADDED
@@ -0,0 +1,126 @@
1
+ import os
2
+ import sys
3
+ import torch
4
+ import torchvision.transforms as transforms
5
+ from PIL import Image
6
+ import argparse
7
+ import warnings
8
+ import json
9
+
10
+ # Append the parent package directory (src/slimface) to the system path so the 'models' package can be imported
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
12
+
13
+ from models.detection_models import align
14
+
15
+ def preprocess_image(image_path, algorithm='yolo', resolution=224):
16
+ """Preprocess a single image using face alignment and specified resolution."""
17
+ if align is None:
18
+ raise ImportError("face_alignment package is required for preprocessing.")
19
+ try:
20
+ with warnings.catch_warnings():
21
+ warnings.filterwarnings("ignore", category=FutureWarning, message=".*rcond.*")
22
+ aligned_result = align.get_aligned_face([image_path], algorithm=algorithm)
23
+ aligned_image = aligned_result[0][1] if aligned_result and len(aligned_result) > 0 else None
24
+ if aligned_image is None:
25
+ print(f"Face detection failed for {image_path}, using resized original image")
26
+ aligned_image = Image.open(image_path).convert('RGB')
27
+ aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
28
+ except Exception as e:
29
+ print(f"Error processing {image_path}: {e}")
30
+ aligned_image = Image.open(image_path).convert('RGB')
31
+ aligned_image = aligned_image.resize((resolution, resolution), Image.Resampling.LANCZOS)
32
+
33
+ transform = transforms.Compose([
34
+ transforms.ToTensor(),
35
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
36
+ ])
37
+ image_tensor = transform(aligned_image).unsqueeze(0) # Add batch dimension
38
+ return image_tensor
39
+
40
+ def load_model(model_path):
41
+ """Load the trained model in TorchScript format."""
42
+ try:
43
+ model = torch.jit.load(model_path, map_location=torch.device('cpu'))
44
+ model.eval()
45
+ return model
46
+ except Exception as e:
47
+ raise RuntimeError(f"Failed to load TorchScript model from {model_path}: {e}")
48
+
49
+ def load_class_mapping(index_to_class_mapping_path):
50
+ """Load class-to-index mapping from the JSON file."""
51
+ try:
52
+ with open(index_to_class_mapping_path, 'r') as f:
53
+ idx_to_class = json.load(f)
54
+ # Convert string keys (from JSON) to integers
55
+ idx_to_class = {int(k): v for k, v in idx_to_class.items()}
56
+ return idx_to_class
57
+ except FileNotFoundError:
58
+ raise FileNotFoundError(f"Index to class mapping file {index_to_class_mapping_path} not found.")
59
+ except Exception as e:
60
+ raise ValueError(f"Error loading index to class mapping: {e}")
61
+
62
+ def inference(args):
63
+ # Load class mapping from JSON file
64
+ idx_to_class = load_class_mapping(args.index_to_class_mapping_path)
65
+
66
+ # Load model
67
+ model = load_model(args.model_path)
68
+
69
+ # Process input images
70
+ device = torch.device('cuda' if torch.cuda.is_available() and args.accelerator == 'gpu' else 'cpu')
71
+ model = model.to(device)
72
+
73
+ image_paths = []
74
+ if os.path.isdir(args.input_path):
75
+ for img_name in os.listdir(args.input_path):
76
+ if img_name.endswith(('.jpg', '.jpeg', '.png')):
77
+ image_paths.append(os.path.join(args.input_path, img_name))
78
+ else:
79
+ if args.input_path.endswith(('.jpg', '.jpeg', '.png')):
80
+ image_paths.append(args.input_path)
81
+ else:
82
+ raise ValueError("Input path must be a directory or a valid image file.")
83
+
84
+ # Perform inference
85
+ results = []
86
+ with torch.no_grad():
87
+ for image_path in image_paths:
88
+ image_tensor = preprocess_image(image_path, algorithm=args.algorithm, resolution=args.resolution)
89
+ image_tensor = image_tensor.to(device)
90
+ output = model(image_tensor)
91
+ probabilities = torch.softmax(output, dim=1)
92
+ confidence, predicted = torch.max(probabilities, 1)
93
+ predicted_class = idx_to_class.get(predicted.item(), "Unknown")
94
+ results.append({
95
+ 'image_path': image_path,
96
+ 'predicted_class': predicted_class,
97
+ 'confidence': confidence.item()
98
+ })
+ return results
+
+ def main(args):
100
+ results = inference(args)
101
+ # Output results
102
+ for result in results:
103
+ print(f"Image: {result['image_path']}")
104
+ print(f"Predicted Class: {result['predicted_class']}")
105
+ print(f"Confidence: {result['confidence']:.4f}")
106
+
107
+
108
+ if __name__ == '__main__':
109
+ parser = argparse.ArgumentParser(description='Perform inference with a trained face classification model.')
110
+ parser.add_argument('--input_path', type=str, required=True,
111
+ help='Path to an image or directory of images for inference.')
112
+ parser.add_argument('--index_to_class_mapping_path', type=str, required=True,
113
+ help='Path to the JSON file containing index to class mapping.')
114
+ parser.add_argument('--model_path', type=str, required=True,
115
+ help='Path to the trained full model in TorchScript format (.pth file).')
116
+ parser.add_argument('--algorithm', type=str, default='yolo',
117
+ choices=['mtcnn', 'yolo'],
118
+ help='Face detection algorithm to use (mtcnn or yolo).')
119
+ parser.add_argument('--accelerator', type=str, default='auto',
120
+ choices=['cpu', 'gpu', 'auto'],
121
+ help='Accelerator type for inference.')
122
+ parser.add_argument('--resolution', type=int, default=224,
123
+ help='Resolution for input images (default: 224).')
124
+
125
+ args = parser.parse_args()
126
+ main(args)
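+
+ # Example invocation (a sketch; the checkpoint and image paths are illustrative placeholders):
+ # python src/slimface/inference/inference.py \
+ # --input_path tests/test_images \
+ # --index_to_class_mapping_path ckpts/index_to_class_mapping.json \
+ # --model_path ckpts/SlimFace_efficientnet_b3_full_model.pth \
+ # --algorithm yolo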
SlimFace/src/slimface/models/__init__.py ADDED
File without changes
SlimFace/src/slimface/models/classification_models/__init__.py ADDED
File without changes
SlimFace/src/slimface/models/classification_models/alls.py ADDED
@@ -0,0 +1,55 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ class FaceClassifier(nn.Module):
5
+ """Face classification model with a configurable head."""
6
+ def __init__(self, base_model, num_classes, model_name, model_configs):
7
+ super(FaceClassifier, self).__init__()
8
+ self.base_model = base_model
9
+ self.model_name = model_name
10
+
11
+ # Determine the feature extraction method and output shape
12
+ with torch.no_grad():
13
+ dummy_input = torch.zeros(1, 3, model_configs[model_name]['resolution'], model_configs[model_name]['resolution'])
14
+ features = base_model(dummy_input)
15
+ if len(features.shape) == 4: # Spatial feature map (batch, channels, height, width)
16
+ in_channels = features.shape[1]
17
+ self.feature_type = 'spatial'
18
+ self.feature_dim = in_channels
19
+ elif len(features.shape) == 2: # Flattened feature vector (batch, features)
20
+ in_channels = features.shape[1]
21
+ self.feature_type = 'flat'
22
+ self.feature_dim = in_channels
23
+ else:
24
+ raise ValueError(f"Unexpected feature shape from base model {model_name}: {features.shape}")
25
+
26
+ # Define the classifier head based on feature type
27
+ if self.feature_type == 'flat' or 'vit' in model_name:
28
+ self.conv_head = nn.Sequential(
29
+ nn.Linear(self.feature_dim, 512),
30
+ nn.BatchNorm1d(512),
31
+ nn.ReLU(),
32
+ nn.Dropout(0.5),
33
+ nn.Linear(512, 256),
34
+ nn.BatchNorm1d(256),
35
+ nn.ReLU(),
36
+ nn.Linear(256, num_classes)
37
+ )
38
+ else:
39
+ self.conv_head = nn.Sequential(
40
+ nn.Conv2d(self.feature_dim, 512, kernel_size=3, padding=1),
41
+ nn.BatchNorm2d(512),
42
+ nn.ReLU(),
43
+ nn.Dropout2d(0.5),
44
+ nn.Conv2d(512, 256, kernel_size=3, padding=1),
45
+ nn.BatchNorm2d(256),
46
+ nn.ReLU(),
47
+ nn.AdaptiveAvgPool2d(1),
48
+ nn.Flatten(),
49
+ nn.Linear(256, num_classes)
50
+ )
51
+
52
+ def forward(self, x):
53
+ features = self.base_model(x)
54
+ output = self.conv_head(features)
55
+ return output
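+
+ # Construction sketch (illustrative only; assumes a torchvision backbone whose feature
+ # extractor returns 4D maps, and a minimal model_configs dict providing the 'resolution'
+ # key read in __init__):
+ # import torchvision.models as tv_models
+ # backbone = tv_models.efficientnet_b3(weights="DEFAULT").features
+ # model_configs = {"efficientnet_b3": {"resolution": 300}}
+ # clf = FaceClassifier(backbone, num_classes=5, model_name="efficientnet_b3", model_configs=model_configs)
+ # logits = clf(torch.zeros(2, 3, 300, 300))  # -> shape (2, 5)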
SlimFace/src/slimface/models/classification_models/efficient_v1.py ADDED
File without changes
SlimFace/src/slimface/models/classification_models/efficient_v2.py ADDED
File without changes
SlimFace/src/slimface/models/classification_models/regnet.py ADDED
File without changes
SlimFace/src/slimface/models/classification_models/vit.py ADDED
File without changes
SlimFace/src/slimface/models/detection_models/align.py ADDED
@@ -0,0 +1,57 @@
1
+ import torch
2
+ from PIL import Image
3
+ from typing import Union, List, Tuple
4
+ from . import mtcnn
5
+ from .face_yolo import face_yolo_detection
6
+
7
+ # Device configuration
8
+ DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
9
+
10
+ # Initialize MTCNN model
11
+ MTCNN_MODEL = mtcnn.MTCNN(device=DEVICE, crop_size=(112, 112))
12
+
13
+ def add_image_padding(pil_img: Image.Image, top: int, right: int, bottom: int, left: int,
14
+ color: Tuple[int, int, int] = (0, 0, 0)) -> Image.Image:
15
+ """Add padding to a PIL image."""
16
+ width, height = pil_img.size
17
+ new_width, new_height = width + right + left, height + top + bottom
18
+ padded_img = Image.new(pil_img.mode, (new_width, new_height), color)
19
+ padded_img.paste(pil_img, (left, top))
20
+ return padded_img
21
+
22
+ def detect_faces_mtcnn(image: Union[str, Image.Image]) -> Tuple[Union[list, None], Union[Image.Image, None]]:
23
+ """Detect and align faces using MTCNN model."""
24
+ if isinstance(image, str):
25
+ image = Image.open(image).convert('RGB')
26
+
27
+ if not isinstance(image, Image.Image):
28
+ raise TypeError("Input must be a PIL Image or path to an image")
29
+
30
+ try:
31
+ bboxes, faces = MTCNN_MODEL.align_multi(image, limit=1)
32
+ return bboxes[0] if bboxes else None, faces[0] if faces else None
33
+ except Exception as e:
34
+ print(f"MTCNN face detection failed: {e}")
35
+ return None, None
36
+
37
+ def get_aligned_face(image_input: Union[str, List[str]],
38
+ algorithm: str = 'mtcnn') -> List[Tuple[Union[list, None], Union[Image.Image, None]]]:
39
+ """Get aligned faces from image(s) using specified algorithm."""
40
+ if algorithm not in ['mtcnn', 'yolo']:
41
+ raise ValueError("Algorithm must be 'mtcnn' or 'yolo'")
42
+
43
+ # Convert single image path to list for consistent processing
44
+ image_paths = [image_input] if isinstance(image_input, str) else image_input
45
+ if not isinstance(image_paths, list):
46
+ raise TypeError("Input must be a string or list of strings")
47
+
48
+ if algorithm == 'mtcnn':
49
+ return [detect_faces_mtcnn(path) for path in image_paths]
50
+
51
+ # YOLO detection
52
+ results = face_yolo_detection(
53
+ image_paths,
54
+ use_batch=True,
55
+ device=DEVICE
56
+ )
57
+ return list(results)
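+
+ # Usage sketch (illustrative path; with the YOLO backend each tuple holds a bounding-box array
+ # and either a cropped PIL face or an empty list when nothing is detected):
+ # results = get_aligned_face(["tests/test_images/dont_know.jpg"], algorithm='yolo')
+ # for bboxes, face in results:
+ #     if isinstance(face, Image.Image):
+ #         face.save("aligned_face.png")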
SlimFace/src/slimface/models/detection_models/face_yolo.py ADDED
@@ -0,0 +1,151 @@
1
+ from ultralytics import YOLO
2
+ import cv2
3
+ import os
4
+ from PIL import Image
5
+ import numpy as np
6
+ import glob
7
+ import sys
8
+ import argparse
9
+ import torch
10
+
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
12
+
13
+ from utils import download_yolo_face_detection
14
+
15
+ def initialize_yolo_model(yolo_model_path):
16
+ """Initialize the YOLO face-detection model, downloading the weights if they are missing."""
17
+ # if device.startswith('cuda') and not torch.cuda.is_available():
18
+ # print("Warning: CUDA not available, falling back to CPU.")
19
+ # device = 'cpu'
20
+ if not os.path.exists(yolo_model_path):
21
+ download_yolo_face_detection.download_yolo_face_detection_model()
22
+ return YOLO(yolo_model_path)
23
+
24
+ def process_image_results(image, image_rgb, boxes):
25
+ """Process bounding boxes and crop faces for a single image."""
26
+ bounding_boxes, cropped_faces = [], []
27
+ for box in boxes:
28
+ x1, y1, x2, y2 = map(int, box)
29
+ if x2 > x1 and y2 > y1 and x1 >= 0 and y1 >= 0 and x2 <= image.shape[1] and y2 <= image.shape[0]:
30
+ bounding_boxes.append([x1, y1, x2, y2])
31
+ cropped_face = image_rgb[y1:y2, x1:x2]
32
+ if cropped_face.size > 0:
33
+ pil_image = Image.fromarray(cropped_face).resize((112, 112), Image.Resampling.BILINEAR)
34
+ cropped_faces.append(pil_image)
35
+ return np.array(bounding_boxes, dtype=np.int32) if bounding_boxes else np.empty((0, 4), dtype=np.int32), cropped_faces
36
+
37
+ def process_batch(model, image_paths, all_bounding_boxes, all_cropped_faces, device):
38
+ """Process images in batch mode using list comprehensions for efficiency."""
39
+ # Validate and load images, filter out invalid ones
40
+ valid_data = [(cv2.imread(path), path) for path in image_paths if os.path.exists(path)]
41
+ valid_images, valid_image_paths = zip(*[(img, path) for img, path in valid_data if img is not None]) if valid_data else ([], [])
42
+
43
+ # Append empty results for invalid images
44
+ for path in image_paths:
45
+ if not os.path.exists(path) or cv2.imread(path) is None:
46
+ all_bounding_boxes.append(np.empty((0, 4), dtype=np.int32))
47
+ all_cropped_faces.append([])
48
+ print(f"Warning: {'not found' if not os.path.exists(path) else 'failed to load'} {path}. Skipping.")
49
+
50
+ # Process valid images
51
+ if valid_images:
52
+ images_rgb = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in valid_images]
53
+ results = model.predict(source=valid_image_paths, conf=0.25, iou=0.45, verbose=False, device=device)
54
+
55
+ # Process results with comprehension
56
+ for img, rgb, result in zip(valid_images, images_rgb, results):
57
+ bboxes, faces = process_image_results(img, rgb, result.boxes.xyxy.cpu().numpy())
58
+ all_bounding_boxes.append(bboxes)
59
+ all_cropped_faces.append(faces[0] if faces else [])
60
+
61
+ def process_individual(model, image_paths, all_bounding_boxes, all_cropped_faces, device):
62
+ """Process images individually."""
63
+ for image_path in image_paths:
64
+ if not os.path.exists(image_path):
65
+ print(f"Warning: {image_path} not found. Skipping.")
66
+ all_bounding_boxes.append(np.empty((0, 4), dtype=np.int32))
67
+ all_cropped_faces.append([])
68
+ continue
69
+
70
+ image = cv2.imread(image_path)
71
+ if image is None:
72
+ print(f"Warning: Failed to load {image_path}. Skipping.")
73
+ all_bounding_boxes.append(np.empty((0, 4), dtype=np.int32))
74
+ all_cropped_faces.append([])
75
+ continue
76
+
77
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
78
+ results = model(image_path, conf=0.25, iou=0.45, verbose=False, device=device)
79
+
80
+ for result in results:
81
+ boxes = result.boxes.xyxy.cpu().numpy()
82
+ bboxes, faces = process_image_results(image, image_rgb, boxes)
83
+ all_bounding_boxes.append(bboxes)
84
+ all_cropped_faces.append(faces[0] if faces else [])
85
+
86
+ def face_yolo_detection(image_paths,
87
+ yolo_model_path="./ckpts/yolo_face_detection/model.pt",
88
+ use_batch=True, device='cuda'):
89
+ """Perform face detection using YOLOv11 with batch or individual processing on specified device."""
90
+ model = initialize_yolo_model(yolo_model_path)
91
+ all_bounding_boxes, all_cropped_faces = [], []
92
+
93
+ if use_batch:
94
+ process_batch(model, image_paths, all_bounding_boxes, all_cropped_faces, device)
95
+ else:
96
+ process_individual(model, image_paths, all_bounding_boxes, all_cropped_faces, device)
97
+
98
+ return zip(all_bounding_boxes, all_cropped_faces)
99
+
100
+ if __name__ == "__main__":
101
+ parser = argparse.ArgumentParser(description="YOLOv11 face detection")
102
+ parser.add_argument("--use-batch", action="store_true", default=True, help="Use batch processing (default: True)")
103
+ parser.add_argument("--image-dir", type=str, default="test/test_images", help="Input image directory")
104
+ parser.add_argument("--yolo-model-path", type=str, default="checkpoints/yolo11_face_detection/model.pt", help="YOLO model path")
105
+ parser.add_argument("--device", type=str, default="cuda", help="Device to run the model (e.g., 'cuda', 'cpu', 'cuda:0')")
106
+
107
+ args = parser.parse_args()
108
+
109
+ image_paths = (glob.glob(os.path.join(args.image_dir, "*.[jJ][pP][gG]")) +
110
+ glob.glob(os.path.join(args.image_dir, "*.[pP][nN][gG]")))
111
+
112
+ if args.yolo_model_path:
113
+ yolo_model_path = args.yolo_model_path
114
+ else:
115
+ yolo_model_path = os.path.join("checkpoints", "yolo11_face_detection", "model.pt")
116
+
117
+ import time
118
+ t1 = time.time()
119
+ results = face_yolo_detection(image_paths, yolo_model_path, args.use_batch, args.device)
120
+ print("Time taken:", time.time() - t1)
121
+
122
+ # Optional: Save or process results
123
+ # for i, (bboxes, faces) in enumerate(results):
124
+ # print(f"Image {i}: Bounding Boxes: {bboxes}")
125
+ # for j, face in enumerate(faces):
126
+ # face.save(f"face_{i}_{j}.png")
127
+
128
+ # Benchmarking (uncomment to use)
129
+ # import time
130
+ # num_runs = 50
131
+ # batch_times, individual_times = [], []
132
+
133
+ # # Benchmark batch processing
134
+ # for _ in range(num_runs):
135
+ # t1 = time.time()
136
+ # face_yolo_detection(image_paths, yolo_model_path, use_batch=True, device=args.device)
137
+ # batch_times.append(time.time() - t1)
138
+
139
+ # # Benchmark individual processing
140
+ # for _ in range(num_runs):
141
+ # t1 = time.time()
142
+ # face_yolo_detection(image_paths, yolo_model_path, use_batch=False, device=args.device)
143
+ # individual_times.append(time.time() - t1)
144
+
145
+ # # Calculate and print average times
146
+ # avg_batch_time = sum(batch_times) / num_runs
147
+ # avg_individual_time = sum(individual_times) / num_runs
148
+
149
+ # print(f"\nBenchmark Results (over {num_runs} runs):")
150
+ # print(f"Average Batch Processing Time: {avg_batch_time:.4f} seconds")
151
+ # print(f"Average Individual Processing Time: {avg_individual_time:.4f} seconds")
SlimFace/src/slimface/models/detection_models/mtcnn.py ADDED
@@ -0,0 +1,175 @@
1
+ from typing import Tuple
2
+ import numpy as np
3
+ import torch
4
+ from PIL import Image
5
+ from torch.autograd import Variable
6
+
7
+ import sys
8
+ import os
9
+
10
+ sys.path.insert(0, os.path.dirname(__file__))
11
+
12
+ from mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
13
+ from mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
14
+ from mtcnn_pytorch.src.first_stage import run_first_stage
15
+ from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face
16
+
17
+
18
+ class MTCNN():
19
+ def __init__(self, device: str = 'cuda:0', crop_size: Tuple[int, int] = (112, 112)):
20
+
21
+ assert device in ['cuda:0', 'cpu']
22
+ self.device = torch.device(device)
23
+ assert crop_size in [(112, 112), (96, 112)]
24
+ self.crop_size = crop_size
25
+
26
+ # change working dir to this file location to load npz files. Then switch back
27
+ cwd = os.getcwd()
28
+ os.chdir(os.path.dirname(__file__))
29
+
30
+ self.pnet = PNet().to(self.device)
31
+ self.rnet = RNet().to(self.device)
32
+ self.onet = ONet().to(self.device)
33
+ self.pnet.eval()
34
+ self.rnet.eval()
35
+ self.onet.eval()
36
+ self.refrence = get_reference_facial_points(default_square=crop_size[0] == crop_size[1])
37
+
38
+ self.min_face_size = 20
39
+ self.thresholds = [0.6,0.7,0.9]
40
+ self.nms_thresholds = [0.7, 0.7, 0.7]
41
+ self.factor = 0.85
42
+
43
+
44
+ os.chdir(cwd)
45
+
46
+ def align(self, img):
47
+ _, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
48
+ facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
49
+ warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=self.crop_size)
50
+ return Image.fromarray(warped_face)
51
+
52
+ def align_multi(self, img, limit=None):
53
+ boxes, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
54
+ if limit:
55
+ boxes = boxes[:limit]
56
+ landmarks = landmarks[:limit]
57
+ faces = []
58
+ for landmark in landmarks:
59
+ facial5points = [[landmark[j], landmark[j + 5]] for j in range(5)]
60
+ warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=self.crop_size)
61
+ faces.append(Image.fromarray(warped_face))
62
+ return boxes, faces
63
+
64
+ def detect_faces(self, image, min_face_size, thresholds, nms_thresholds, factor):
65
+ """
66
+ Arguments:
67
+ image: an instance of PIL.Image.
68
+ min_face_size: a float number.
69
+ thresholds: a list of length 3.
70
+ nms_thresholds: a list of length 3.
+ factor: a float number, the scaling factor for the image pyramid.
71
+
72
+ Returns:
73
+ two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
74
+ bounding boxes and facial landmarks.
75
+ """
76
+
77
+ # BUILD AN IMAGE PYRAMID
78
+ width, height = image.size
79
+ min_length = min(height, width)
80
+
81
+ min_detection_size = 12
82
+ # factor = 0.707 # sqrt(0.5)
83
+
84
+ # scales for scaling the image
85
+ scales = []
86
+
87
+ # scales the image so that
88
+ # minimum size that we can detect equals to
89
+ # minimum face size that we want to detect
90
+ m = min_detection_size / min_face_size
91
+ min_length *= m
92
+
93
+ factor_count = 0
94
+ while min_length > min_detection_size:
95
+ scales.append(m * factor**factor_count)
96
+ min_length *= factor
97
+ factor_count += 1
98
+
99
+ # STAGE 1
100
+
101
+ # it will be returned
102
+ bounding_boxes = []
103
+
104
+ with torch.no_grad():
105
+ # run P-Net on different scales
106
+ for s in scales:
107
+ boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
108
+ bounding_boxes.append(boxes)
109
+
110
+ # collect boxes (and offsets, and scores) from different scales
111
+ bounding_boxes = [i for i in bounding_boxes if i is not None]
112
+ if len(bounding_boxes) == 0:
113
+ return [], []
114
+ bounding_boxes = np.vstack(bounding_boxes)
115
+
116
+ keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
117
+ bounding_boxes = bounding_boxes[keep]
118
+
119
+ # use offsets predicted by pnet to transform bounding boxes
120
+ bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
121
+ # shape [n_boxes, 5]
122
+
123
+ bounding_boxes = convert_to_square(bounding_boxes)
124
+ bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
125
+
126
+ # STAGE 2
127
+
128
+ img_boxes = get_image_boxes(bounding_boxes, image, size=24)
129
+ img_boxes = torch.FloatTensor(img_boxes).to(self.device)
130
+
131
+ output = self.rnet(img_boxes)
132
+ offsets = output[0].cpu().data.numpy() # shape [n_boxes, 4]
133
+ probs = output[1].cpu().data.numpy() # shape [n_boxes, 2]
134
+
135
+ keep = np.where(probs[:, 1] > thresholds[1])[0]
136
+ bounding_boxes = bounding_boxes[keep]
137
+ bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
138
+ offsets = offsets[keep]
139
+
140
+ keep = nms(bounding_boxes, nms_thresholds[1])
141
+ bounding_boxes = bounding_boxes[keep]
142
+ bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
143
+ bounding_boxes = convert_to_square(bounding_boxes)
144
+ bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
145
+
146
+ # STAGE 3
147
+
148
+ img_boxes = get_image_boxes(bounding_boxes, image, size=48)
149
+ if len(img_boxes) == 0:
150
+ return [], []
151
+ img_boxes = torch.FloatTensor(img_boxes).to(self.device)
152
+ output = self.onet(img_boxes)
153
+ landmarks = output[0].cpu().data.numpy() # shape [n_boxes, 10]
154
+ offsets = output[1].cpu().data.numpy() # shape [n_boxes, 4]
155
+ probs = output[2].cpu().data.numpy() # shape [n_boxes, 2]
156
+
157
+ keep = np.where(probs[:, 1] > thresholds[2])[0]
158
+ bounding_boxes = bounding_boxes[keep]
159
+ bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
160
+ offsets = offsets[keep]
161
+ landmarks = landmarks[keep]
162
+
163
+ # compute landmark points
164
+ width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
165
+ height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
166
+ xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
167
+ landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
168
+ landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]
169
+
170
+ bounding_boxes = calibrate_box(bounding_boxes, offsets)
171
+ keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
172
+ bounding_boxes = bounding_boxes[keep]
173
+ landmarks = landmarks[keep]
174
+
175
+ return bounding_boxes, landmarks
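+
+ # Usage sketch (illustrative; expects an RGB PIL image and at least one detectable face):
+ # detector = MTCNN(device='cpu', crop_size=(112, 112))
+ # boxes, faces = detector.align_multi(Image.open('photo.jpg').convert('RGB'), limit=1)
+ # faces[0].save('aligned.png')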
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ .ipynb_checkpoints
2
+ __pycache__
3
+
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Dan Antoshchenko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/README.md ADDED
@@ -0,0 +1,26 @@
1
+ # MTCNN
2
+
3
+ `pytorch` implementation of **inference stage** of face detection algorithm described in
4
+ [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878).
5
+
6
+ ## Example
7
+ ![example of a face detection](images/example.png)
8
+
9
+ ## How to use it
10
+ Just download the repository and then do this
11
+ ```python
12
+ from src import detect_faces
13
+ from PIL import Image
14
+
15
+ image = Image.open('image.jpg')
16
+ bounding_boxes, landmarks = detect_faces(image)
17
+ ```
18
+ For examples see `test_on_images.ipynb`.
19
+
20
+ ## Requirements
21
+ * pytorch 0.2
22
+ * Pillow, numpy
23
+
24
+ ## Credit
25
+ This implementation is heavily inspired by:
26
+ * [pangyupo/mxnet_mtcnn_face_detection](https://github.com/pangyupo/mxnet_mtcnn_face_detection)
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det1.caffemodel ADDED
Binary file (28.2 kB). View file
 
SlimFace/src/slimface/models/detection_models/mtcnn_pytorch/caffe_models/det1.prototxt ADDED
@@ -0,0 +1,177 @@
1
+ name: "PNet"
2
+ input: "data"
3
+ input_dim: 1
4
+ input_dim: 3
5
+ input_dim: 12
6
+ input_dim: 12
7
+
8
+ layer {
9
+ name: "conv1"
10
+ type: "Convolution"
11
+ bottom: "data"
12
+ top: "conv1"
13
+ param {
14
+ lr_mult: 1
15
+ decay_mult: 1
16
+ }
17
+ param {
18
+ lr_mult: 2
19
+ decay_mult: 0
20
+ }
21
+ convolution_param {
22
+ num_output: 10
23
+ kernel_size: 3
24
+ stride: 1
25
+ weight_filler {
26
+ type: "xavier"
27
+ }
28
+ bias_filler {
29
+ type: "constant"
30
+ value: 0
31
+ }
32
+ }
33
+ }
34
+ layer {
35
+ name: "PReLU1"
36
+ type: "PReLU"
37
+ bottom: "conv1"
38
+ top: "conv1"
39
+ }
40
+ layer {
41
+ name: "pool1"
42
+ type: "Pooling"
43
+ bottom: "conv1"
44
+ top: "pool1"
45
+ pooling_param {
46
+ pool: MAX
47
+ kernel_size: 2
48
+ stride: 2
49
+ }
50
+ }
51
+
52
+ layer {
53
+ name: "conv2"
54
+ type: "Convolution"
55
+ bottom: "pool1"
56
+ top: "conv2"
57
+ param {
58
+ lr_mult: 1
59
+ decay_mult: 1
60
+ }
61
+ param {
62
+ lr_mult: 2
63
+ decay_mult: 0
64
+ }
65
+ convolution_param {
66
+ num_output: 16
67
+ kernel_size: 3
68
+ stride: 1
69
+ weight_filler {
70
+ type: "xavier"
71
+ }
72
+ bias_filler {
73
+ type: "constant"
74
+ value: 0
75
+ }
76
+ }
77
+ }
78
+ layer {
79
+ name: "PReLU2"
80
+ type: "PReLU"
81
+ bottom: "conv2"
82
+ top: "conv2"
83
+ }
84
+
85
+ layer {
86
+ name: "conv3"
87
+ type: "Convolution"
88
+ bottom: "conv2"
89
+ top: "conv3"
90
+ param {
91
+ lr_mult: 1
92
+ decay_mult: 1
93
+ }
94
+ param {
95
+ lr_mult: 2
96
+ decay_mult: 0
97
+ }
98
+ convolution_param {
99
+ num_output: 32
100
+ kernel_size: 3
101
+ stride: 1
102
+ weight_filler {
103
+ type: "xavier"
104
+ }
105
+ bias_filler {
106
+ type: "constant"
107
+ value: 0
108
+ }
109
+ }
110
+ }
111
+ layer {
112
+ name: "PReLU3"
113
+ type: "PReLU"
114
+ bottom: "conv3"
115
+ top: "conv3"
116
+ }
117
+
118
+
119
+ layer {
120
+ name: "conv4-1"
121
+ type: "Convolution"
122
+ bottom: "conv3"
123
+ top: "conv4-1"
124
+ param {
125
+ lr_mult: 1
126
+ decay_mult: 1
127
+ }
128
+ param {
129
+ lr_mult: 2
130
+ decay_mult: 0
131
+ }
132
+ convolution_param {
133
+ num_output: 2
134
+ kernel_size: 1
135
+ stride: 1
136
+ weight_filler {
137
+ type: "xavier"
138
+ }
139
+ bias_filler {
140
+ type: "constant"
141
+ value: 0
142
+ }
143
+ }
144
+ }
145
+
146
+ layer {
147
+ name: "conv4-2"
148
+ type: "Convolution"
149
+ bottom: "conv3"
150
+ top: "conv4-2"
151
+ param {
152
+ lr_mult: 1
153
+ decay_mult: 1
154
+ }
155
+ param {
156
+ lr_mult: 2
157
+ decay_mult: 0
158
+ }
159
+ convolution_param {
160
+ num_output: 4
161
+ kernel_size: 1
162
+ stride: 1
163
+ weight_filler {
164
+ type: "xavier"
165
+ }
166
+ bias_filler {
167
+ type: "constant"
168
+ value: 0
169
+ }
170
+ }
171
+ }
172
+ layer {
173
+ name: "prob1"
174
+ type: "Softmax"
175
+ bottom: "conv4-1"
176
+ top: "prob1"
177
+ }