Upload 9 files
- .gitignore +285 -0
- LICENSE +21 -0
- README.md +213 -1
- __init__.py +0 -0
- classify.py +79 -0
- project-statement.md +42 -0
- recommend.py +108 -0
- requirements.txt +20 -0
.gitignore
ADDED
@@ -0,0 +1,285 @@
# Created by https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks,jetbrains+all
# Edit at https://www.toptal.com/developers/gitignore?templates=python,jupyternotebooks,jetbrains+all

### JetBrains+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# AWS User-specific
.idea/**/aws.xml

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### JetBrains+all Patch ###
# Ignore everything but code style settings and run configurations
# that are supposed to be shared within teams.

.idea/*

!.idea/codeStyles
!.idea/runConfigurations
.idea/

### JupyterNotebooks ###
# gitignore template for Jupyter Notebooks
# website: http://jupyter.org/

.ipynb_checkpoints
*/.ipynb_checkpoints/*

# IPython
profile_default/
ipython_config.py

# Remove previous ipynb_checkpoints
# git rm -r .ipynb_checkpoints/

# Dataset
data/
scraping/*/
scraping/*.zip

# Machine Learning
log/
models/

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook

# IPython

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml

# ruff
.ruff_cache/

# LSP config files
pyrightconfig.json

# End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks,jetbrains+all
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Tuong-Minh (Mike) Vo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,3 +1,215 @@
# BloomSage Machine Learning & MLOps Backend Component

---


## Project Structure


```
.
├── font/
├── notebooks/
│   ├── images/
│   ├── Step1.EDA.ipynb
│   ├── Step2.DataPrep.ipynb
│   └── Step3.Classifier-BaselineModel.ipynb
├── scraping/
│   └── scrape.py
├── classify.py
├── recommend.py
├── requirements.txt
├── .gitignore
├── project-statement.md
├── README.md
└── LICENSE
```

1. `font/`: This folder contains the fonts used in our client scripts' GUI mode.
2. `notebooks/`: This folder contains all Jupyter Notebooks for this project and their exported plots in `notebooks/images/`.
3. `scraping/`: This folder contains a scraping script for collecting additional images from the internet for our dataset. All downloaded images are also stored in this folder.
4. `classify.py`: Client script for classifying flower images using trained models.
5. `recommend.py`: Client script for recommending flower images using trained models.
6. `requirements.txt`: Package list for `pip`, used to set up the development environment.
7. `.gitignore`: VCS ignore rules.
8. `README.md`: A text file containing useful reference information about this project, including how to run the algorithms.
9. `LICENSE`: MIT.


Additionally, these folders will be created during dataset fetching and model training:

1. `data/`: This folder contains our datasets.
2. `log/`: This folder contains training logs exported while training our models.
3. `models/`: This folder contains trained models exported after training.

---


## Getting Started 🚀

Clone this repository:

```bash
git clone https://github.com/rmit-denominator/bloomsage-ml.git
```


### Development Environment

To set up the necessary packages for this project, run:

```bash
pip install -r requirements.txt
```

Refer to [requirements.txt](requirements.txt) for package dependencies and their versions.

<span style="color:gold">**NOTE:**</span> It is recommended that you use a Python virtual environment to avoid conflicts with your global packages and to keep your global Python installation clean. This is because we require specific versions of NumPy, TensorFlow, and Keras in our code to maintain backward compatibility and compatibility between trained models and client code.


### Download Dataset

The dataset for this project is available on [Kaggle](https://kaggle.com/datasets/979207e9d5e6d91d26e8eb340941ae176c82fbdb2a25b4a436c273895ab96bb1). Follow these steps to download and set it up for training and testing:

1. Navigate to the project's root directory.

2. Clean all existing files in the `data/` folder (if it exists) before downloading or updating this dataset:

    ```bash
    rm -r ./data/*
    ```

3. Download and **extract the contents of** the `.zip` from [Kaggle](https://kaggle.com/datasets/979207e9d5e6d91d26e8eb340941ae176c82fbdb2a25b4a436c273895ab96bb1) into the `data/raw` folder.

    Alternatively, use the [Kaggle CLI](https://github.com/Kaggle/kaggle-api):

    ```bash
    kaggle datasets download -d miketvo/rmit-flowers -p ./data/raw/ --unzip
    ```

    The resulting folder structure should look like this:

    ```
    .
    ├── data/
    │   └── raw/
    │       ├── Baby/
    │       ├── Calimerio/
    │       ├── Chrysanthemum/
    │       ...
    │       └── Tana/
    │
    ...
    ```

    where each folder corresponds to a flower class and contains images of only that class.

4. Set up for training and testing: Run [notebooks/Step2.DataPrep.ipynb](notebooks/Step2.DataPrep.ipynb) and [notebooks/Step5.Recommender.ipynb](notebooks/Step5.Recommender.ipynb). They clean, process, and split the raw dataset into a train set in `data/train/` and a test set in `data/test/`. They also generate a database for our image recommendation system in `data/recommender-database/`, along with `data/recommender-database.csv`, which contains the feature vectors for all images in the recommender database, and they export two helper models for the recommendation system: `models/fe-cnn` and `models/clu-kmeans.model`. **Note:** Clean these folders and files before you run these two notebooks:

    ```bash
    rm -r ./data/train
    rm -r ./data/test
    rm -r ./data/recommender-database
    rm ./data/recommender-database.csv
    ```

    **<span style="color:red">Important:</span>** Clean and rerun this step every time you modify the raw dataset to get the most up-to-date train dataset, test dataset, and recommender database.


### Training

Skip this step if you just want to use one of the pre-trained model packages available from [Releases](https://github.com/miketvo/rmit2023a-cosc2753-assignment2/releases).

- Run the Jupyter Notebooks in `notebooks/` in their prefixed order (`Step1.`, `Step2.`, `Step3.`, and so on), <span style="color:red">**one file at a time**</span>.
- Skip [notebooks/Step2.DataPrep.ipynb](notebooks/Step2.DataPrep.ipynb) if you have already run it after downloading the raw dataset in the step above.
- Skip [notebooks/Step5.Recommender.ipynb](notebooks/Step5.Recommender.ipynb) if you have already run it after downloading the raw dataset in the step above.
- The resulting models are exported into the `models/` folder. Their training logs are stored in the `log/` folder.

**Note:** Any existing model with a conflicting name in `models/` will be replaced by the newly trained models.


### Using Trained Models

If you are using one of our pre-trained model packages, download your desired version from [Releases](https://github.com/miketvo/rmit2023a-cosc2753-assignment2/releases) (.zip archives) and extract its contents into this project's root directory using your preferred zip program. Make sure to check and clean the `models/` folder (if it exists) before extraction to avoid naming conflicts with existing trained models.

These trained models can then be loaded into your code with:

```python
import tensorflow as tf

model = tf.keras.models.load_model('path/to/model')
```
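
A loaded classifier expects a batch of preprocessed images, not a raw file. Below is a minimal sketch of running inference, reusing the same preprocessing helpers that `classify.py` itself imports; it assumes you are in the project root and are using the default `models/clf-cnn` classifier:

```python
import tensorflow as tf
from PIL import Image

from utils.glob import TARGET_IMG_SIZE, CLASS_LABELS  # constants used by classify.py
import utils.data_manip as manip                      # preprocessing helpers used by classify.py

model = tf.keras.models.load_model('models/clf-cnn')

# Preprocess exactly as classify.py does: flatten transparency, resize/crop, normalize.
im = Image.open('path/to/image.png')  # placeholder path
im = manip.remove_transparency(im)
im = manip.resize_crop(im, TARGET_IMG_SIZE, TARGET_IMG_SIZE)
im = manip.normalize_pixels(im)

pred = model.predict(tf.expand_dims(im, axis=0), verbose=0)  # batch of one image
print(CLASS_LABELS[tf.argmax(pred, axis=1).numpy()[0]])
```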

Additionally, two Python files, `classify.py` and `recommend.py`, are provided as simple front ends to our trained models. You can either run them as standalone scripts in the terminal or import them as Python modules in your own Python script or Jupyter Notebook to programmatically classify multiple images and recommend similar images for each of them.

To use them as standalone scripts, follow the instructions below.

In your terminal, make sure that the environment is activated so that the client scripts have access to all required packages:

- Python virtualenv:

    ```bash
    source ./venv/Scripts/activate
    ```

- Conda:

    ```bash
    conda activate ./envs
    ```

#### Classifying Flower Images

Use the `classify.py` client script. Its syntax is as follows:

```text
usage: classify.py [-h] -f FILE [-c CLASSIFIER] [-g] [-v {0,1,2}]

options:
  -h, --help            show this help message and exit
  -f FILE, --file FILE  the image to be classified
  -c CLASSIFIER, --classifier CLASSIFIER
                        the machine learning model used for classification, default: models/clf-cnn
  -g, --gui             show classification result using GUI
  -v {0,1,2}, --verbose-level {0,1,2}
                        verbose level, default: 0
```

Example use:

```text
$ python ./classify.py -f path/to/your/image.png -c ./models/clf -v=1
Image image.png is classified as "Chrysanthemum" (model: "clf")
```

It also has a rudimentary GUI mode using Matplotlib, which displays the image captioned with the flower type it is classified as:

```bash
python ./classify.py --gui -f path/to/your/image.png -c ./models/clf
```

**Note:** Alternatively, you can import its `classify.classify()` function into your own script or notebook to programmatically classify multiple images (see its docstring for instructions).
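
For instance, a short sketch of batch classification via import (the image paths below are placeholders; `classify()` returns the image and its predicted label, as defined in `classify.py`):

```python
from classify import classify

# Hypothetical list of input images to label in one go.
for path in ['flowers/a.png', 'flowers/b.png']:
    _, label = classify(path, classifier_path='models/clf-cnn')
    print(f'{path}: {label}')
```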


#### Recommending Flower Images

Use the `recommend.py` client script. Its syntax is as follows:

```text
usage: recommend.py [-h] -f FILE [-d DATABASE] [-c CLASSIFIER] [-e FEATURE_EXTRACTOR] [-k CLUSTERING_MODEL] [-n NUM]

options:
  -h, --help            show this help message and exit
  -f FILE, --file FILE  reference image
  -d DATABASE, --database DATABASE
                        the database containing the images to be recommended, default: data/recommender-database
  -c CLASSIFIER, --classifier CLASSIFIER
                        the machine learning model used for image classification, default: models/clf-cnn
  -e FEATURE_EXTRACTOR, --feature-extractor FEATURE_EXTRACTOR
                        the machine learning model used for image feature extraction, default: models/fe-cnn
  -k CLUSTERING_MODEL, --clustering-model CLUSTERING_MODEL
                        the machine learning model used for image clustering, default: models/clu-kmeans.model
  -n NUM, --num NUM     number of recommendations, default: 10
```

Example:

```bash
python ./recommend.py -f path/to/your/image.png
```

When executed, the command above displays, in GUI mode, 10 similar flower images of the same type as your reference image, taken from the recommender database in `data/recommender-database/`, using the default classifier, feature extractor, and clustering model.
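
**Note:** Like `classify.py`, `recommend.py` can also be imported. A minimal sketch, assuming the default artifact paths listed in the usage text above; `recommend()` takes the feature-vector `.csv`, and the returned paths are relative to the database folder:

```python
import os.path
from recommend import recommend

database = 'data/recommender-database'
paths = recommend(
    'reference.png', 10,  # hypothetical reference image; 10 recommendations
    database + '.csv',    # feature-vector database generated by Step5.Recommender.ipynb
    'models/clf-cnn', 'models/fe-cnn', 'models/clu-kmeans.model',
)
print([os.path.join(database, p) for p in paths])
```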
__init__.py
ADDED
File without changes
classify.py
ADDED
@@ -0,0 +1,79 @@
import os
import argparse

import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf
from keras.engine.training import Model

from utils.glob import TARGET_IMG_SIZE
from utils.glob import CLASS_LABELS
import utils.data_manip as manip


def classify(image_path: str, classifier_path: str, verbose: bool = False, return_original: bool = True) -> tuple:
    """
    Uses a trained machine learning model to classify an image loaded from disk.

    :param image_path: Path to the image to be classified.
    :param classifier_path: Path to the classifier model to be used.
    :param verbose: Verbose output.
    :param return_original: Whether to return the original image or the processed image.
    :return: The original/processed image (PIL.Image) and its classification (str).
    """
    # Preprocess the image the same way the training data was prepared.
    im_original = Image.open(image_path)
    im_processed = manip.remove_transparency(im_original)
    im_processed = manip.resize_crop(im_processed, TARGET_IMG_SIZE, TARGET_IMG_SIZE)
    im_processed = manip.normalize_pixels(im_processed)
    im_processed = tf.expand_dims(im_processed, axis=0)  # batch of one

    model: Model = tf.keras.models.load_model(classifier_path)
    pred = model.predict(im_processed, verbose=1 if verbose else 0)

    # Map the highest-probability output back to its class label.
    pred_class_idx = tf.argmax(pred, axis=1).numpy()[0]
    pred_class_label = CLASS_LABELS[pred_class_idx]

    if return_original:
        return im_original, pred_class_label
    else:
        return im_processed, pred_class_label


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-f', '--file', required=True, help='the image to be classified')
    ap.add_argument('-c', '--classifier', default='models/clf-cnn', help='the machine learning model used for classification, default: models/clf-cnn')
    ap.add_argument('-g', '--gui', action='store_true', help='show classification result using GUI')
    ap.add_argument('-v', '--verbose-level', choices=['0', '1', '2'], default='0', help='verbose level, default: 0')
    args = vars(ap.parse_args())
    verbose_level = int(args['verbose_level'])

    img = os.path.abspath(args['file'])
    clf = os.path.abspath(args['classifier'])
    image, predicted_label = classify(img, clf, verbose=verbose_level >= 2)

    if args['gui']:
        fig, ax = plt.subplots(1, 1, num='Flower Image Classifier')
        ax.imshow(image)
        ax.set_title(
            f'{predicted_label}',
            fontsize=12,
            weight='bold'
        )
        ax.text(
            0.5, -0.08, f'{os.path.relpath(img)}',
            horizontalalignment='center',
            verticalalignment='center_baseline',
            transform=ax.transAxes,
            fontsize=8,
        )
        ax.axis('off')
        plt.show()
    else:
        if verbose_level == 0:
            print(predicted_label)
        else:
            print(
                f'Image {os.path.basename(img)} is classified as "{predicted_label}" (model: "{os.path.basename(clf)}")'
            )
project-statement.md
ADDED
@@ -0,0 +1,42 @@
# Project: Classify Images of Flowers

Assume you are a team of machine learning engineers working for an e-commerce flower shop where users can order flowers. Before users buy flowers, the system should help them navigate to the type of flowers they want to buy. In most current online flower shops, users type the name of a flower and browse the list of results. To enhance the quality of the search results, our shop provides an image-based search function, where users can upload images of the flowers they are looking for. The system performs an image search and returns a list of flowers similar to the input image.

There are eight types of flowers in the dataset:

- Baby
- Calimero
- Chrysanthemum
- Hydrangeas
- Lisianthus
- Pingpong
- Rosy
- Tana


## Goals

1. Classify images according to the flower types above.
2. Recommend 10 flower images from the dataset that are similar to the user's input flower image.


## Dataset

The dataset for this project is available on [Kaggle](https://kaggle.com/datasets/979207e9d5e6d91d26e8eb340941ae176c82fbdb2a25b4a436c273895ab96bb1).


## Requirements

- You are required to do the pre-processing step on the Flower dataset, including extra collection if necessary.
- You must investigate at least one machine learning algorithm for each of the two tasks. That is, you must build at least one model capable of predicting the type of flower images, and at least one model capable of showing 10 similar images.
- You must submit two models (one for each task).
- You are not required to use separate types of machine learning algorithms; however, a thorough investigation should consider different types of algorithms.
- You are required to fully train your own algorithms. You may not use pre-trained systems that were trained on other datasets (not given to you as part of this assignment).
- For higher grades (HD/DI), you must explore how the current state of the data affects the models' results and how the models can be improved, and you must implement your suggested improvements.
- Your final report must conduct an analysis and comparison between different model results, not just one model.


## Independent Evaluation

- Your independent evaluation is to research other works that have the same goals. Then you must compare and contrast your results with those of the other works.
- Use data collected completely outside the scope of your original training and evaluation.
recommend.py
ADDED
@@ -0,0 +1,108 @@
import argparse
import os.path

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from matplotlib import pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

from classify import classify


def recommend(
        ref_path: str, num_recommendations: int,
        data_path: str, clf_path: str, fe_path: str, clu_path: str,
) -> list:
    """
    Recommends similar images based on a reference image.

    :param ref_path: Path to the reference image.
    :param num_recommendations: Number of recommended images to return.
    :param data_path: Path to the .csv data file containing recommender database image feature vectors. This file must be generated using the same feature extractor specified in fe_path.
    :param clf_path: Path to the classifier model file.
    :param fe_path: Path to the feature extraction model file.
    :param clu_path: Path to the clustering model file.
    :return: List of paths to the recommended images.
    """
    if num_recommendations < 1:
        raise ValueError('Number of recommendations cannot be smaller than 1.')

    df_rec = pd.read_csv(data_path)
    fe = tf.keras.models.load_model(fe_path)
    clu = joblib.load(clu_path)
    clu.set_params(n_clusters=int(np.sqrt(len(df_rec) / num_recommendations)))

    # Classify the reference image and keep only database images of the same class
    ref_processed, ref_class = classify(ref_path, classifier_path=clf_path, return_original=False, verbose=False)
    recommendations = df_rec[df_rec['Class'] == ref_class]

    # Extract reference image feature vector
    ref_processed = np.squeeze(ref_processed)
    ref_feature_vector = fe.predict(
        tf.expand_dims(ref_processed, axis=0),
        verbose=0
    )
    ref_feature_vector = ref_feature_vector.astype(float)
    ref_feature_vector = ref_feature_vector.reshape(1, -1)

    # Cluster reference image
    clu.fit(recommendations.drop(['ImgPath', 'Class'], axis='columns').values)
    ref_cluster = clu.predict(ref_feature_vector)
    ref_cluster_indices = np.where(clu.labels_ == ref_cluster)[0]
    recommendations = recommendations.iloc[ref_cluster_indices]

    # Rank cluster and produce top cosine similarity recommendations
    cosine_similarities = cosine_similarity(
        ref_feature_vector,
        recommendations.drop(['ImgPath', 'Class'], axis='columns')
    )
    sorted_ref_cluster_indices = np.argsort(-cosine_similarities.flatten())
    if num_recommendations > len(sorted_ref_cluster_indices):
        raise ValueError('Number of recommendations too large. Insufficient database size.')
    top_ref_cluster_indices = sorted_ref_cluster_indices[:num_recommendations]
    recommendations = recommendations.iloc[top_ref_cluster_indices]

    return list(recommendations['ImgPath'].values)


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-f', '--file', required=True, help='reference image')
    ap.add_argument('-d', '--database', default='data/recommender-database', help='the database containing the images to be recommended, default: data/recommender-database')
    ap.add_argument('-c', '--classifier', default='models/clf-cnn', help='the machine learning model used for image classification, default: models/clf-cnn')
    ap.add_argument('-e', '--feature-extractor', default='models/fe-cnn', help='the machine learning model used for image feature extraction, default: models/fe-cnn')
    ap.add_argument('-k', '--clustering-model', default='models/clu-kmeans.model', help='the machine learning model used for image clustering, default: models/clu-kmeans.model')
    ap.add_argument('-n', '--num', required=False, default='10', help='number of recommendations, default: 10')
    args = vars(ap.parse_args())
    num = int(args['num'])

    # One row of 5 axes for the reference image, plus enough rows for the recommendations
    fig, axes = plt.subplots(max([1, num // 5]) + 1, 5, figsize=(16, 16), num='Flower Image Recommender')
    axes = axes.ravel()

    # Show the reference image and its predicted class in the middle of the top row
    ref = Image.open(args['file'])
    _, ref_class = classify(args['file'], classifier_path=args['classifier'], return_original=False, verbose=False)
    axes[2].imshow(ref)
    axes[2].set_title(
        f'Reference Image - "{ref_class}"',
        fontsize=10,
        weight='bold'
    )
    axes[2].text(
        0.5, -0.08, f'{os.path.relpath(args["file"])}',
        horizontalalignment='center',
        verticalalignment='center_baseline',
        transform=axes[2].transAxes,
        fontsize=8,
    )

    # Draw the recommendations starting from the second row of axes
    for i, rec_path in enumerate(recommend(
            args['file'], num,
            args['database'] + '.csv', args['classifier'], args['feature_extractor'], args['clustering_model']
    ), start=5):
        with Image.open(f'{args["database"]}/{rec_path}') as rec:
            axes[i].imshow(rec)

    for ax in axes:
        ax.axis('off')

    plt.show()
requirements.txt
ADDED
@@ -0,0 +1,20 @@
jupyter
tqdm
ipython
numpy==1.23.5
matplotlib
graphviz
pydot
pandas
seaborn
scikit-learn==1.2.2
scikit-image==0.20.0
imagehash==4.3.1
tensorflow==2.12.0
tensorflow-addons==0.20.0
keras==2.12.0
keras-tuner==1.3.5
icrawler
pillow==9.5.0
joblib
huggingface-hub