NamProlah commited on
Commit
af1bda1
·
1 Parent(s): 1d4757b

Upload 9 files

Browse files
Files changed (8) hide show
  1. .gitignore +285 -0
  2. LICENSE +21 -0
  3. README.md +213 -1
  4. __init__.py +0 -0
  5. classify.py +79 -0
  6. project-statement.md +42 -0
  7. recommend.py +108 -0
  8. requirements.txt +20 -0
.gitignore ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks,jetbrains+all
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,jupyternotebooks,jetbrains+all
3
+
4
+ ### JetBrains+all ###
5
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
6
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
7
+
8
+ # User-specific stuff
9
+ .idea/**/workspace.xml
10
+ .idea/**/tasks.xml
11
+ .idea/**/usage.statistics.xml
12
+ .idea/**/dictionaries
13
+ .idea/**/shelf
14
+
15
+ # AWS User-specific
16
+ .idea/**/aws.xml
17
+
18
+ # Generated files
19
+ .idea/**/contentModel.xml
20
+
21
+ # Sensitive or high-churn files
22
+ .idea/**/dataSources/
23
+ .idea/**/dataSources.ids
24
+ .idea/**/dataSources.local.xml
25
+ .idea/**/sqlDataSources.xml
26
+ .idea/**/dynamic.xml
27
+ .idea/**/uiDesigner.xml
28
+ .idea/**/dbnavigator.xml
29
+
30
+ # Gradle
31
+ .idea/**/gradle.xml
32
+ .idea/**/libraries
33
+
34
+ # Gradle and Maven with auto-import
35
+ # When using Gradle or Maven with auto-import, you should exclude module files,
36
+ # since they will be recreated, and may cause churn. Uncomment if using
37
+ # auto-import.
38
+ # .idea/artifacts
39
+ # .idea/compiler.xml
40
+ # .idea/jarRepositories.xml
41
+ # .idea/modules.xml
42
+ # .idea/*.iml
43
+ # .idea/modules
44
+ # *.iml
45
+ # *.ipr
46
+
47
+ # CMake
48
+ cmake-build-*/
49
+
50
+ # Mongo Explorer plugin
51
+ .idea/**/mongoSettings.xml
52
+
53
+ # File-based project format
54
+ *.iws
55
+
56
+ # IntelliJ
57
+ out/
58
+
59
+ # mpeltonen/sbt-idea plugin
60
+ .idea_modules/
61
+
62
+ # JIRA plugin
63
+ atlassian-ide-plugin.xml
64
+
65
+ # Cursive Clojure plugin
66
+ .idea/replstate.xml
67
+
68
+ # SonarLint plugin
69
+ .idea/sonarlint/
70
+
71
+ # Crashlytics plugin (for Android Studio and IntelliJ)
72
+ com_crashlytics_export_strings.xml
73
+ crashlytics.properties
74
+ crashlytics-build.properties
75
+ fabric.properties
76
+
77
+ # Editor-based Rest Client
78
+ .idea/httpRequests
79
+
80
+ # Android studio 3.1+ serialized cache file
81
+ .idea/caches/build_file_checksums.ser
82
+
83
+ ### JetBrains+all Patch ###
84
+ # Ignore everything but code style settings and run configurations
85
+ # that are supposed to be shared within teams.
86
+
87
+ .idea/*
88
+
89
+ !.idea/codeStyles
90
+ !.idea/runConfigurations
91
+ .idea/
92
+
93
+ ### JupyterNotebooks ###
94
+ # gitignore template for Jupyter Notebooks
95
+ # website: http://jupyter.org/
96
+
97
+ .ipynb_checkpoints
98
+ */.ipynb_checkpoints/*
99
+
100
+ # IPython
101
+ profile_default/
102
+ ipython_config.py
103
+
104
+ # Remove previous ipynb_checkpoints
105
+ # git rm -r .ipynb_checkpoints/
106
+
107
+ # Dataset
108
+ data/
109
+ scraping/*/
110
+ scraping/*.zip
111
+
112
+ # Machine Learning
113
+ log/
114
+ models/
115
+
116
+ ### Python ###
117
+ # Byte-compiled / optimized / DLL files
118
+ __pycache__/
119
+ *.py[cod]
120
+ *$py.class
121
+
122
+ # C extensions
123
+ *.so
124
+
125
+ # Distribution / packaging
126
+ .Python
127
+ build/
128
+ develop-eggs/
129
+ dist/
130
+ downloads/
131
+ eggs/
132
+ .eggs/
133
+ lib/
134
+ lib64/
135
+ parts/
136
+ sdist/
137
+ var/
138
+ wheels/
139
+ share/python-wheels/
140
+ *.egg-info/
141
+ .installed.cfg
142
+ *.egg
143
+ MANIFEST
144
+
145
+ # PyInstaller
146
+ # Usually these files are written by a python script from a template
147
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
148
+ *.manifest
149
+ *.spec
150
+
151
+ # Installer logs
152
+ pip-log.txt
153
+ pip-delete-this-directory.txt
154
+
155
+ # Unit test / coverage reports
156
+ htmlcov/
157
+ .tox/
158
+ .nox/
159
+ .coverage
160
+ .coverage.*
161
+ .cache
162
+ nosetests.xml
163
+ coverage.xml
164
+ *.cover
165
+ *.py,cover
166
+ .hypothesis/
167
+ .pytest_cache/
168
+ cover/
169
+
170
+ # Translations
171
+ *.mo
172
+ *.pot
173
+
174
+ # Django stuff:
175
+ *.log
176
+ local_settings.py
177
+ db.sqlite3
178
+ db.sqlite3-journal
179
+
180
+ # Flask stuff:
181
+ instance/
182
+ .webassets-cache
183
+
184
+ # Scrapy stuff:
185
+ .scrapy
186
+
187
+ # Sphinx documentation
188
+ docs/_build/
189
+
190
+ # PyBuilder
191
+ .pybuilder/
192
+ target/
193
+
194
+ # Jupyter Notebook
195
+
196
+ # IPython
197
+
198
+ # pyenv
199
+ # For a library or package, you might want to ignore these files since the code is
200
+ # intended to run in multiple environments; otherwise, check them in:
201
+ # .python-version
202
+
203
+ # pipenv
204
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
205
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
206
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
207
+ # install all needed dependencies.
208
+ #Pipfile.lock
209
+
210
+ # poetry
211
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
212
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
213
+ # commonly ignored for libraries.
214
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
215
+ #poetry.lock
216
+
217
+ # pdm
218
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
219
+ #pdm.lock
220
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
221
+ # in version control.
222
+ # https://pdm.fming.dev/#use-with-ide
223
+ .pdm.toml
224
+
225
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
226
+ __pypackages__/
227
+
228
+ # Celery stuff
229
+ celerybeat-schedule
230
+ celerybeat.pid
231
+
232
+ # SageMath parsed files
233
+ *.sage.py
234
+
235
+ # Environments
236
+ .env
237
+ .venv
238
+ env/
239
+ venv/
240
+ ENV/
241
+ env.bak/
242
+ venv.bak/
243
+
244
+ # Spyder project settings
245
+ .spyderproject
246
+ .spyproject
247
+
248
+ # Rope project settings
249
+ .ropeproject
250
+
251
+ # mkdocs documentation
252
+ /site
253
+
254
+ # mypy
255
+ .mypy_cache/
256
+ .dmypy.json
257
+ dmypy.json
258
+
259
+ # Pyre type checker
260
+ .pyre/
261
+
262
+ # pytype static type analyzer
263
+ .pytype/
264
+
265
+ # Cython debug symbols
266
+ cython_debug/
267
+
268
+ # PyCharm
269
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
270
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
271
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
272
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
273
+ #.idea/
274
+
275
+ ### Python Patch ###
276
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
277
+ poetry.toml
278
+
279
+ # ruff
280
+ .ruff_cache/
281
+
282
+ # LSP config files
283
+ pyrightconfig.json
284
+
285
+ # End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks,jetbrains+all
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Tuong-Minh (Mike) Vo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,3 +1,215 @@
 
 
1
  ---
2
- license: mit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BloomSage Machine Learning & MLOps Backend Component
2
+
3
  ---
4
+
5
+
6
+ ## Project Structure
7
+
8
+
9
+ ```
10
+ .
11
+ ├── font/
12
+ ├── notebooks/
13
+ │ ├── images/
14
+ │ ├── Step1.EDA.ipynb
15
+ │ ├── Step2.DataPrep.ipynb
16
+ │ └── Step3.Classifier-BaselineModel.ipynb
17
+ ├── scraping/
18
+ │ └── scrape.py
19
+ ├── classify.py
20
+ ├── recommend.py
21
+ ├── requirements.txt
22
+ ├── .gitignore
23
+ ├── project-statement.md
24
+ ├── README.md
25
+ └── LICENSE
26
+ ```
27
+
28
+ 1. `font/`: This folder contains the fonts used in our client script's GUI mode.
29
+ 2. `notebooks/`: This folder contains all Jupyter Notebooks for this project and their exported plots in `notebooks/images/`.
30
+ 3. `scraping/`: This folder contains a scraping script to get more images from the internet for our dataset. All downloaded images will also be in this folder.
31
+ 4. `classify.py`: Client script for classifying flower images using trained models.
32
+ 5. `recommend.py`: Client script for recommending flower images using trained models.
33
+ 6. `requirements.txt`: Text file for `pip` installation of necessary packages for development environment.
34
+ 7. `.gitignore`: This file contains VCS ignore rules.
35
+ 8. `README.md`: A text file containing useful reference information about this project, including how to run the algorithm.
36
+ 9. `LICENSE`: MIT
37
+
38
+
39
+ Additionally, these folders will be created during dataset fetching and model training:
40
+
41
+ 1. `data/`: This folder contains our datasets.
42
+ 2. `log/`: This folder contains training logs exported from training our models.
43
+ 3. `models/`: This folder contains trained models exported after training.
44
+
45
  ---
46
+
47
+
48
+ ## Getting Started 🚀
49
+
50
+ Clone this repository:
51
+
52
+ ```bash
53
+ git clone https://github.com/rmit-denominator/bloomsage-ml.git
54
+ ```
55
+
56
+
57
+ ### Development Environment
58
+
59
+ To set up the necessary packages for this project, run:
60
+
61
+ ```bash
62
+ pip install -r requirements.txt
63
+ ```
64
+
65
+ Refer to [requirements.txt](requirements.txt) for package dependencies and their versions.
66
+
67
+ <span style="color:gold">**NOTE:**</span> It is recommended that you use a Python virtual environment to avoid conflict with your global packages, and to keep your global Python installation clean. This is because we require specific versions of Numpy, Tensorflow and Keras in our code to maintain backward compatibility and compatibility between trained models and client code.
68
+
69
+
70
+ ### Download Dataset
71
+
72
+ The dataset for this project is available on [Kaggle](https://kaggle.com/datasets/979207e9d5e6d91d26e8eb340941ae176c82fbdb2a25b4a436c273895ab96bb1). Follow these steps to download and set it up for training and testing:
73
+
74
+ 1. Navigate to project's root directory.
75
+
76
+ 2. Clean all existing files in the `data/` folders (if exists) before downloading or updating this dataset:
77
+
78
+ ```bash
79
+ rm -r ./data/*
80
+ ```
81
+
82
+ 3. Download and **extract contents of** the `.zip` from [Kaggle](https://kaggle.com/datasets/979207e9d5e6d91d26e8eb340941ae176c82fbdb2a25b4a436c273895ab96bb1) into `data/raw` folder.
83
+
84
+ Alternatively, use the [Kaggle CLI](https://github.com/Kaggle/kaggle-api):
85
+
86
+ ```bash
87
+ kaggle datasets download -d miketvo/rmit-flowers -p ./data/raw/ --unzip
88
+ ```
89
+
90
+ The resulting folder structure should look like this:
91
+
92
+ ```
93
+ .
94
+ ├── data/
95
+ │ └── raw/
96
+ │ ├── Baby/
97
+ │ ├── Calimerio/
98
+ │ ├── Chrysanthemum/
99
+ │ ...
100
+ │ └── Tana/
101
+
102
+ ...
103
+ ```
104
+
105
+ where each folder corresponds to a flower class, and contains images of only that class.
106
+
107
+ 4. Setup for training and testing: Run [notebooks/Step2.DataPrep.ipynb](notebooks/Step2.DataPrep.ipynb) and [Step5.Recommender.ipynb](notebooks/Step5.Recommender.ipynb). They will clean, process, and split the raw dataset and the resulting train and test set into `data/train/` and `data/test/`, respectively. They will also generate a database for our image recommendation system in `data/recommender-database/`, along with `data/recommender-database.csv` that contains the feature vectors for all images in the recommender database, in addition to exporting two helper models `models/fe-cnn` and `models/clu-kmeans.model` for the recommendation system. **Note:** Clean these folders and files before you run these two notebooks:
108
+
109
+ ```bash
110
+ rmdir -r ./data/train
111
+ rmdir -r ./data/test
112
+ rmdir -r ./data/recommender-database
113
+ rm ./data/recommender-database.csv
114
+ ```
115
+
116
+ **<span style="color:red">Important:</span>** Clean and rerun this step every time you modify the raw dataset to get the most updated train dataset, test dataset, and recommender database.
117
+
118
+
119
+ ### Training
120
+
121
+ Skip this step if you just want to use one of the pre-trained model packages available from [Releases](https://github.com/miketvo/rmit2023a-cosc2753-assignment2/releases).
122
+
123
+ - Run each Jupyter Notebook in `notebooks/` in their prefixed order starting `Step1.`, `Step2.`, `Step3.`, and so on, <span style="color:red">**one file at a time**</span>.
124
+ - Skip [Step2.DataPrep.ipynb](notebooks/Step2.DataPrep.ipynb) if you have already run it after downloading the raw dataset in the step above.
125
+ - Skip [Step5.Recommender.ipynb](notebooks/Step5.Recommender.ipynb) if you have already run it after downloading the raw dataset in the step above.
126
+ - The resulting models are exported into `models/` folder. Their training logs are stored in `log/` folder.
127
+
128
+ **Note:** Beware: any existing model with conflicting name in `models/` will be replaced with newly trained models.
129
+
130
+
131
+ ### Using Trained Models
132
+
133
+ If you are using one of our pre-trained model packages, download your desired version from [Releases](https://github.com/miketvo/rmit2023a-cosc2753-assignment2/releases) (.zip archives) and extract its contents into this project's root directory using your preferred zip program. Make sure to check and clean `models/` folder (if exists) to avoid naming conflict with existing trained model before the extraction.
134
+
135
+ These trained models can then be loaded into your code with:
136
+
137
+ ```python
138
+ import tensorflow as tf
139
+
140
+ model = tf.keras.models.load_model('path/to/model')
141
+ ```
142
+
143
+ Additionally, two Python files, `classify.py` and `recommend.py`, are provided as simple front-ends to our trained model. You can either run them as standalone script in the terminal or import them as Python module in your own Python script or Jupyter Notebook to programmatically classify multiple images and recommend similar images for each of them.
144
+
145
+ To use them as standalone script, see instruction below:
146
+
147
+ On your terminal, make sure that you have the environment activated for the client script to have access to all required packages:
148
+
149
+ - Python Virtualenv:
150
+
151
+ ```bash
152
+ ./venv/Scripts/activate
153
+ ```
154
+
155
+ - Conda:
156
+
157
+ ```bash
158
+ conda activate ./envs
159
+ ```
160
+
161
+ #### Classifying Flower Images
162
+
163
+ Use the `classify.py` client script. Its syntax is as follows:
164
+
165
+ ```text
166
+ usage: classify.py [-h] -f FILE [-c CLASSIFIER] [-g] [-v {0,1,2}]
167
+
168
+ options:
169
+ -h, --help show this help message and exit
170
+ -f FILE, --file FILE the image to be classified
171
+ -c CLASSIFIER, --classifier CLASSIFIER the machine learning model used for classification, defaults: models/clf-cnn
172
+ -g, --gui show classification result using GUI
173
+ -v {0,1,2}, --verbose-level {0,1,2} verbose level, default: 0
174
+ ```
175
+
176
+ Example use:
177
+
178
+ ```text
179
+ $ python ./classify.py -f path/to/your/your/image.png -c ./models/clf -v=1
180
+ Image image.png is classified as "Chrysanthemum" (model: "clf")
181
+ ```
182
+
183
+ It also has a rudimentary GUI mode using Matplotlib, which will display the image with a caption of what flower type it is classified as:
184
+
185
+ ```bash
186
+ python ./classify.py --gui -f path/to/your/image.png -c ./models/clf
187
+ ```
188
+
189
+ **Note:** Alternatively, you can import its `classify.classify()` function into your own script or notebook to programmatically classify multiple images (see its docstring for instruction on how to use).
190
+
191
+
192
+ #### Recommending Flower Images
193
+
194
+ Use the `recommend.py` client script. Its syntax is as follows:
195
+
196
+ ```text
197
+ usage: recommend.py [-h] -f FILE [-d DATABASE] [-c CLASSIFIER] [-e FEATURE_EXTRACTOR] [-k CLUSTERING_MODEL] [-n NUM]
198
+
199
+ options:
200
+ -h, --help show this help message and exit
201
+ -f FILE, --file FILE reference image
202
+ -d DATABASE, --database DATABASE the database containing the images to be recommended, default: data/recommender-database
203
+ -c CLASSIFIER, --classifier CLASSIFIER the machine learning model used for image classification, default: models/clf-cnn
204
+ -e FEATURE_EXTRACTOR, --feature-extractor FEATURE_EXTRACTOR the machine learning model used for image feature extraction, default: models/fe-cnn
205
+ -k CLUSTERING_MODEL, --clustering-model CLUSTERING_MODEL the machine learning model used for image clustering, default: models/clu-kmeans.model
206
+ -n NUM, --num NUM number of recommendations, default: 10
207
+ ```
208
+
209
+ Example:
210
+
211
+ ```bash
212
+ python ./recommend.py -f path/to/your/your/image.png
213
+ ```
214
+
215
+ When executed, the code above will display 10 similar flower images (GUI mode) of the same type, taken from the recommender database in `data/recommender-database/`, based on your reference image, using the default classifier, feature extractor, and clustering model.
__init__.py ADDED
File without changes
classify.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+
4
+ import matplotlib.pyplot as plt
5
+ from PIL import Image
6
+ import tensorflow as tf
7
+ from keras.engine.training import Model
8
+
9
+ from utils.glob import TARGET_IMG_SIZE
10
+ from utils.glob import CLASS_LABELS
11
+ import utils.data_manip as manip
12
+
13
+
14
def classify(image_path: str, classifier_path: str, verbose: bool = False, return_original: bool = True) -> tuple:
    """
    Classify a single flower image from disk using a trained Keras model.

    :param image_path: Path to the image to be classified.
    :param classifier_path: Path to the classifier model to be used.
    :param verbose: Verbose output during model prediction.
    :param return_original: Whether to return the original image or the processed input.
    :return: Tuple of (image, predicted class label). The image is the original
        PIL image when return_original is True; otherwise it is the preprocessed
        tensor (with a leading batch dimension) that was fed to the model.
    """
    original = Image.open(image_path)

    # Preprocessing pipeline: strip alpha channel, square-crop/resize to the
    # model's input size, then scale pixel values.
    processed = manip.normalize_pixels(
        manip.resize_crop(
            manip.remove_transparency(original),
            TARGET_IMG_SIZE,
            TARGET_IMG_SIZE,
        )
    )
    batch = tf.expand_dims(processed, axis=0)  # the model expects a batch axis

    classifier: Model = tf.keras.models.load_model(classifier_path)
    probabilities = classifier.predict(batch, verbose=1 if verbose else 0)

    # Highest-probability class index -> human-readable label.
    predicted_label = CLASS_LABELS[tf.argmax(probabilities, axis=1).numpy()[0]]

    return (original if return_original else batch), predicted_label
41
+
42
+
43
if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-f', '--file', required=True, help='the image to be classified')
    ap.add_argument('-c', '--classifier', default='models/clf-cnn', help='the machine learning model used for classification, defaults: models/clf-cnn')
    ap.add_argument('-g', '--gui', action='store_true', help='show classification result using GUI')
    ap.add_argument('-v', '--verbose-level', choices=['0', '1', '2'], default='0', help="verbose level, default: 0")
    args = vars(ap.parse_args())
    verbose_level = int(args['verbose_level'])

    img = os.path.abspath(args['file'])
    clf = os.path.abspath(args['classifier'])
    # Full model-prediction verbosity only at the highest verbose level.
    image, predicted_label = classify(img, clf, verbose_level >= 2)

    if not args['gui']:
        # Terminal output: bare label at level 0, a full sentence otherwise.
        if verbose_level == 0:
            print(predicted_label)
        else:
            print(
                f'Image {os.path.basename(img)} is classified as "{predicted_label}" (model: "{os.path.basename(clf)}")'
            )
    else:
        # GUI output: show the image captioned with its predicted class.
        fig, ax = plt.subplots(1, 1, num='Flower Image Classifier')
        ax.imshow(image)
        ax.set_title(
            f'{predicted_label}',
            fontsize=12,
            weight='bold'
        )
        ax.text(
            0.5, -0.08, f'{os.path.relpath(img)}',
            horizontalalignment='center',
            verticalalignment='center_baseline',
            transform=ax.transAxes,
            fontsize=8,
        )
        ax.axis('off')
        plt.show()
project-statement.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project: Classify Images of Flowers
2
+
3
+ Assume you are a team of machine learning engineers working for an ecommerce flower shop, where users can order flowers. Before users buy flowers, the systems should have a functionality to help users navigate to the type of flowers that users want to buy. In most of the current online flower shops, users should type the name of the flowers and browse from the list of the results. However, to enhance the quality of the searching results, our shop provides an image based searching function, where the users can upload the images of the flowers that they are looking for. The system will accomplish an image search and return the list of flowers which are similar to the input image from users.
4
+
5
+ In the dataset, there are 08 types of flowers:
6
+
7
+ - Baby
8
+ - Calimero
9
+ - Chrysanthemum
10
+ - Hydrangeas
11
+ - Lisianthus
12
+ - Pingpong
13
+ - Rosy
14
+ - Tana
15
+
16
+
17
+ ## Goals
18
+
19
+ 1. Classify images according to flower type above.
20
+ 2. Recommend 10 flower images in the dataset which are similar to the input flower image from users.
21
+
22
+
23
+ ## Dataset
24
+
25
+ The dataset for this project is available on [Kaggle](https://kaggle.com/datasets/979207e9d5e6d91d26e8eb340941ae176c82fbdb2a25b4a436c273895ab96bb1).
26
+
27
+
28
+ ## Requirements
29
+
30
+ - You are required to do the pre-processing step on the Flower dataset, including extra collection if necessary.
31
+ - You must investigate at least one machine learning algorithms for each of the two tasks. That is, you must build at least one model capable of predicting the type of flower images, and at least one model capable of showing 10 similar images.
32
+ - You must submit two models (one for each task).
33
+ - You are not required to use separate type(s) of machine learning algorithms, however, a thorough investigation should consider different types of algorithms.
34
+ - You are required to fully train your own algorithms. You may not use pre-trained systems which are trained on other datasets (not given to you as part of this assignment).
35
+ - For higher grades (HD/DI) you must explore how the current status of the data will affect to the result of the models, how we can improve the models, and implement your suggestion to improve the models.
36
+ - Your final report must conduct an analysis and comparison between different model results, not only just one model.
37
+
38
+
39
+ ## Independent Evaluation
40
+
41
+ - Your independent evaluation is to research other works that have the same goals. Then you must compare and contrast your results to those other works.
42
+ - Using data collected completely outside the scope of your original training and evaluation.
recommend.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os.path
3
+ import joblib
4
+ import numpy as np
5
+ import pandas as pd
6
+ import tensorflow as tf
7
+ from PIL import Image
8
+ from matplotlib import pyplot as plt
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+
11
+ from classify import classify
12
+
13
+
14
def recommend(
        ref_path: str, num_recommendations: int,
        data_path: str, clf_path: str, fe_path: str, clu_path: str,
) -> list:
    """
    Recommends similar images based on a reference image.

    The reference image is first classified; database images of the same class
    are then clustered, and the top matches by cosine similarity within the
    reference's cluster are returned.

    :param ref_path: Path to the reference image.
    :param num_recommendations: Number of recommended images to return. Must be >= 1.
    :param data_path: Path to the .csv data file containing recommender database image feature vectors. This file must be generated using the same feature extractor specified in fe_path.
    :param clf_path: Path to the classifier model file.
    :param fe_path: Path to the feature extraction model file.
    :param clu_path: Path to the clustering model file.
    :return: List of paths to the recommended images.
    :raises ValueError: If num_recommendations < 1, or if it exceeds the number
        of candidate images available in the reference image's cluster.
    """
    if num_recommendations < 1:
        raise ValueError('Number of recommendations cannot be smaller than 1.')

    df_rec = pd.read_csv(data_path)
    fe = tf.keras.models.load_model(fe_path)
    clu = joblib.load(clu_path)
    # Scale the cluster count with database size. Clamp to >= 1: the original
    # expression truncated to 0 whenever len(df_rec) < num_recommendations,
    # which made KMeans raise on fit.
    clu.set_params(n_clusters=max(1, int(np.sqrt(len(df_rec) / num_recommendations))))

    # Restrict candidates to images of the same class as the reference.
    ref_processed, ref_class = classify(ref_path, classifier_path=clf_path, return_original=False, verbose=False)
    recommendations = df_rec[df_rec['Class'] == ref_class]

    # Extract the reference image's feature vector with the same feature
    # extractor that generated the database CSV.
    ref_processed = np.squeeze(ref_processed)
    ref_feature_vector = fe.predict(
        tf.expand_dims(ref_processed, axis=0),
        verbose=0
    )
    ref_feature_vector = ref_feature_vector.astype(float).reshape(1, -1)

    # Cluster the same-class candidates and keep only the reference's cluster.
    clu.fit(recommendations.drop(['ImgPath', 'Class'], axis='columns').values)
    ref_cluster = clu.predict(ref_feature_vector)
    ref_cluster_indices = np.where(clu.labels_ == ref_cluster)[0]
    recommendations = recommendations.iloc[ref_cluster_indices]

    # Rank the cluster by cosine similarity and keep the top matches.
    cosine_similarities = cosine_similarity(
        ref_feature_vector,
        recommendations.drop(['ImgPath', 'Class'], axis='columns')
    )
    sorted_ref_cluster_indices = np.argsort(-cosine_similarities.flatten())
    if num_recommendations > len(sorted_ref_cluster_indices):
        raise ValueError('Number of recommendations too large. Insufficient database size.')
    top_ref_cluster_indices = sorted_ref_cluster_indices[:num_recommendations]
    recommendations = recommendations.iloc[top_ref_cluster_indices]

    return list(recommendations['ImgPath'].values)
67
+
68
+
69
if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-f', '--file', required=True, help='reference image')
    ap.add_argument('-d', '--database', default='data/recommender-database', help='the database containing the images to be recommended, default: data/recommender-database')
    ap.add_argument('-c', '--classifier', default='models/clf-cnn', help='the machine learning model used for image classification, default: models/clf-cnn')
    ap.add_argument('-e', '--feature-extractor', default='models/fe-cnn', help='the machine learning model used for image feature extraction, default: models/fe-cnn')
    ap.add_argument('-k', '--clustering-model', default='models/clu-kmeans.model', help='the machine learning model used for image clustering, default: models/clu-kmeans.model')
    ap.add_argument('-n', '--num', required=False, default='10', help="number of recommendations, default: 10")
    args = vars(ap.parse_args())
    num = int(args['num'])

    # One row for the reference image plus ceil(num / 5) rows of
    # recommendations (5 images per row). The original used num // 5 (floor),
    # which under-allocated axes whenever num was not a multiple of 5
    # (e.g. -n 7 raised IndexError at axes[10]).
    rec_rows = max(1, -(-num // 5))
    fig, axes = plt.subplots(rec_rows + 1, 5, figsize=(16, 16), num='Flower Image Recommender')
    axes = axes.ravel()

    # Show the reference image centered in the first row (axes index 2 of 0-4).
    ref = Image.open(args['file'])
    _, ref_class = classify(args['file'], classifier_path=args['classifier'], return_original=False, verbose=False)
    axes[2].imshow(ref)
    axes[2].set_title(
        f'Reference Image - "{ref_class}"',
        fontsize=10,
        weight='bold'
    )
    axes[2].text(
        0.5, -0.08, f'{os.path.relpath(args["file"])}',
        horizontalalignment='center',
        verticalalignment='center_baseline',
        transform=axes[2].transAxes,
        fontsize=8,
    )

    # Recommended images fill the rows below the reference row (start=5 skips
    # the first subplot row).
    for i, rec_path in enumerate(recommend(
            args['file'], num,
            args['database'] + '.csv', args['classifier'], args['feature_extractor'], args['clustering_model']
    ), start=5):
        with Image.open(f'{args["database"]}/{rec_path}') as rec:
            axes[i].imshow(rec)

    for ax in axes:
        ax.axis('off')

    plt.show()
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ jupyter
2
+ tqdm
3
+ ipython
4
+ numpy==1.23.5
5
+ matplotlib
6
+ graphviz
7
+ pydot
8
+ pandas
9
+ seaborn
10
+ scikit-learn==1.2.2
11
+ scikit-image==0.20.0
12
+ imagehash==4.3.1
13
+ tensorflow==2.12.0
14
+ tensorflow-addons==0.20.0
15
+ keras==2.12.0
16
+ keras-tuner==1.3.5
17
+ icrawler
18
+ pillow==9.5.0
19
+ joblib
20
+ huggingface-hub