hakim committed · Commit 7195b15 · 1 parent: ecb0e90

pipeline added
Browse files

- .dvcignore +3 -0
- .gitignore +2 -0
- Dockerfile +11 -0
- README.md +111 -1
- app.py +70 -0
- config/config.yaml +21 -0
- dvc.lock +113 -0
- dvc.yaml +51 -0
- main.py +51 -0
- params.yaml +8 -0
- requirements.txt +18 -0
- research/01_data_ingestion.ipynb +177 -0
- research/02_prepare_base_model.ipynb +292 -0
- research/03_model_training.ipynb +285 -0
- research/model_evaluation.ipynb +339 -0
- research/trials.ipynb +0 -0
- scores.json +4 -0
- setup.py +27 -0
- src/cnnClassifier/__init__.py +22 -0
- src/cnnClassifier/components/__init__.py +0 -0
- src/cnnClassifier/components/data_ingestion.py +40 -0
- src/cnnClassifier/components/evaluation.py +83 -0
- src/cnnClassifier/components/prepare_base_model.py +67 -0
- src/cnnClassifier/components/training.py +86 -0
- src/cnnClassifier/config/__init__.py +0 -0
- src/cnnClassifier/config/configuration.py +88 -0
- src/cnnClassifier/constant/__init__.py +4 -0
- src/cnnClassifier/entity/__init__.py +0 -0
- src/cnnClassifier/entity/config_entity.py +46 -0
- src/cnnClassifier/pipeline/__init__.py +0 -0
- src/cnnClassifier/pipeline/predict.py +30 -0
- src/cnnClassifier/pipeline/stage_01_data_ingestion.py +28 -0
- src/cnnClassifier/pipeline/stage_02_prepare_base_model.py +31 -0
- src/cnnClassifier/pipeline/stage_03_train_model.py +33 -0
- src/cnnClassifier/pipeline/stage_04_evaluation.py +32 -0
- src/cnnClassifier/utils/__init__.py +4 -0
- src/cnnClassifier/utils/common.py +145 -0
- template.py +42 -0
.dvcignore
ADDED
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
.gitignore
CHANGED
@@ -160,3 +160,5 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+artifacts/*
+model.h5/*
Dockerfile
ADDED
@@ -0,0 +1,11 @@
FROM python:3.11-slim

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . /code

CMD ["streamlit", "run", "app.py"]
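To build and serve the app locally, the usual Docker workflow applies (a minimal sketch; the tag `kidney-cls` is an arbitrary name, and 8501 is Streamlit's default port, which this Dockerfile does not `EXPOSE` explicitly):

```bash
docker build -t kidney-cls .
docker run -p 8501:8501 kidney-cls
```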
README.md
CHANGED
@@ -1 +1,111 @@
---
title: Image To Text App
emoji: 📹
colorFrom: blue
colorTo: red
sdk: streamlit
app_file: app.py
pinned: false
---


# Kidney-disease-classification-mlops

## Workflows

1. Update config.yaml
2. Update secrets.yaml [Optional]
3. Update params.yaml
4. Update the entity
5. Update the configuration manager in src config
6. Update the components
7. Update the pipeline
8. Update main.py
9. Update dvc.yaml
10. Update app.py

# How to run?
### STEPS:

Clone the repository

```bash
git clone https://github.com/HAKIM-ML/Kidney-disease-classification-mlops
```

### STEP 01- Create a conda environment after opening the repository

```bash
conda create -n cnncls python=3.8 -y
```

```bash
conda activate cnncls
```

### STEP 02- Install the requirements
```bash
pip install -r requirements.txt
```

```bash
# Finally run the following command
streamlit run app.py
```

Now open up your localhost and port.

## MLflow

- [Documentation](https://mlflow.org/docs/latest/index.html)

##### cmd
- mlflow ui

### dagshub
[dagshub](https://dagshub.com/)

MLFLOW_TRACKING_URI=https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow

Run the following as a script (python script.py):

```python
import dagshub
dagshub.init(repo_owner='azizulhakim8291', repo_name='Kidney-disease-classification-mlops', mlflow=True)

import mlflow
with mlflow.start_run():
    mlflow.log_param('parameter name', 'value')
    mlflow.log_metric('metric name', 1)
```

### DVC cmd

1. dvc init
2. dvc repro
3. dvc dag

## About MLflow & DVC

MLflow

- It is production-grade
- Traces all of your experiments
- Logging & tagging your model

DVC

- It is very lightweight, for POC only
- Lightweight experiment tracker
- It can perform orchestration (creating pipelines)
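Alternatively, MLflow picks the tracking server up from the standard `MLFLOW_TRACKING_URI` environment variable, so the URI above can be exported once instead of calling `dagshub.init()` in every script (a sketch; DagsHub may additionally need `MLFLOW_TRACKING_USERNAME` and `MLFLOW_TRACKING_PASSWORD` set for authenticated pushes):

```bash
export MLFLOW_TRACKING_URI=https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow
python main.py
```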
app.py
ADDED
@@ -0,0 +1,70 @@
import streamlit as st
import io
from PIL import Image
import os
from cnnClassifier.pipeline.predict import Prediction

st.set_page_config(page_title="Chicken Health Predictor", page_icon="🐔", layout="wide")

st.title("🐔 Chicken Health Predictor")
st.markdown("### Upload an image to predict if the chicken is healthy or has coccidiosis")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

col1, col2 = st.columns(2)

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    col1.image(image, caption="Uploaded Image", use_column_width=True)

    # Save the uploaded file temporarily
    temp_file = "temp_image.jpg"
    image.save(temp_file)

    with st.spinner("Analyzing the image..."):
        predictor = Prediction(temp_file)
        prediction = predictor.predict()

    # Remove the temporary file
    os.remove(temp_file)

    col2.markdown("## Prediction Result")
    if prediction == "Normal":
        col2.success(f"The chicken appears to be **{prediction}**! 🎉")
        col2.markdown("Keep up the good care for your feathered friend!")
    else:
        col2.error(f"The kidney may have **{prediction}**. 😢")
        col2.markdown("Please consult with a veterinarian for proper treatment.")


st.sidebar.title("About")
st.sidebar.info(
    "This app uses a deep learning model to predict whether a chicken is healthy "
    "or has coccidiosis based on an uploaded image. Always consult with a "
    "veterinarian for accurate diagnosis and treatment."
)

st.sidebar.title("Instructions")
st.sidebar.markdown(
    """
    1. Upload a clear image of a chicken.
    2. Wait for the model to analyze the image.
    3. View the prediction result and additional information.
    """
)

st.markdown(
    """
    <style>
    .reportview-container {
        background: linear-gradient(to right, #FDFCFB, #E2D1C3);
    }
    .sidebar .sidebar-content {
        background: linear-gradient(to bottom, #FDFCFB, #E2D1C3);
    }
    </style>
    """,
    unsafe_allow_html=True,
)
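`app.py` imports `Prediction` from `src/cnnClassifier/pipeline/predict.py`, which is part of this commit but not shown in this view. A minimal sketch of the interface the app relies on — the class name and `predict()` come from the import above, while the model path, input size, and class labels are assumptions based on config.yaml and params.yaml:

```python
# Hypothetical sketch of the Prediction interface used by app.py.
import numpy as np
import tensorflow as tf

class Prediction:
    def __init__(self, filename: str):
        self.filename = filename

    def predict(self) -> str:
        # Assumed artifacts: model.h5 from the training stage, 224x224 input.
        model = tf.keras.models.load_model("artifacts/training/model.h5")
        img = tf.keras.preprocessing.image.load_img(self.filename, target_size=(224, 224))
        x = tf.keras.preprocessing.image.img_to_array(img) / 255.0
        result = np.argmax(model.predict(np.expand_dims(x, axis=0)), axis=1)
        # Label mapping is assumed; app.py only checks for the string "Normal".
        return "Normal" if result[0] == 1 else "Tumor"
```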
config/config.yaml
ADDED
@@ -0,0 +1,21 @@
atifacts_root : artifacts

data_ingestion:
  root_dir : artifacts/data_ingestion
  source_URL : https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view?usp=sharing
  local_data_file : artifacts/data_ingestion/data.zip
  unzip_dir : artifacts/data_ingestion/unzip


prepare_base_model:
  root_dir: artifacts/prepare_base_model
  base_model_path: artifacts/prepare_base_model/base_model.h5
  updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5


training:
  root_dir: artifacts/training
  trained_model_path : artifacts/training/model.h5
dvc.lock
ADDED
@@ -0,0 +1,113 @@
schema: '2.0'
stages:
  data_ingestion:
    cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
    deps:
    - path: config/config.yaml
      hash: md5
      md5: 18c5d166940398f449d80f3bf7ceba78
      size: 601
    - path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py
      hash: md5
      md5: 9ab8c5d8d045a810fdc294c23dba44a2
      size: 906
    outs:
    - path: artifacts/data_ingestion/unzip/kidney-ct-scan-image
      hash: md5
      md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir
      size: 58936381
      nfiles: 465
  prepare_base_model:
    cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
    deps:
    - path: config/config.yaml
      hash: md5
      md5: 18c5d166940398f449d80f3bf7ceba78
      size: 601
    - path: src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
      hash: md5
      md5: e8e39a301f4b90d1b4f2c86acc66ef32
      size: 999
    params:
      params.yaml:
        CLASSES: 2
        IMAGE_SIZE:
        - 224
        - 224
        - 3
        INCLUDE_TOP: false
        LEARNING_RATE: 0.009
        WEIGHTS: imagenet
    outs:
    - path: artifacts/prepare_base_model
      hash: md5
      md5: 186cffa6925a8727cbd781402a5b2d91.dir
      size: 118054560
      nfiles: 2
  training:
    cmd: python src/cnnClassifier/pipeline/stage_03_train_model.py
    deps:
    - path: artifacts/data_ingestion/unzip/kidney-ct-scan-image
      hash: md5
      md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir
      size: 58936381
      nfiles: 465
    - path: artifacts/prepare_base_model
      hash: md5
      md5: 186cffa6925a8727cbd781402a5b2d91.dir
      size: 118054560
      nfiles: 2
    - path: config/config.yaml
      hash: md5
      md5: 18c5d166940398f449d80f3bf7ceba78
      size: 601
    - path: src/cnnClassifier/pipeline/stage_03_train_model.py
      hash: md5
      md5: 3ef39a6e5a0d665c7c48877e098f3c82
      size: 919
    params:
      params.yaml:
        AUGMENTATION: true
        BATCH_SIZE: 32
        EPOCHS: 2
        IMAGE_SIZE:
        - 224
        - 224
        - 3
    outs:
    - path: artifacts/training/model.h5
      hash: md5
      md5: b860a0e1daa2296bf1ab06265a233dae
      size: 59337520
  evaluation:
    cmd: python src/cnnClassifier/pipeline/stage_04_evaluation.py
    deps:
    - path: artifacts/data_ingestion/unzip/kidney-ct-scan-image
      hash: md5
      md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir
      size: 58936381
      nfiles: 465
    - path: artifacts/training/model.h5
      hash: md5
      md5: b860a0e1daa2296bf1ab06265a233dae
      size: 59337520
    - path: config/config.yaml
      hash: md5
      md5: 18c5d166940398f449d80f3bf7ceba78
      size: 601
    - path: src/cnnClassifier/pipeline/stage_04_evaluation.py
      hash: md5
      md5: ef1e7f821e740d1e4a9d51a4bf724e68
      size: 888
    params:
      params.yaml:
        BATCH_SIZE: 32
        IMAGE_SIZE:
        - 224
        - 224
        - 3
    outs:
    - path: scores.json
      hash: md5
      md5: 8930e55b40b3d3c2866622648a461a1c
      size: 72
dvc.yaml
ADDED
@@ -0,0 +1,51 @@
stages:
  data_ingestion:
    cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
    deps:
      - src/cnnClassifier/pipeline/stage_01_data_ingestion.py
      - config/config.yaml
    outs:
      - artifacts/data_ingestion/unzip/kidney-ct-scan-image

  prepare_base_model:
    cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
    deps:
      - src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
      - config/config.yaml
    params:
      - IMAGE_SIZE
      - INCLUDE_TOP
      - CLASSES
      - WEIGHTS
      - LEARNING_RATE
    outs:
      - artifacts/prepare_base_model

  training:
    cmd: python src/cnnClassifier/pipeline/stage_03_train_model.py
    deps:
      - src/cnnClassifier/pipeline/stage_03_train_model.py
      - config/config.yaml
      - artifacts/data_ingestion/unzip/kidney-ct-scan-image
      - artifacts/prepare_base_model
    params:
      - IMAGE_SIZE
      - EPOCHS
      - BATCH_SIZE
      - AUGMENTATION
    outs:
      - artifacts/training/model.h5

  evaluation:
    cmd: python src/cnnClassifier/pipeline/stage_04_evaluation.py
    deps:
      - src/cnnClassifier/pipeline/stage_04_evaluation.py
      - config/config.yaml
      - artifacts/data_ingestion/unzip/kidney-ct-scan-image
      - artifacts/training/model.h5
    params:
      - IMAGE_SIZE
      - BATCH_SIZE
    metrics:
      - scores.json:
          cache: false
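With the stages declared above, the pipeline can be reproduced and inspected using the standard DVC commands (the same ones the README lists; since `scores.json` is declared under `metrics`, DVC can also print it directly):

```bash
dvc repro          # re-runs only the stages whose deps or params changed
dvc dag            # shows data_ingestion -> prepare_base_model -> training -> evaluation
dvc metrics show   # prints the loss/accuracy stored in scores.json
```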
main.py
ADDED
@@ -0,0 +1,51 @@
from cnnClassifier import logger
from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
from cnnClassifier.pipeline.stage_02_prepare_base_model import PrepareBaseModelTrainingPipeline
from cnnClassifier.pipeline.stage_03_train_model import ModelTrainingPipeline
from cnnClassifier.pipeline.stage_04_evaluation import EvaluationTrainingPipeline

STAGE_NAME = "Data Ingestion stage"
try:
    logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
    data_ingestion = DataIngestionTrainingPipeline()
    data_ingestion.main()
    logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
except Exception as e:
    logger.exception(e)
    raise e

STAGE_NAME = "Prepare Base Model stage"
try:
    logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
    prepare_base_model = PrepareBaseModelTrainingPipeline()
    prepare_base_model.main()
    logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
except Exception as e:
    logger.exception(e)
    raise e


STAGE_NAME = "Model Training"

try:
    logger.info(f"*******************")
    logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
    obj = ModelTrainingPipeline()
    obj.main()
    logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
except Exception as e:
    logger.exception(e)
    raise e


STAGE_NAME = "Evaluation"

try:
    logger.info(f"*******************")
    logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
    obj = EvaluationTrainingPipeline()
    obj.main()
    logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
except Exception as e:
    logger.exception(e)
    raise e
params.yaml
ADDED
@@ -0,0 +1,8 @@
AUGMENTATION : TRUE
IMAGE_SIZE: [224,224,3]
EPOCHS : 2
BATCH_SIZE : 32
LEARNING_RATE : 0.009
CLASSES : 2
WEIGHTS : imagenet
INCLUDE_TOP : False
requirements.txt
ADDED
@@ -0,0 +1,18 @@
tensorflow==2.12.0
pandas
dvc
mlflow==2.2.2
notebook
numpy
matplotlib
seaborn
python-box==6.0.2
pyYAML
tqdm
ensure==1.0.2
joblib
types-PyYAML
scipy
Flask
Flask-Cors
gdown
research/01_data_ingestion.ipynb
ADDED
@@ -0,0 +1,177 @@
In [1]:
import os
os.chdir('../')

In [2]:
# entity
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir: Path
    source_URL: str
    local_data_file: Path
    unzip_dir: Path

In [11]:
from cnnClassifier.utils.common import read_yaml, create_directories
from cnnClassifier.constant import *

# Configuration
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH
    ):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.atifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        config = self.config.data_ingestion
        create_directories([config.root_dir])

        data_ingestion_config = DataIngestionConfig(
            root_dir=config.root_dir,
            source_URL=config.source_URL,
            local_data_file=config.local_data_file,
            unzip_dir=config.unzip_dir
        )

        return data_ingestion_config

In [12]:
# components
import os
import zipfile
import gdown
from cnnClassifier import logger
from cnnClassifier.utils.common import get_size

In [13]:
class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def download_file(self) -> str:
        try:
            dataset_url = self.config.source_URL
            zip_download_dir = self.config.local_data_file
            os.makedirs('artifacts/datasets', exist_ok=True)
            logger.info(f'Downloading data from {dataset_url} into {zip_download_dir}')

            file_id = dataset_url.split('/')[-2]
            prefix = 'https://drive.google.com/uc?/export=download&id='
            gdown.download(prefix + file_id, zip_download_dir)
            logger.info(f'Downloaded data from {dataset_url} into {zip_download_dir}')

        except Exception as e:
            raise e

    def extract_zip_file(self):
        """
        zip_file_path: str
        Extracts the zip file into the data directory
        Function returns None
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)

In [ ]:
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zip_file()
except Exception as e:
    raise e

Output (stderr):
  5%|▍ | 2.62M/57.7M [00:30<03:49, 240kB/s]
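The download prefix in `DataIngestion.download_file` (`uc?/export=download&id=`) carries a stray slash; gdown still finds the `id` query parameter, as the progress bar above shows, but the conventional form is the following (a sketch, not part of the commit):

```python
import gdown

# File ID taken from config.yaml's source_URL.
file_id = "1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3"
# Canonical Google Drive direct-download URL understood by gdown.
gdown.download(f"https://drive.google.com/uc?export=download&id={file_id}",
               "artifacts/data_ingestion/data.zip")
```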
research/02_prepare_base_model.ipynb
ADDED
@@ -0,0 +1,292 @@
In [1]:
import os
os.chdir('../')

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class PrepareBaseModelConfig:
    root_dir: Path
    base_model_path: Path
    updated_base_model_path: Path
    params_image_size: list
    params_learning_rate: float
    params_include_top: bool
    params_weights: str
    params_classes: int

In [3]:
from cnnClassifier.utils.common import read_yaml, create_directories
from cnnClassifier.constant import *

# Configuration
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.atifacts_root])

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        config = self.config.prepare_base_model

        create_directories([config.root_dir])

        prepare_base_model_config = PrepareBaseModelConfig(
            root_dir=Path(config.root_dir),
            base_model_path=Path(config.base_model_path),
            updated_base_model_path=Path(config.updated_base_model_path),
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES
        )

        return prepare_base_model_config

In [4]:
import os
import urllib.request as request
import tensorflow as tf

class PrepareBaseModel:
    def __init__(self, config: PrepareBaseModelConfig):
        self.config = config

    def get_base_model(self):
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=self.config.params_image_size,
            weights=self.config.params_weights,
            include_top=self.config.params_include_top
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        if freeze_all:
            for layer in model.layers:
                model.trainable = False
        elif (freeze_till is not None) and (freeze_till > 0):
            for layer in model.layers[:-freeze_till]:
                model.trainable = False

        flatten_in = tf.keras.layers.Flatten()(model.output)
        prediction = tf.keras.layers.Dense(
            units=classes,
            activation="softmax"
        )(flatten_in)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )

        full_model.summary()
        return full_model

    def update_base_model(self):
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )

        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        model.save(path)

Output:
[2024-07-30 03:19:34,344: WARNING: module_wrapper: From c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.]

In [5]:
try:
    config = ConfigurationManager()
    prepare_base_model_config = config.get_prepare_base_model_config()
    prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
    prepare_base_model.get_base_model()
    prepare_base_model.update_base_model()
except Exception as e:
    raise e

Output:
[2024-07-30 03:19:36,293: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-30 03:19:36,296: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-30 03:19:36,298: INFO: common: Created directory at: artifacts]
[2024-07-30 03:19:36,299: INFO: common: Created directory at: artifacts/prepare_base_model]
[2024-07-30 03:19:36,531: WARNING: module_wrapper: From c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.]
[2024-07-30 03:19:36,660: WARNING: module_wrapper: From c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\layers\pooling\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.]
[2024-07-30 03:19:37,174: WARNING: saving_utils: Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.]
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0
 block3_conv1 (Conv2D)       (None, 56, 56, 256)       295168
 block3_conv2 (Conv2D)       (None, 56, 56, 256)       590080
 block3_conv3 (Conv2D)       (None, 56, 56, 256)       590080
 block3_pool (MaxPooling2D)  (None, 28, 28, 256)       0
 block4_conv1 (Conv2D)       (None, 28, 28, 512)       1180160
 block4_conv2 (Conv2D)       (None, 28, 28, 512)       2359808
 block4_conv3 (Conv2D)       (None, 28, 28, 512)       2359808
 block4_pool (MaxPooling2D)  (None, 14, 14, 512)       0
 block5_conv1 (Conv2D)       (None, 14, 14, 512)       2359808
 block5_conv2 (Conv2D)       (None, 14, 14, 512)       2359808
 block5_conv3 (Conv2D)       (None, 14, 14, 512)       2359808
 block5_pool (MaxPooling2D)  (None, 7, 7, 512)         0
 flatten (Flatten)           (None, 25088)             0
 dense (Dense)               (None, 2)                 50178
=================================================================
Total params: 14764866 (56.32 MB)
Trainable params: 50178 (196.01 KB)
Non-trainable params: 14714688 (56.13 MB)
_________________________________________________________________

Output (stderr):
c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.
  saving_api.save_model(
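One thing to note in `_prepare_full_model`: both freeze branches assign `model.trainable = False` inside the layer loop, so the `freeze_till` branch freezes the whole network rather than all but the last N layers (the `freeze_all=True` run above is unaffected, as the 50,178 trainable params confirm). A sketch of the presumably intended per-layer version:

```python
import tensorflow as tf

def freeze_layers(model: tf.keras.Model, freeze_all: bool, freeze_till: int | None) -> None:
    """Hypothetical per-layer version of the freeze logic in _prepare_full_model."""
    if freeze_all:
        for layer in model.layers:
            layer.trainable = False   # was: model.trainable = False
    elif (freeze_till is not None) and (freeze_till > 0):
        for layer in model.layers[:-freeze_till]:
            layer.trainable = False   # leaves the last `freeze_till` layers trainable
```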
research/03_model_training.ipynb
ADDED
@@ -0,0 +1,285 @@
In [1]:
import os
os.chdir('../')

In [2]:
%pwd

Out[2]:
'd:\\MLOps-Project\\Kidney-disease-classification-mlops'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    root_dir : Path
    training_model_path : Path
    updata_base_model_path : Path
    training_data: Path
    params_epochs : int
    params_is_augmentation : bool
    params_batch_size : int
    params_image_size : list

In [9]:
from cnnClassifier.utils.common import read_yaml, create_directories
from cnnClassifier.constant import *

# Configuration
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.atifacts_root])

    def get_training_config(self) -> TrainingConfig:
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        training_data = os.path.join(self.config.data_ingestion.unzip_dir, 'kidney-ct-scan-image')

        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            training_model_path=Path(training.trained_model_path),
            updata_base_model_path=Path(prepare_base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAGE_SIZE
        )

        return training_config

In [10]:
import tensorflow as tf

In [19]:
class Training:
    def __init__(self, confg : TrainingConfig):
        self.config = confg

    def get_base_model(self):
        self.model = tf.keras.models.load_model(
            self.config.updata_base_model_path
        )

    def train_vaid_generator(self):
        datagenerator_kwargs = dict(
            rescale = 1 / 255,
            validation_split = 0.20
        )

        dataflow_kwargs = dict(
            target_size = self.config.params_image_size[:-1],
            batch_size = self.config.params_batch_size,
            interpolation = 'bilinear'
        )
        valid_datagernerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagernerator.flow_from_directory(
            directory = self.config.training_data,
            subset = 'validation',
            shuffle = True,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range = 40,
                horizontal_flip = True,
                width_shift_range = 0.2,
                height_shift_range = 0.2,
                shear_range = 0.2,
                zoom_range = 0.2,
                **datagenerator_kwargs
            )
        else:
            train_datagenerator = valid_datagernerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory = self.config.training_data,
            subset = 'training',
            shuffle = True,
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        model.save(path)

    def train(self):
        self.steps_per_epchs = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        self.model.fit(
            self.train_generator,
            epochs = self.config.params_epochs,
            steps_per_epoch = self.steps_per_epchs,
            validation_steps = self.validation_steps,
            validation_data = self.valid_generator
        )

        self.save_model(
            path = self.config.training_data,
            model = self.model
        )

In [20]:
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(confg=training_config)
    training.get_base_model()
    training.train_vaid_generator()
    training.train()

except Exception as e:
    raise e

Output:
[2024-07-31 20:16:53,704: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-31 20:16:53,707: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-31 20:16:53,709: INFO: common: Created directory at: artifacts]
[2024-07-31 20:16:53,711: INFO: common: Created directory at: artifacts\training]
Found 93 images belonging to 2 classes.
Found 372 images belonging to 2 classes.
Epoch 1/10
[2024-07-31 20:16:55,760: WARNING: module_wrapper: From c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.]
23/23 [==============================] - 32s 1s/step - loss: 0.6976 - accuracy: 0.5983 - val_loss: 0.5528 - val_accuracy: 0.6750
Epoch 2/10
23/23 [==============================] - 18s 776ms/step - loss: 0.5961 - accuracy: 0.7022 - val_loss: 0.5576 - val_accuracy: 0.8250
Epoch 3/10
23/23 [==============================] - 18s 780ms/step - loss: 0.5489 - accuracy: 0.7612 - val_loss: 0.6042 - val_accuracy: 0.5250
Epoch 4/10
23/23 [==============================] - 18s 779ms/step - loss: 0.5166 - accuracy: 0.8006 - val_loss: 0.5593 - val_accuracy: 0.5750
Epoch 5/10
23/23 [==============================] - 18s 774ms/step - loss: 0.4863 - accuracy: 0.7949 - val_loss: 0.6155 - val_accuracy: 0.5250
Epoch 6/10
23/23 [==============================] - 18s 789ms/step - loss: 0.4486 - accuracy: 0.8062 - val_loss: 0.5774 - val_accuracy: 0.5250
Epoch 7/10
23/23 [==============================] - 18s 772ms/step - loss: 0.4574 - accuracy: 0.8034 - val_loss: 0.5751 - val_accuracy: 0.5125
Epoch 8/10
23/23 [==============================] - 18s 772ms/step - loss: 0.4493 - accuracy: 0.7949 - val_loss: 0.5814 - val_accuracy: 0.5125
Epoch 9/10
23/23 [==============================] - 18s 772ms/step - loss: 0.4414 - accuracy: 0.7921 - val_loss: 0.5636 - val_accuracy: 0.5125
Epoch 10/10
23/23 [==============================] - 18s 794ms/step - loss: 0.4290 - accuracy: 0.8090 - val_loss: 0.5743 - val_accuracy: 0.5000
[2024-07-31 20:20:09,590: INFO: builder_impl: Assets written to: artifacts\data_ingestion\unzip\kidney-ct-scan-image\assets]
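Note the save call at the end of `train()`: it passes `self.config.training_data` (the dataset directory) instead of the trained-model path, which is why the run log above shows assets written under `artifacts\data_ingestion\unzip\kidney-ct-scan-image`. Using the `training_model_path` field already present on `TrainingConfig`, the call would presumably read:

```python
# Hypothetical correction inside Training.train(): persist the trained model
# to the configured model path rather than the dataset directory.
self.save_model(
    path=self.config.training_model_path,   # artifacts/training/model.h5
    model=self.model
)
```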
research/model_evaluation.ipynb
ADDED
@@ -0,0 +1,339 @@
In [1]:
import os
os.chdir('../')

In [2]:
%pwd

Out[2]:
'd:\\MLOps-Project\\Kidney-disease-classification-mlops'

In [3]:
import tensorflow as tf

In [4]:
model = tf.keras.models.load_model('artifacts/training/model.h5')

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    path_of_model: Path
    training_data: Path
    all_params: dict
    mlflow_uri: str
    params_image_size: list
    params_batch_size: int

In [12]:
from cnnClassifier.utils.common import read_yaml, create_directories, save_json
from cnnClassifier.constant import *

# Configuration
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.atifacts_root])

    def get_evaluation_config(self) -> EvaluationConfig:
        eval_config = EvaluationConfig(
            path_of_model='artifacts/training/model.h5',
            training_data='artifacts/data_ingestion/unzip/kidney-ct-scan-image',
            mlflow_uri='https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow',
            all_params=self.params,
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE
        )
        return eval_config

In [13]:
import tensorflow as tf
from pathlib import Path
import mlflow
import mlflow.keras
from urllib.parse import urlparse

In [17]:
class Evaluation:
    def __init__(self, config: EvaluationConfig):
        self.config = config

    def _valid_generator(self):
        datagenerator_kwargs = dict(
            rescale = 1./255,
            validation_split=0.30
        )

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        return tf.keras.models.load_model(path)

    def evaluation(self):
        self.model = self.load_model(self.config.path_of_model)
        self._valid_generator()
        self.score = model.evaluate(self.valid_generator)
        self.save_score()

    def save_score(self):
        scores = {"loss": self.score[0], "accuracy": self.score[1]}
        save_json(path=Path("scores.json"), data=scores)

    def log_into_mlflow(self):
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(
                {"loss": self.score[0], "accuracy": self.score[1]}
            )
            # Model registry does not work with file store
            if tracking_url_type_store != "file":
                # Register the model
                # There are other ways to use the Model Registry, which depends on the use case,
                # please refer to the doc for more information:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.keras.log_model(self.model, "model", registered_model_name="VGG16Model")
            else:
                mlflow.keras.log_model(self.model, "model")

In [15]:
import dagshub
dagshub.init(repo_owner='azizulhakim8291', repo_name='Kidney-disease-classification-mlops', mlflow=True)

import mlflow
with mlflow.start_run():
    mlflow.log_param('parameter name', 'value')
    mlflow.log_metric('metric name', 1)

Output:
[2024-08-01 23:40:02,445: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/repos/azizulhakim8291/Kidney-disease-classification-mlops "HTTP/1.1 200 OK"]
Initialized MLflow to track repo "azizulhakim8291/Kidney-disease-classification-mlops"
[2024-08-01 23:40:02,453: INFO: helpers: Initialized MLflow to track repo "azizulhakim8291/Kidney-disease-classification-mlops"]
Repository azizulhakim8291/Kidney-disease-classification-mlops initialized!
[2024-08-01 23:40:02,458: INFO: helpers: Repository azizulhakim8291/Kidney-disease-classification-mlops initialized!]

In [19]:

Output:
[2024-08-01 23:44:03,754: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-01 23:44:03,764: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-01 23:44:03,770: INFO: common: Created directory at: artifacts]
Found 139 images belonging to 2 classes.
9/9 [==============================] - 13s 1s/step - loss: 0.5003 - accuracy: 0.9568
[2024-08-01 23:44:17,498: INFO: common: Json file saved at: scores.json]
|
270 |
+
]
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"name": "stderr",
|
274 |
+
"output_type": "stream",
|
275 |
+
"text": [
|
276 |
+
"2024/08/01 23:44:20 WARNING mlflow.tensorflow: You are saving a TensorFlow Core model or Keras model without a signature. Inference with mlflow.pyfunc.spark_udf() will not work unless the model's pyfunc representation accepts pandas DataFrames as inference inputs.\n"
|
277 |
+
]
|
278 |
+
},
|
279 |
+
{
|
280 |
+
"name": "stdout",
|
281 |
+
"output_type": "stream",
|
282 |
+
"text": [
|
283 |
+
"[2024-08-01 23:44:22,432: WARNING: save: Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 14). These functions will not be directly callable after loading.]\n",
|
284 |
+
"INFO:tensorflow:Assets written to: C:\\Users\\User\\AppData\\Local\\Temp\\tmp8n0wc3k0\\model\\data\\model\\assets\n",
|
285 |
+
"[2024-08-01 23:44:24,256: INFO: builder_impl: Assets written to: C:\\Users\\User\\AppData\\Local\\Temp\\tmp8n0wc3k0\\model\\data\\model\\assets]\n"
|
286 |
+
]
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"name": "stderr",
|
290 |
+
"output_type": "stream",
|
291 |
+
"text": [
|
292 |
+
"Registered model 'VGG16Model' already exists. Creating a new version of this model...\n",
|
293 |
+
"2024/08/01 23:47:06 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2\n",
|
294 |
+
"Created version '2' of model 'VGG16Model'.\n"
|
295 |
+
]
|
296 |
+
}
|
297 |
+
],
|
298 |
+
"source": [
|
299 |
+
"try:\n",
|
300 |
+
" config = ConfigurationManager()\n",
|
301 |
+
" eval_config = config.get_evaluation_config()\n",
|
302 |
+
" evaluation = Evaluation(eval_config)\n",
|
303 |
+
" evaluation.evaluation()\n",
|
304 |
+
" evaluation.log_into_mlflow()\n",
|
305 |
+
"\n",
|
306 |
+
"except Exception as e:\n",
|
307 |
+
" raise e"
|
308 |
+
]
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"cell_type": "code",
|
312 |
+
"execution_count": null,
|
313 |
+
"metadata": {},
|
314 |
+
"outputs": [],
|
315 |
+
"source": []
|
316 |
+
}
|
317 |
+
],
|
318 |
+
"metadata": {
|
319 |
+
"kernelspec": {
|
320 |
+
"display_name": "Python 3",
|
321 |
+
"language": "python",
|
322 |
+
"name": "python3"
|
323 |
+
},
|
324 |
+
"language_info": {
|
325 |
+
"codemirror_mode": {
|
326 |
+
"name": "ipython",
|
327 |
+
"version": 3
|
328 |
+
},
|
329 |
+
"file_extension": ".py",
|
330 |
+
"mimetype": "text/x-python",
|
331 |
+
"name": "python",
|
332 |
+
"nbconvert_exporter": "python",
|
333 |
+
"pygments_lexer": "ipython3",
|
334 |
+
"version": "3.11.0"
|
335 |
+
}
|
336 |
+
},
|
337 |
+
"nbformat": 4,
|
338 |
+
"nbformat_minor": 2
|
339 |
+
}
|
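A quick way to confirm the notebook's final run actually landed on the tracking server is mlflow.search_runs; a minimal sketch, assuming the metrics were logged under the names used in the notebook:

import mlflow

# same DagsHub-hosted tracking URI the notebook uses
mlflow.set_tracking_uri('https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow')
runs = mlflow.search_runs()  # recent runs of the active experiment, as a pandas DataFrame
print(runs[['metrics.loss', 'metrics.accuracy']].head())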
research/trials.ipynb
ADDED
File without changes
|
scores.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
1 |
+
{
|
2 |
+
"loss": 14.36583423614502,
|
3 |
+
"accuracy": 0.5179855823516846
|
4 |
+
}
|
setup.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
1 |
+
import setuptools
|
2 |
+
with open('README.md', 'r') as f:
|
3 |
+
long_description = f.read()
|
4 |
+
|
5 |
+
__version__ = '0.0.0'
|
6 |
+
|
7 |
+
REPO_NAME = "Kidney-disease-classification-mlops"
|
8 |
+
AUTHOR_USER_NAME = "HAKIM-ML"
|
9 |
+
SRC_REPO = "cnnClassifier"
|
10 |
+
AUTHOR_EMAIL = "[email protected]"
|
11 |
+
|
12 |
+
|
13 |
+
setuptools.setup(
|
14 |
+
name=SRC_REPO,
|
15 |
+
version=__version__,
|
16 |
+
author=AUTHOR_USER_NAME,
|
17 |
+
author_email=AUTHOR_EMAIL,
|
18 |
+
description="A small python package for CNN app",
|
19 |
+
long_description=long_description,
|
20 |
+
long_description_content_type="text/markdown",
|
21 |
+
url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}",
|
22 |
+
project_urls={
|
23 |
+
"Bug Tracker": f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}/issues",
|
24 |
+
},
|
25 |
+
package_dir={"": "src"},
|
26 |
+
packages=setuptools.find_packages(where="src")
|
27 |
+
)
|
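Because the package lives under src/, it has to be installed (typically with pip install -e . from the repo root) before the pipeline's imports resolve; a minimal check:

import cnnClassifier  # resolves once the editable install has run
print(cnnClassifier.__name__)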
src/cnnClassifier/__init__.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import logging
|
4 |
+
|
5 |
+
logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"
|
6 |
+
|
7 |
+
log_dir = "logs"
|
8 |
+
log_filepath = os.path.join(log_dir,"running_logs.log")
|
9 |
+
os.makedirs(log_dir, exist_ok=True)
|
10 |
+
|
11 |
+
|
12 |
+
logging.basicConfig(
|
13 |
+
level= logging.INFO,
|
14 |
+
format= logging_str,
|
15 |
+
|
16 |
+
handlers=[
|
17 |
+
logging.FileHandler(log_filepath),
|
18 |
+
logging.StreamHandler(sys.stdout)
|
19 |
+
]
|
20 |
+
)
|
21 |
+
|
22 |
+
logger = logging.getLogger("cnnClassifierLogger")
|
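Every module in the package uses this shared logger; a minimal usage sketch:

from cnnClassifier import logger

logger.info("stage started")  # written to stdout and to logs/running_logs.log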
src/cnnClassifier/components/__init__.py
ADDED
File without changes
|
src/cnnClassifier/components/data_ingestion.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
1 |
+
# components
|
2 |
+
|
3 |
+
import os
|
4 |
+
import zipfile
|
5 |
+
import gdown
|
6 |
+
from cnnClassifier import logger
|
7 |
+
from cnnClassifier.utils.common import get_size
|
8 |
+
from cnnClassifier.entity.config_entity import DataIngestionConfig
|
9 |
+
|
10 |
+
class DataIngestion:
|
11 |
+
def __init__(self, config: DataIngestionConfig):
|
12 |
+
self.config = config
|
13 |
+
|
14 |
+
def download_file(self) -> str:
|
15 |
+
try:
|
16 |
+
dataset_url = self.config.source_URL
|
17 |
+
zip_download_dir = self.config.local_data_file
|
18 |
+
os.makedirs(os.path.dirname(zip_download_dir), exist_ok=True)  # ensure the download directory exists
|
19 |
+
logger.info(f'Downloading data from {dataset_url} into {zip_download_dir}')
|
20 |
+
|
21 |
+
file_id = dataset_url.split('/')[-2]
|
22 |
+
prefix = 'https://drive.google.com/uc?export=download&id='
|
23 |
+
gdown.download(prefix + file_id, zip_download_dir)
|
24 |
+
logger.info(f'Downloaded data from {dataset_url} into {zip_download_dir}')
|
25 |
+
|
26 |
+
except Exception as e:
|
27 |
+
raise e
|
28 |
+
|
29 |
+
|
30 |
+
|
31 |
+
def extract_zip_file(self):
|
32 |
+
"""
|
33 |
+
Extracts the zip file at local_data_file into the unzip directory.
|
34 |
+
Returns None.
|
36 |
+
"""
|
37 |
+
unzip_path = self.config.unzip_dir
|
38 |
+
os.makedirs(unzip_path, exist_ok=True)
|
39 |
+
with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
|
40 |
+
zip_ref.extractall(unzip_path)
|
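download_file picks the Drive file id out of the share URL by position; an illustration with a hypothetical link (FILE_ID is a placeholder):

dataset_url = 'https://drive.google.com/file/d/FILE_ID/view?usp=sharing'  # hypothetical share link
file_id = dataset_url.split('/')[-2]                                      # -> 'FILE_ID'
download_url = 'https://drive.google.com/uc?export=download&id=' + file_id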
src/cnnClassifier/components/evaluation.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
from pathlib import Path
|
3 |
+
import mlflow
|
4 |
+
import mlflow.keras
|
5 |
+
from urllib.parse import urlparse
|
6 |
+
from cnnClassifier.utils.common import save_json
|
7 |
+
from cnnClassifier.entity.config_entity import EvaluationConfig
|
8 |
+
|
9 |
+
|
10 |
+
# Configure DagsHub as the MLflow tracking backend once, at import time.
|
11 |
+
import dagshub
|
12 |
+
dagshub.init(repo_owner='azizulhakim8291', repo_name='Kidney-disease-classification-mlops', mlflow=True)
|
19 |
+
|
20 |
+
|
21 |
+
class Evaluation:
|
22 |
+
def __init__(self, config: EvaluationConfig):
|
23 |
+
self.config = config
|
24 |
+
|
25 |
+
def _valid_generator(self):
|
26 |
+
|
27 |
+
datagenerator_kwargs = dict(
|
28 |
+
rescale = 1./255,
|
29 |
+
validation_split=0.30
|
30 |
+
)
|
31 |
+
|
32 |
+
dataflow_kwargs = dict(
|
33 |
+
target_size=self.config.params_image_size[:-1],
|
34 |
+
batch_size=self.config.params_batch_size,
|
35 |
+
interpolation="bilinear"
|
36 |
+
)
|
37 |
+
|
38 |
+
valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
|
39 |
+
**datagenerator_kwargs
|
40 |
+
)
|
41 |
+
|
42 |
+
self.valid_generator = valid_datagenerator.flow_from_directory(
|
43 |
+
directory=self.config.training_data,
|
44 |
+
subset="validation",
|
45 |
+
shuffle=False,
|
46 |
+
**dataflow_kwargs
|
47 |
+
)
|
48 |
+
|
49 |
+
@staticmethod
|
50 |
+
def load_model(path: Path) -> tf.keras.Model:
|
51 |
+
return tf.keras.models.load_model(path)
|
52 |
+
|
53 |
+
|
54 |
+
def evaluation(self):
|
55 |
+
self.model = self.load_model(self.config.path_of_model)
|
56 |
+
self._valid_generator()
|
57 |
+
self.score = self.model.evaluate(self.valid_generator)
|
58 |
+
self.save_score()
|
59 |
+
|
60 |
+
def save_score(self):
|
61 |
+
scores = {"loss": self.score[0], "accuracy": self.score[1]}
|
62 |
+
save_json(path=Path("scores.json"), data=scores)
|
63 |
+
|
64 |
+
def log_into_mlflow(self):
|
65 |
+
mlflow.set_registry_uri(self.config.mlflow_uri)
|
66 |
+
tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
|
67 |
+
|
68 |
+
with mlflow.start_run():
|
69 |
+
mlflow.log_params(self.config.all_params)
|
70 |
+
mlflow.log_metrics(
|
71 |
+
{"loss": self.score[0], "accuracy": self.score[1]}
|
72 |
+
)
|
73 |
+
# Model registry does not work with file store
|
74 |
+
if tracking_url_type_store != "file":
|
75 |
+
|
76 |
+
# Register the model
|
77 |
+
# There are other ways to use the Model Registry, which depends on the use case,
|
78 |
+
# please refer to the doc for more information:
|
79 |
+
# https://mlflow.org/docs/latest/model-registry.html#api-workflow
|
80 |
+
mlflow.keras.log_model(self.model, "model", registered_model_name="VGG16Model")
|
81 |
+
else:
|
82 |
+
mlflow.keras.log_model(self.model, "model")
|
83 |
+
|
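The registry branch in log_into_mlflow keys off the tracking URI scheme; a small sketch of the two cases (the file URI is illustrative):

from urllib.parse import urlparse

print(urlparse('https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow').scheme)  # 'https': remote store, registry available
print(urlparse('file:///tmp/mlruns').scheme)                                                              # 'file': local store, no model registry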
src/cnnClassifier/components/prepare_base_model.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
import tensorflow as tf
|
4 |
+
from cnnClassifier.config.configuration import PrepareBaseModelConfig
|
5 |
+
|
6 |
+
class PrepareBaseModel:
|
7 |
+
def __init__(self, config: PrepareBaseModelConfig):
|
8 |
+
self.config = config
|
9 |
+
|
10 |
+
|
11 |
+
def get_base_model(self):
|
12 |
+
self.model = tf.keras.applications.vgg16.VGG16(
|
13 |
+
input_shape=self.config.params_image_size,
|
14 |
+
weights=self.config.params_weights,
|
15 |
+
include_top=self.config.params_include_top
|
16 |
+
)
|
17 |
+
|
18 |
+
self.save_model(path=self.config.base_model_path, model=self.model)
|
19 |
+
|
20 |
+
|
21 |
+
|
22 |
+
@staticmethod
|
23 |
+
def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
|
24 |
+
if freeze_all:
|
25 |
+
for layer in model.layers:
|
26 |
+
layer.trainable = False
|
27 |
+
elif (freeze_till is not None) and (freeze_till > 0):
|
28 |
+
for layer in model.layers[:-freeze_till]:
|
29 |
+
layer.trainable = False
|
30 |
+
|
31 |
+
flatten_in = tf.keras.layers.Flatten()(model.output)
|
32 |
+
prediction = tf.keras.layers.Dense(
|
33 |
+
units=classes,
|
34 |
+
activation="softmax"
|
35 |
+
)(flatten_in)
|
36 |
+
|
37 |
+
full_model = tf.keras.models.Model(
|
38 |
+
inputs=model.input,
|
39 |
+
outputs=prediction
|
40 |
+
)
|
41 |
+
|
42 |
+
full_model.compile(
|
43 |
+
optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
|
44 |
+
loss=tf.keras.losses.CategoricalCrossentropy(),
|
45 |
+
metrics=["accuracy"]
|
46 |
+
)
|
47 |
+
|
48 |
+
full_model.summary()
|
49 |
+
return full_model
|
50 |
+
|
51 |
+
|
52 |
+
def update_base_model(self):
|
53 |
+
self.full_model = self._prepare_full_model(
|
54 |
+
model=self.model,
|
55 |
+
classes=self.config.params_classes,
|
56 |
+
freeze_all=True,
|
57 |
+
freeze_till=None,
|
58 |
+
learning_rate=self.config.params_learning_rate
|
59 |
+
)
|
60 |
+
|
61 |
+
self.save_model(path=self.config.updated_base_model_path, model=self.full_model)
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
@staticmethod
|
66 |
+
def save_model(path: Path, model: tf.keras.Model):
|
67 |
+
model.save(path)
|
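The freeze logic in _prepare_full_model leaves the last freeze_till layers trainable; a minimal sketch on an untrained VGG16 (weights=None skips the weight download; the count assumes the stock architecture):

import tensorflow as tf

base = tf.keras.applications.vgg16.VGG16(input_shape=[224, 224, 3], weights=None, include_top=False)
for layer in base.layers[:-2]:  # freeze_till=2: freeze all but the last two layers
    layer.trainable = False
print(sum(layer.trainable for layer in base.layers))  # 2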
src/cnnClassifier/components/training.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
1 |
+
from cnnClassifier.entity.config_entity import TrainingConfig
|
2 |
+
import tensorflow as tf
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
class Training:
|
6 |
+
def __init__(self, config: TrainingConfig):
|
7 |
+
self.config = config
|
8 |
+
|
9 |
+
|
10 |
+
def get_base_model(self):
|
11 |
+
self.model = tf.keras.models.load_model(
|
12 |
+
self.config.updated_base_model_path
|
13 |
+
)
|
14 |
+
|
15 |
+
def train_valid_generator(self):
|
16 |
+
|
17 |
+
datagenerator_kwargs = dict(
|
18 |
+
rescale = 1./255,
|
19 |
+
validation_split=0.20
|
20 |
+
)
|
21 |
+
|
22 |
+
dataflow_kwargs = dict(
|
23 |
+
target_size=self.config.params_image_size[:-1],
|
24 |
+
batch_size=self.config.params_batch_size,
|
25 |
+
interpolation="bilinear"
|
26 |
+
)
|
27 |
+
|
28 |
+
valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
|
29 |
+
**datagenerator_kwargs
|
30 |
+
)
|
31 |
+
|
32 |
+
self.valid_generator = valid_datagenerator.flow_from_directory(
|
33 |
+
directory=self.config.training_data,
|
34 |
+
subset="validation",
|
35 |
+
shuffle=False,
|
36 |
+
**dataflow_kwargs
|
37 |
+
)
|
38 |
+
|
39 |
+
if self.config.params_is_augmentation:
|
40 |
+
train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
|
41 |
+
rotation_range=40,
|
42 |
+
horizontal_flip=True,
|
43 |
+
width_shift_range=0.2,
|
44 |
+
height_shift_range=0.2,
|
45 |
+
shear_range=0.2,
|
46 |
+
zoom_range=0.2,
|
47 |
+
**datagenerator_kwargs
|
48 |
+
)
|
49 |
+
else:
|
50 |
+
train_datagenerator = valid_datagenerator
|
51 |
+
|
52 |
+
self.train_generator = train_datagenerator.flow_from_directory(
|
53 |
+
directory=self.config.training_data,
|
54 |
+
subset="training",
|
55 |
+
shuffle=True,
|
56 |
+
**dataflow_kwargs
|
57 |
+
)
|
58 |
+
|
59 |
+
|
60 |
+
@staticmethod
|
61 |
+
def save_model(path: Path, model: tf.keras.Model):
|
62 |
+
model.save(path)
|
63 |
+
|
64 |
+
|
65 |
+
|
66 |
+
|
67 |
+
def train(self):
|
68 |
+
self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
|
69 |
+
self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size
|
70 |
+
|
71 |
+
self.model.fit(
|
72 |
+
self.train_generator,
|
73 |
+
epochs=self.config.params_epochs,
|
74 |
+
steps_per_epoch=self.steps_per_epoch,
|
75 |
+
validation_steps=self.validation_steps,
|
76 |
+
validation_data=self.valid_generator
|
77 |
+
)
|
78 |
+
|
79 |
+
self.save_model(
|
80 |
+
path=self.config.trained_model_path,
|
81 |
+
model=self.model
|
82 |
+
)
|
83 |
+
|
84 |
+
|
85 |
+
|
86 |
+
|
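train() relies on integer division for the step counts, so the last partial batch of each epoch is dropped; a worked example with hypothetical counts:

samples, batch_size = 323, 16              # hypothetical: 323 training images, batch size 16
steps_per_epoch = samples // batch_size    # 20 full batches per epoch; the 3 leftover images are skipped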
src/cnnClassifier/config/__init__.py
ADDED
File without changes
|
src/cnnClassifier/config/configuration.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
1 |
+
from cnnClassifier.utils.common import read_yaml, create_directories
|
2 |
+
from cnnClassifier.constant import *
|
3 |
+
from cnnClassifier.entity.config_entity import (DataIngestionConfig,
|
4 |
+
PrepareBaseModelConfig,
|
5 |
+
TrainingConfig,EvaluationConfig)
|
6 |
+
import os
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
class ConfigurationManager:
|
11 |
+
def __init__(
|
12 |
+
self,
|
13 |
+
config_filepath = CONFIG_FILE_PATH,
|
14 |
+
params_filepath = PARAMS_FILE_PATH
|
15 |
+
):
|
16 |
+
self.config = read_yaml(config_filepath)
|
17 |
+
self.params = read_yaml(params_filepath)
|
18 |
+
|
19 |
+
create_directories([self.config.atifacts_root])
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
+
def get_data_ingestion_config(self) -> DataIngestionConfig:
|
24 |
+
config = self.config.data_ingestion
|
25 |
+
create_directories([config.root_dir])
|
26 |
+
|
27 |
+
data_ingestion_config = DataIngestionConfig(
|
28 |
+
root_dir=config.root_dir,
|
29 |
+
source_URL=config.source_URL,
|
30 |
+
local_data_file=config.local_data_file,
|
31 |
+
unzip_dir=config.unzip_dir
|
32 |
+
)
|
33 |
+
|
34 |
+
return data_ingestion_config
|
35 |
+
|
36 |
+
|
37 |
+
def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
|
38 |
+
config = self.config.prepare_base_model
|
39 |
+
|
40 |
+
create_directories([config.root_dir])
|
41 |
+
|
42 |
+
prepare_base_model_config = PrepareBaseModelConfig(
|
43 |
+
root_dir=Path(config.root_dir),
|
44 |
+
base_model_path=Path(config.base_model_path),
|
45 |
+
updated_base_model_path=Path(config.updated_base_model_path),
|
46 |
+
params_image_size=self.params.IMAGE_SIZE,
|
47 |
+
params_learning_rate=self.params.LEARNING_RATE,
|
48 |
+
params_include_top=self.params.INCLUDE_TOP,
|
49 |
+
params_weights=self.params.WEIGHTS,
|
50 |
+
params_classes=self.params.CLASSES
|
51 |
+
)
|
52 |
+
|
53 |
+
return prepare_base_model_config
|
54 |
+
|
55 |
+
|
56 |
+
def get_training_config(self) -> TrainingConfig:
|
57 |
+
training = self.config.training
|
58 |
+
prepare_base_model = self.config.prepare_base_model
|
59 |
+
params = self.params
|
60 |
+
training_data = os.path.join(self.config.data_ingestion.unzip_dir, "kidney-ct-scan-image")
|
61 |
+
create_directories([
|
62 |
+
Path(training.root_dir)
|
63 |
+
])
|
64 |
+
|
65 |
+
training_config = TrainingConfig(
|
66 |
+
root_dir=Path(training.root_dir),
|
67 |
+
trained_model_path=Path(training.trained_model_path),
|
68 |
+
updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
|
69 |
+
training_data=Path(training_data),
|
70 |
+
params_epochs=params.EPOCHS,
|
71 |
+
params_batch_size=params.BATCH_SIZE,
|
72 |
+
params_is_augmentation=params.AUGMENTATION,
|
73 |
+
params_image_size=params.IMAGE_SIZE
|
74 |
+
)
|
75 |
+
|
76 |
+
return training_config
|
77 |
+
|
78 |
+
def get_evaluation_config(self) -> EvaluationConfig:
|
79 |
+
eval_config = EvaluationConfig(
|
80 |
+
path_of_model='artifacts/training/model.h5',
|
81 |
+
training_data='artifacts/data_ingestion/unzip/kidney-ct-scan-image',
|
82 |
+
mlflow_uri='https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow',
|
83 |
+
all_params= self.params,
|
84 |
+
params_image_size=self.params.IMAGE_SIZE,
|
85 |
+
params_batch_size=self.params.BATCH_SIZE
|
86 |
+
)
|
87 |
+
return eval_config
|
88 |
+
|
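The dotted access used throughout this class (self.config.data_ingestion.root_dir and friends) works because read_yaml wraps the parsed YAML in a ConfigBox; a self-contained illustration:

from box import ConfigBox

cfg = ConfigBox({'data_ingestion': {'root_dir': 'artifacts/data_ingestion'}})
print(cfg.data_ingestion.root_dir)  # 'artifacts/data_ingestion', via attribute access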
src/cnnClassifier/constant/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
+
CONFIG_FILE_PATH = Path("config/config.yaml")
|
4 |
+
PARAMS_FILE_PATH = Path("params.yaml")
|
src/cnnClassifier/entity/__init__.py
ADDED
File without changes
|
src/cnnClassifier/entity/config_entity.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
1 |
+
# entity
|
2 |
+
from dataclasses import dataclass
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
@dataclass(frozen=True)
|
6 |
+
class DataIngestionConfig:
|
7 |
+
root_dir: Path
|
8 |
+
source_URL: str
|
9 |
+
local_data_file: Path
|
10 |
+
unzip_dir: Path
|
11 |
+
|
12 |
+
@dataclass(frozen=True)
|
13 |
+
class PrepareBaseModelConfig:
|
14 |
+
root_dir: Path
|
15 |
+
base_model_path: Path
|
16 |
+
updated_base_model_path: Path
|
17 |
+
params_image_size: list
|
18 |
+
params_learning_rate: float
|
19 |
+
params_include_top: bool
|
20 |
+
params_weights: str
|
21 |
+
params_classes: int
|
22 |
+
|
23 |
+
|
24 |
+
|
25 |
+
@dataclass(frozen=True)
|
26 |
+
class TrainingConfig:
|
27 |
+
root_dir: Path
|
28 |
+
trained_model_path: Path
|
29 |
+
updated_base_model_path: Path
|
30 |
+
training_data: Path
|
31 |
+
params_epochs: int
|
32 |
+
params_batch_size: int
|
33 |
+
params_is_augmentation: bool
|
34 |
+
params_image_size: list
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
@dataclass(frozen=True)
|
40 |
+
class EvaluationConfig:
|
41 |
+
path_of_model: Path
|
42 |
+
training_data: Path
|
43 |
+
all_params: dict
|
44 |
+
mlflow_uri: str
|
45 |
+
params_image_size: list
|
46 |
+
params_batch_size: int
|
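frozen=True makes each config entity read-only after construction, so a pipeline stage cannot silently mutate shared configuration; a short demonstration:

import dataclasses

@dataclasses.dataclass(frozen=True)
class Demo:
    root_dir: str

d = Demo(root_dir='artifacts')
try:
    d.root_dir = 'elsewhere'
except dataclasses.FrozenInstanceError:
    print('config entities are immutable')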
src/cnnClassifier/pipeline/__init__.py
ADDED
File without changes
|
src/cnnClassifier/pipeline/predict.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from tensorflow.keras.models import load_model
|
3 |
+
from tensorflow.keras.preprocessing import image
|
4 |
+
import os
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
class Prediction:
|
9 |
+
def __init__(self,filename):
|
10 |
+
self.filename =filename
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
def predict(self):
|
15 |
+
# load model
|
16 |
+
model = load_model("model.h5")
|
17 |
+
|
18 |
+
imagename = self.filename
|
19 |
+
test_image = image.load_img(imagename, target_size = (224,224))
|
20 |
+
test_image = image.img_to_array(test_image)
|
21 |
+
test_image = test_image / 255.0  # match the 1./255 rescaling applied by the training generators
test_image = np.expand_dims(test_image, axis = 0)
|
22 |
+
result = np.argmax(model.predict(test_image), axis=1)
|
23 |
+
print(result)
|
24 |
+
|
25 |
+
if result[0] == 1:
|
26 |
+
prediction = 'Normal'
|
27 |
+
else:
|
28 |
+
prediction = 'Tumor'
|
29 |
+
|
30 |
+
return prediction
|
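A minimal usage sketch of the class, assuming model.h5 sits in the working directory; the image path is a placeholder:

from cnnClassifier.pipeline.predict import Prediction

result = Prediction('sample_ct_scan.png').predict()  # hypothetical input image
print(result)  # 'Normal' or 'Tumor'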
src/cnnClassifier/pipeline/stage_01_data_ingestion.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
1 |
+
from cnnClassifier.config.configuration import ConfigurationManager
|
2 |
+
from cnnClassifier.components.data_ingestion import DataIngestion
|
3 |
+
from cnnClassifier import logger
|
4 |
+
STAGE_NAME = "Data Ingestion stage"
|
5 |
+
|
6 |
+
|
7 |
+
class DataIngestionTrainingPipeline:
|
8 |
+
def __init__(self):
|
9 |
+
pass
|
10 |
+
|
11 |
+
def main(self):
|
12 |
+
config = ConfigurationManager()
|
13 |
+
data_ingestion_config = config.get_data_ingestion_config()
|
14 |
+
data_ingestion = DataIngestion(config=data_ingestion_config)
|
15 |
+
data_ingestion.download_file()
|
16 |
+
data_ingestion.extract_zip_file()
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
if __name__ == '__main__':
|
21 |
+
try:
|
22 |
+
logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
|
23 |
+
obj = DataIngestionTrainingPipeline()
|
24 |
+
obj.main()
|
25 |
+
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
|
26 |
+
except Exception as e:
|
27 |
+
logger.exception(e)
|
28 |
+
raise e
|
src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
1 |
+
from cnnClassifier.config.configuration import ConfigurationManager
|
2 |
+
from cnnClassifier.components.prepare_base_model import PrepareBaseModel
|
3 |
+
from cnnClassifier import logger
|
4 |
+
|
5 |
+
|
6 |
+
STAGE_NAME = "Prepare base model"
|
7 |
+
|
8 |
+
|
9 |
+
class PrepareBaseModelTrainingPipeline:
|
10 |
+
def __init__(self):
|
11 |
+
pass
|
12 |
+
|
13 |
+
def main(self):
|
14 |
+
config = ConfigurationManager()
|
15 |
+
prepare_base_model_config = config.get_prepare_base_model_config()
|
16 |
+
prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
|
17 |
+
prepare_base_model.get_base_model()
|
18 |
+
prepare_base_model.update_base_model()
|
19 |
+
|
20 |
+
|
21 |
+
|
22 |
+
if __name__ == '__main__':
|
23 |
+
try:
|
24 |
+
logger.info(f"*******************")
|
25 |
+
logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
|
26 |
+
obj = PrepareBaseModelTrainingPipeline()
|
27 |
+
obj.main()
|
28 |
+
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
|
29 |
+
except Exception as e:
|
30 |
+
logger.exception(e)
|
31 |
+
raise e
|
src/cnnClassifier/pipeline/stage_03_train_model.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
1 |
+
from cnnClassifier.config.configuration import ConfigurationManager
|
2 |
+
from cnnClassifier.components.training import Training
|
3 |
+
|
4 |
+
from cnnClassifier import logger
|
5 |
+
|
6 |
+
|
7 |
+
STAGE_NAME = "Model Training"
|
8 |
+
|
9 |
+
|
10 |
+
class ModelTrainingPipeline:
|
11 |
+
def __init__(self):
|
12 |
+
pass
|
13 |
+
|
14 |
+
def main(self):
|
15 |
+
config = ConfigurationManager()
|
16 |
+
training_config = config.get_training_config()
|
17 |
+
training = Training(config=training_config)
|
18 |
+
training.get_base_model()
|
19 |
+
training.train_valid_generator()
|
20 |
+
training.train()
|
21 |
+
|
22 |
+
|
23 |
+
|
24 |
+
if __name__ == '__main__':
|
25 |
+
try:
|
26 |
+
logger.info(f"*******************")
|
27 |
+
logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
|
28 |
+
obj = ModelTrainingPipeline()
|
29 |
+
obj.main()
|
30 |
+
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
|
31 |
+
except Exception as e:
|
32 |
+
logger.exception(e)
|
33 |
+
raise e
|
src/cnnClassifier/pipeline/stage_04_evaluation.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
1 |
+
from cnnClassifier.config.configuration import ConfigurationManager
|
2 |
+
from cnnClassifier.components.evaluation import Evaluation
|
3 |
+
|
4 |
+
from cnnClassifier import logger
|
5 |
+
|
6 |
+
|
7 |
+
STAGE_NAME = "Evaluation"
|
8 |
+
|
9 |
+
|
10 |
+
class EvaluationTrainingPipeline:
|
11 |
+
def __init__(self):
|
12 |
+
pass
|
13 |
+
|
14 |
+
def main(self):
|
15 |
+
config = ConfigurationManager()
|
16 |
+
eval_config = config.get_evaluation_config()
|
17 |
+
evaluation = Evaluation(eval_config)
|
18 |
+
evaluation.evaluation()
|
19 |
+
evaluation.log_into_mlflow()
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
+
if __name__ == '__main__':
|
24 |
+
try:
|
25 |
+
logger.info(f"*******************")
|
26 |
+
logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
|
27 |
+
obj = EvaluationTrainingPipeline()
|
28 |
+
obj.main()
|
29 |
+
logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
|
30 |
+
except Exception as e:
|
31 |
+
logger.exception(e)
|
32 |
+
raise e
|
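The four stage runners share the same shape, so chaining them end to end is mechanical; a minimal sketch mirroring the per-stage __main__ blocks (main.py in this commit presumably plays this role; this is not its verbatim contents):

from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
from cnnClassifier.pipeline.stage_02_prepare_base_model import PrepareBaseModelTrainingPipeline
from cnnClassifier.pipeline.stage_03_train_model import ModelTrainingPipeline
from cnnClassifier.pipeline.stage_04_evaluation import EvaluationTrainingPipeline

for stage in (DataIngestionTrainingPipeline, PrepareBaseModelTrainingPipeline,
              ModelTrainingPipeline, EvaluationTrainingPipeline):
    stage().main()  # each stage builds its own config via ConfigurationManager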
src/cnnClassifier/utils/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
+
CONFIG_FILE_PATH = Path("config/config.yaml")
|
4 |
+
PARAMS_FILE_PATH = Path("params.yaml")
|
src/cnnClassifier/utils/common.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
1 |
+
import os
|
2 |
+
from box.exceptions import BoxValueError
|
3 |
+
import yaml
|
4 |
+
from cnnClassifier import logger
|
5 |
+
import json
|
6 |
+
import joblib
|
7 |
+
from ensure import ensure_annotations
|
8 |
+
from box import ConfigBox
|
9 |
+
from pathlib import Path
|
10 |
+
from typing import Any
|
11 |
+
import base64
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
@ensure_annotations
|
16 |
+
def read_yaml(path_to_yaml: Path) -> ConfigBox:
|
17 |
+
"""reads yaml file and returns
|
18 |
+
|
19 |
+
Args:
|
20 |
+
path_to_yaml (Path): path to the yaml file
|
21 |
+
|
22 |
+
Raises:
|
23 |
+
ValueError: if yaml file is empty
|
24 |
+
e: any other error raised while reading the file
|
25 |
+
|
26 |
+
Returns:
|
27 |
+
ConfigBox: ConfigBox type
|
28 |
+
"""
|
29 |
+
try:
|
30 |
+
with open(path_to_yaml) as yaml_file:
|
31 |
+
content = yaml.safe_load(yaml_file)
|
32 |
+
logger.info(f"yaml file: {path_to_yaml} loaded successfully")
|
33 |
+
return ConfigBox(content)
|
34 |
+
except BoxValueError:
|
35 |
+
raise ValueError("yaml file is empty")
|
36 |
+
except Exception as e:
|
37 |
+
raise e
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
@ensure_annotations
|
42 |
+
def create_directories(path_to_directories: list, verbose = True):
|
43 |
+
"""create list of directories
|
44 |
+
|
45 |
+
Args:
|
46 |
+
path_to_directories (list): list of path of directories
|
47 |
+
verbose (bool, optional): log each created directory. Defaults to True.
|
48 |
+
"""
|
49 |
+
|
50 |
+
for path in path_to_directories:
|
51 |
+
os.makedirs(path, exist_ok=True)
|
52 |
+
if verbose:
|
53 |
+
logger.info(f'Created directory at: {path}')
|
54 |
+
|
55 |
+
@ensure_annotations
|
56 |
+
def save_json(path: Path, data: dict):
|
57 |
+
"""save json data
|
58 |
+
|
59 |
+
Args:
|
60 |
+
path (Path): path to json file
|
61 |
+
data (dict): data to be saved in json file
|
62 |
+
"""
|
63 |
+
|
64 |
+
|
65 |
+
with open(path, 'w') as f:
|
66 |
+
json.dump(data, f, indent=4)
|
67 |
+
|
68 |
+
logger.info(f'Json file saved at: {path}')
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
|
73 |
+
@ensure_annotations
|
74 |
+
def load_json(path: Path) -> ConfigBox:
|
75 |
+
"""load json files data
|
76 |
+
|
77 |
+
Args:
|
78 |
+
path (Path): path to json file
|
79 |
+
|
80 |
+
Returns:
|
81 |
+
ConfigBox: data as class attributes instead of dict
|
82 |
+
"""
|
83 |
+
|
84 |
+
with open(path, 'r') as f:
|
85 |
+
content = json.load(f)
|
86 |
+
|
87 |
+
logger.info(f"Json file loaded successfully from: {path}")
|
88 |
+
return ConfigBox(content)
|
89 |
+
|
90 |
+
|
91 |
+
@ensure_annotations
|
92 |
+
def save_bin(data: Any, path: Path):
|
93 |
+
"""save binary file
|
94 |
+
|
95 |
+
Args:
|
96 |
+
data (Any): data to be saved as binary
|
97 |
+
path (Path): path to binary file
|
98 |
+
"""
|
99 |
+
joblib.dump(value=data, filename=path)
|
100 |
+
logger.info(f'binary file saved at: {path}')
|
101 |
+
|
102 |
+
|
103 |
+
|
104 |
+
@ensure_annotations
|
105 |
+
def load_bin(path: Path) -> ConfigBox:
|
106 |
+
"""load binary data
|
107 |
+
|
108 |
+
Args:
|
109 |
+
path (Path): path to binary file
|
110 |
+
|
111 |
+
Returns:
|
112 |
+
Any: object stored in the file
|
113 |
+
"""
|
114 |
+
|
115 |
+
data = joblib.load(path)
|
116 |
+
logger.info(f'binary file has been loaded successfully from : {path}')
|
117 |
+
return data
|
118 |
+
|
119 |
+
|
120 |
+
@ensure_annotations
|
121 |
+
def get_size(path: Path) -> str:
|
122 |
+
'''
|
123 |
+
get size in KB
|
124 |
+
|
125 |
+
Args:
|
126 |
+
path (Path): path of the file
|
127 |
+
|
128 |
+
Returns:
|
129 |
+
str: size in KB'''
|
130 |
+
|
131 |
+
size_in_kb = round(os.path.getsize(path)/1024)
|
132 |
+
return f"~ {size_in_kb} KB"
|
133 |
+
|
134 |
+
|
135 |
+
def decodeImage(imgstring, fileName):
|
136 |
+
imgdata = base64.b64decode(imgstring)
|
137 |
+
with open(fileName, 'wb') as f:
|
138 |
+
f.write(imgdata)
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
def encodeImageIntoBase64(croppedImagePath):
|
144 |
+
with open(croppedImagePath, 'rb') as f:
|
145 |
+
return base64.b64encode(f.read())
|
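A short round trip through the JSON helpers; load_json hands the data back as a ConfigBox, so values read back with attribute access (the path here is a scratch file):

from pathlib import Path
from cnnClassifier.utils.common import save_json, load_json

save_json(path=Path('demo_scores.json'), data={'loss': 0.5, 'accuracy': 0.95})
metrics = load_json(Path('demo_scores.json'))
print(metrics.accuracy)  # 0.95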
template.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
import logging
|
4 |
+
|
5 |
+
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:')
|
6 |
+
|
7 |
+
project_name = 'cnnClassifier'
|
8 |
+
|
9 |
+
list_of_files = [
|
10 |
+
'.github/workflows/.gitkeep',
|
11 |
+
f"src/{project_name}/__init__.py",
|
12 |
+
f"src/{project_name}/components/__init__.py",
|
13 |
+
f"src/{project_name}/utils/__init__.py",
|
14 |
+
f"src/{project_name}/config/__init__.py",
|
15 |
+
f"src/{project_name}/config/configuration.py",
|
16 |
+
f"src/{project_name}/pipeline/__init__.py",
|
17 |
+
f"src/{project_name}/entity/__init__.py",
|
18 |
+
f"src/{project_name}/constant/__init__.py",
|
19 |
+
'config/config.yaml',
|
20 |
+
'dvc.yaml',
|
21 |
+
'params.yaml',
|
22 |
+
'requirements.txt',
|
23 |
+
'setup.py',
|
24 |
+
'research/trials.ipynb',
|
25 |
+
|
26 |
+
]
|
27 |
+
|
28 |
+
for filepath in list_of_files:
|
29 |
+
filepath = Path(filepath)
|
30 |
+
filedir, filename = os.path.split(filepath)
|
31 |
+
|
32 |
+
|
33 |
+
if filedir != "":
|
34 |
+
os.makedirs(filedir, exist_ok=True)
|
35 |
+
logging.info(f'Creating directory; {filedir} for the file: {filename}')
|
36 |
+
|
37 |
+
if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
|
38 |
+
with open(filepath, 'w') as f:
|
39 |
+
pass
|
40 |
+
logging.info(f'Creating an empty file: {filename}')
|
41 |
+
else:
|
42 |
+
logging.info(f'File: {filename} already exists')
|