hakim committed on
Commit
7195b15
·
1 Parent(s): ecb0e90

pipeline added

Browse files
.dvcignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Add patterns of files dvc should ignore, which could improve
2
+ # the performance. Learn more at
3
+ # https://dvc.org/doc/user-guide/dvcignore
.gitignore CHANGED
@@ -160,3 +160,5 @@ cython_debug/
160
  # and can be added to the global gitignore or merged into this file. For a more nuclear
161
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
  #.idea/
 
 
 
160
  # and can be added to the global gitignore or merged into this file. For a more nuclear
161
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
  #.idea/
163
+ artifacts/*
164
+ model.h5/*
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . /code
10
+
11
+ CMD ["streamlit", "run", "app.py"]
README.md CHANGED
@@ -1 +1,111 @@
1
- # Kidney-disease-classification-mlops
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Image To Text App
3
+ emoji: 📹
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: streamlit
7
+ app_file: app.py
8
+ pinned: false
9
+ ---
10
+
11
+
12
+
13
+ # Kidney-disease-classification-mlops
14
+
15
+ ## Workflows
16
+
17
+ 1. Update config.yaml
18
+ 2. Update secrets.yaml [Optional]
19
+ 3. Update params.yaml
20
+ 4. Update the entity
21
+ 5. Update the configuration manager in src config
22
+ 6. Update the components
23
+ 7. Update the pipeline
24
+ 8. Update the main.py
25
+ 9. Update the dvc.yaml
26
+ 10. app.py
27
+
28
+ # How to run?
29
+ ### STEPS:
30
+
31
+ Clone the repository
32
+
33
+ ```bash
34
+ git clone https://github.com/HAKIM-ML/Kidney-disease-classification-mlops
35
+ cd Kidney-disease-classification-mlops
36
+ ```
37
+ ### STEP 01- Create a conda environment after opening the repository
38
+
39
+ ```bash
40
+ conda create -n cnncls python=3.8 -y
41
+ ```
42
+
43
+ ```bash
44
+ conda activate cnncls
45
+ ```
46
+
47
+
48
+ ### STEP 02- install the requirements
49
+ ```bash
50
+ pip install -r requirements.txt
51
+ ```
52
+
53
+ ```bash
54
+ # Finally run the following command
55
+ streamlit run app.py
56
+ ```
57
+
58
+ Now,
59
+ ```bash
60
+ open up your localhost and port
61
+ ```
62
+
63
+
64
+
65
+
66
+
67
+
68
+ ## MLflow
69
+
70
+ - [Documentation](https://mlflow.org/docs/latest/index.html)
71
+
72
+
73
+ ##### cmd
74
+ - mlflow ui
75
+
76
+ ### dagshub
77
+ [dagshub](https://dagshub.com/)
78
+ MLFLOW_TRACKING_URI = https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow
79
+
80
+
81
+ python script.py
82
+
83
+ import dagshub
84
+ dagshub.init(repo_owner='azizulhakim8291', repo_name='Kidney-disease-classification-mlops', mlflow=True)
85
+
86
+ import mlflow
87
+ with mlflow.start_run():
88
+ mlflow.log_param('parameter name', 'value')
89
+ mlflow.log_metric('metric name', 1)
90
+
91
+ ### DVC cmd
92
+
93
+ 1. dvc init
94
+ 2. dvc repro
95
+ 3. dvc dag
96
+
97
+
98
+ ## About MLflow & DVC
99
+
100
+ MLflow
101
+
102
+ - It's production grade
103
+ - Trace all of your experiments
104
+ - Logging & tagging your models
105
+
106
+
107
+ DVC
108
+
109
+ - It's very lightweight, for POC only
110
+ - Lightweight experiment tracker
111
+ - It can perform Orchestration (Creating Pipelines)
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ import io
4
+ from PIL import Image
5
+ import os
6
+ from cnnClassifier.pipeline.predict import Prediction
7
+
8
+ st.set_page_config(page_title="Chicken Health Predictor", page_icon="🐔", layout="wide")
9
+
10
+ st.title("🐔 Chicken Health Predictor")
11
+ st.markdown("### Upload an image to predict if the chicken is healthy or has coccidiosis")
12
+
13
+ uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
14
+
15
+ col1, col2 = st.columns(2)
16
+
17
+ if uploaded_file is not None:
18
+ image = Image.open(uploaded_file)
19
+ col1.image(image, caption="Uploaded Image", use_column_width=True)
20
+
21
+ # Save the uploaded file temporarily
22
+ temp_file = "temp_image.jpg"
23
+ image.save(temp_file)
24
+
25
+ with st.spinner("Analyzing the image..."):
26
+ predictor = Prediction(temp_file)
27
+ prediction = predictor.predict()
28
+
29
+ # Remove the temporary file
30
+ os.remove(temp_file)
31
+
32
+ col2.markdown("## Prediction Result")
33
+ if prediction == "Normal":
34
+ col2.success(f"The chicken appears to be **{prediction}**! 🎉")
35
+ col2.markdown("Keep up the good care for your feathered friend!")
36
+ else:
37
+ col2.error(f"The kidney may have **{prediction}**. 😢")
38
+ col2.markdown("Please consult with a veterinarian for proper treatment.")
39
+
40
+
41
+
42
+ st.sidebar.title("About")
43
+ st.sidebar.info(
44
+ "This app uses a deep learning model to predict whether a chicken is healthy "
45
+ "or has coccidiosis based on an uploaded image. Always consult with a "
46
+ "veterinarian for accurate diagnosis and treatment."
47
+ )
48
+
49
+ st.sidebar.title("Instructions")
50
+ st.sidebar.markdown(
51
+ """
52
+ 1. Upload a clear image of a chicken.
53
+ 2. Wait for the model to analyze the image.
54
+ 3. View the prediction result and additional information.
55
+ """
56
+ )
57
+
58
+ st.markdown(
59
+ """
60
+ <style>
61
+ .reportview-container {
62
+ background: linear-gradient(to right, #FDFCFB, #E2D1C3);
63
+ }
64
+ .sidebar .sidebar-content {
65
+ background: linear-gradient(to bottom, #FDFCFB, #E2D1C3);
66
+ }
67
+ </style>
68
+ """,
69
+ unsafe_allow_html=True,
70
+ )
config/config.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ atifacts_root : artifacts
2
+
3
+ data_ingestion:
4
+ root_dir : artifacts/data_ingestion
5
+ source_URL : https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view?usp=sharing
6
+ local_data_file : artifacts/data_ingestion/data.zip
7
+ unzip_dir : artifacts/data_ingestion/unzip
8
+
9
+
10
+ prepare_base_model:
11
+ root_dir: artifacts/prepare_base_model
12
+ base_model_path: artifacts/prepare_base_model/base_model.h5
13
+ updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
14
+
15
+
16
+
17
+ training:
18
+ root_dir: artifacts/training
19
+ trained_model_path : artifacts/training/model.h5
20
+
21
+
dvc.lock ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ schema: '2.0'
2
+ stages:
3
+ data_ingestion:
4
+ cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
5
+ deps:
6
+ - path: config/config.yaml
7
+ hash: md5
8
+ md5: 18c5d166940398f449d80f3bf7ceba78
9
+ size: 601
10
+ - path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py
11
+ hash: md5
12
+ md5: 9ab8c5d8d045a810fdc294c23dba44a2
13
+ size: 906
14
+ outs:
15
+ - path: artifacts/data_ingestion/unzip/kidney-ct-scan-image
16
+ hash: md5
17
+ md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir
18
+ size: 58936381
19
+ nfiles: 465
20
+ prepare_base_model:
21
+ cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
22
+ deps:
23
+ - path: config/config.yaml
24
+ hash: md5
25
+ md5: 18c5d166940398f449d80f3bf7ceba78
26
+ size: 601
27
+ - path: src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
28
+ hash: md5
29
+ md5: e8e39a301f4b90d1b4f2c86acc66ef32
30
+ size: 999
31
+ params:
32
+ params.yaml:
33
+ CLASSES: 2
34
+ IMAGE_SIZE:
35
+ - 224
36
+ - 224
37
+ - 3
38
+ INCLUDE_TOP: false
39
+ LEARNING_RATE: 0.009
40
+ WEIGHTS: imagenet
41
+ outs:
42
+ - path: artifacts/prepare_base_model
43
+ hash: md5
44
+ md5: 186cffa6925a8727cbd781402a5b2d91.dir
45
+ size: 118054560
46
+ nfiles: 2
47
+ training:
48
+ cmd: python src/cnnClassifier/pipeline/stage_03_train_model.py
49
+ deps:
50
+ - path: artifacts/data_ingestion/unzip/kidney-ct-scan-image
51
+ hash: md5
52
+ md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir
53
+ size: 58936381
54
+ nfiles: 465
55
+ - path: artifacts/prepare_base_model
56
+ hash: md5
57
+ md5: 186cffa6925a8727cbd781402a5b2d91.dir
58
+ size: 118054560
59
+ nfiles: 2
60
+ - path: config/config.yaml
61
+ hash: md5
62
+ md5: 18c5d166940398f449d80f3bf7ceba78
63
+ size: 601
64
+ - path: src/cnnClassifier/pipeline/stage_03_train_model.py
65
+ hash: md5
66
+ md5: 3ef39a6e5a0d665c7c48877e098f3c82
67
+ size: 919
68
+ params:
69
+ params.yaml:
70
+ AUGMENTATION: true
71
+ BATCH_SIZE: 32
72
+ EPOCHS: 2
73
+ IMAGE_SIZE:
74
+ - 224
75
+ - 224
76
+ - 3
77
+ outs:
78
+ - path: artifacts/training/model.h5
79
+ hash: md5
80
+ md5: b860a0e1daa2296bf1ab06265a233dae
81
+ size: 59337520
82
+ evaluation:
83
+ cmd: python src/cnnClassifier/pipeline/stage_04_evaluation.py
84
+ deps:
85
+ - path: artifacts/data_ingestion/unzip/kidney-ct-scan-image
86
+ hash: md5
87
+ md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir
88
+ size: 58936381
89
+ nfiles: 465
90
+ - path: artifacts/training/model.h5
91
+ hash: md5
92
+ md5: b860a0e1daa2296bf1ab06265a233dae
93
+ size: 59337520
94
+ - path: config/config.yaml
95
+ hash: md5
96
+ md5: 18c5d166940398f449d80f3bf7ceba78
97
+ size: 601
98
+ - path: src/cnnClassifier/pipeline/stage_04_evaluation.py
99
+ hash: md5
100
+ md5: ef1e7f821e740d1e4a9d51a4bf724e68
101
+ size: 888
102
+ params:
103
+ params.yaml:
104
+ BATCH_SIZE: 32
105
+ IMAGE_SIZE:
106
+ - 224
107
+ - 224
108
+ - 3
109
+ outs:
110
+ - path: scores.json
111
+ hash: md5
112
+ md5: 8930e55b40b3d3c2866622648a461a1c
113
+ size: 72
dvc.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ stages:
2
+ data_ingestion:
3
+ cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
4
+ deps:
5
+ - src/cnnClassifier/pipeline/stage_01_data_ingestion.py
6
+ - config/config.yaml
7
+ outs:
8
+ - artifacts/data_ingestion/unzip/kidney-ct-scan-image
9
+
10
+ prepare_base_model:
11
+ cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
12
+ deps:
13
+ - src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
14
+ - config/config.yaml
15
+ params:
16
+ - IMAGE_SIZE
17
+ - INCLUDE_TOP
18
+ - CLASSES
19
+ - WEIGHTS
20
+ - LEARNING_RATE
21
+ outs:
22
+ - artifacts/prepare_base_model
23
+
24
+ training:
25
+ cmd: python src/cnnClassifier/pipeline/stage_03_train_model.py
26
+ deps:
27
+ - src/cnnClassifier/pipeline/stage_03_train_model.py
28
+ - config/config.yaml
29
+ - artifacts/data_ingestion/unzip/kidney-ct-scan-image
30
+ - artifacts/prepare_base_model
31
+ params:
32
+ - IMAGE_SIZE
33
+ - EPOCHS
34
+ - BATCH_SIZE
35
+ - AUGMENTATION
36
+ outs:
37
+ - artifacts/training/model.h5
38
+
39
+ evaluation:
40
+ cmd: python src/cnnClassifier/pipeline/stage_04_evaluation.py
41
+ deps:
42
+ - src/cnnClassifier/pipeline/stage_04_evaluation.py
43
+ - config/config.yaml
44
+ - artifacts/data_ingestion/unzip/kidney-ct-scan-image
45
+ - artifacts/training/model.h5
46
+ params:
47
+ - IMAGE_SIZE
48
+ - BATCH_SIZE
49
+ metrics:
50
+ - scores.json:
51
+ cache: false
main.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier import logger
2
+ from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
3
+ from cnnClassifier.pipeline.stage_02_prepare_base_model import PrepareBaseModelTrainingPipeline
4
+ from cnnClassifier.pipeline.stage_03_train_model import ModelTrainingPipeline
5
+ from cnnClassifier.pipeline.stage_04_evaluation import EvaluationTrainingPipeline
6
+
7
+ STAGE_NAME = "Data Ingestion stage"
8
+ try:
9
+ logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
10
+ data_ingestion = DataIngestionTrainingPipeline()
11
+ data_ingestion.main()
12
+ logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
13
+ except Exception as e:
14
+ logger.exception(e)
15
+ raise e
16
+
17
+ STAGE_NAME = "Prepare Base Model stage"
18
+ try:
19
+ logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
20
+ prepare_base_model = PrepareBaseModelTrainingPipeline()
21
+ prepare_base_model.main()
22
+ logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
23
+ except Exception as e:
24
+ logger.exception(e)
25
+ raise e
26
+
27
+
28
+ STAGE_NAME = "Model Training"
29
+
30
+ try:
31
+ logger.info(f"*******************")
32
+ logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
33
+ obj = ModelTrainingPipeline()
34
+ obj.main()
35
+ logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
36
+ except Exception as e:
37
+ logger.exception(e)
38
+ raise e
39
+
40
+
41
+ STAGE_NAME = "Evaluation"
42
+
43
+ try:
44
+ logger.info(f"*******************")
45
+ logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
46
+ obj = EvaluationTrainingPipeline()
47
+ obj.main()
48
+ logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
49
+ except Exception as e:
50
+ logger.exception(e)
51
+ raise e
params.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AUGMENTATION : TRUE
2
+ IMAGE_SIZE: [224,224,3]
3
+ EPOCHS : 2
4
+ BATCH_SIZE : 32
5
+ LEARNING_RATE : 0.009
6
+ CLASSES : 2
7
+ WEIGHTS : imagenet
8
+ INCLUDE_TOP : False
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tensorflow==2.12.0
2
+ pandas
3
+ dvc
4
+ mlflow==2.2.2
5
+ notebook
6
+ numpy
7
+ matplotlib
8
+ seaborn
9
+ python-box==6.0.2
10
+ pyYAML
11
+ tqdm
12
+ ensure==1.0.2
13
+ joblib
14
+ types-PyYAML
15
+ scipy
16
+ Flask
17
+ Flask-Cors
18
+ gdown
research/01_data_ingestion.ipynb ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "os.chdir('../')"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "# entity\n",
20
+ "from dataclasses import dataclass\n",
21
+ "from pathlib import Path\n",
22
+ "\n",
23
+ "@dataclass(frozen=True)\n",
24
+ "class DataIngestionConfig:\n",
25
+ " root_dir: Path\n",
26
+ " source_URL: str\n",
27
+ " local_data_file: Path\n",
28
+ " unzip_dir: Path"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 11,
34
+ "metadata": {},
35
+ "outputs": [],
36
+ "source": [
37
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
38
+ "from cnnClassifier.constant import *\n",
39
+ "# Configuration\n",
40
+ "class ConfigurationManager:\n",
41
+ " def __init__(\n",
42
+ " self,\n",
43
+ " config_filepath = CONFIG_FILE_PATH,\n",
44
+ " params_filepath = PARAMS_FILE_PATH\n",
45
+ " ):\n",
46
+ " self.config = read_yaml(config_filepath)\n",
47
+ " self.params = read_yaml(params_filepath)\n",
48
+ " \n",
49
+ " create_directories([self.config.atifacts_root])\n",
50
+ " \n",
51
+ " \n",
52
+ " \n",
53
+ " def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
54
+ " config = self.config.data_ingestion\n",
55
+ " create_directories([config.root_dir])\n",
56
+ " \n",
57
+ " data_ingestion_config = DataIngestionConfig(\n",
58
+ " root_dir=config.root_dir,\n",
59
+ " source_URL=config.source_URL,\n",
60
+ " local_data_file=config.local_data_file,\n",
61
+ " unzip_dir=config.unzip_dir\n",
62
+ " )\n",
63
+ " \n",
64
+ " return data_ingestion_config\n",
65
+ " \n",
66
+ " \n",
67
+ " "
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 12,
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "# components\n",
77
+ "\n",
78
+ "import os\n",
79
+ "import zipfile\n",
80
+ "import gdown\n",
81
+ "from cnnClassifier import logger\n",
82
+ "from cnnClassifier.utils.common import get_size"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 13,
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "class DataIngestion:\n",
92
+ " def __init__(self, config: DataIngestionConfig):\n",
93
+ " self.config = config\n",
94
+ " \n",
95
+ " def download_file(self) -> str:\n",
96
+ " try:\n",
97
+ " dataset_url = self.config.source_URL\n",
98
+ " zip_download_dir = self.config.local_data_file\n",
99
+ " os.makedirs('artifacts/datasets', exist_ok=True)\n",
100
+ " logger.info(f'Downloading data from {dataset_url} into {zip_download_dir}')\n",
101
+ " \n",
102
+ " file_id = dataset_url.split('/')[-2]\n",
103
+ " prefix = 'https://drive.google.com/uc?/export=download&id='\n",
104
+ " gdown.download(prefix + file_id, zip_download_dir)\n",
105
+ " logger.info(f'Downloaded data from {dataset_url} into {zip_download_dir}')\n",
106
+ " \n",
107
+ " except Exception as e:\n",
108
+ " raise e\n",
109
+ " \n",
110
+ " \n",
111
+ " \n",
112
+ " def extract_zip_file(self):\n",
113
+ " \"\"\"\n",
114
+ " zip_file_path: str\n",
115
+ " Extracts the zip file into the data directory\n",
116
+ " Function returns None\n",
117
+ " \"\"\"\n",
118
+ " unzip_path = self.config.unzip_dir\n",
119
+ " os.makedirs(unzip_path, exist_ok=True)\n",
120
+ " with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
121
+ " zip_ref.extractall(unzip_path)"
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": null,
127
+ "metadata": {},
128
+ "outputs": [
129
+ {
130
+ "name": "stderr",
131
+ "output_type": "stream",
132
+ "text": [
133
+ " 5%|▍ | 2.62M/57.7M [00:30<03:49, 240kB/s]"
134
+ ]
135
+ }
136
+ ],
137
+ "source": [
138
+ "try:\n",
139
+ " config = ConfigurationManager()\n",
140
+ " data_ingestion_config = config.get_data_ingestion_config()\n",
141
+ " data_ingestion = DataIngestion(config=data_ingestion_config)\n",
142
+ " data_ingestion.download_file()\n",
143
+ " data_ingestion.extract_zip_file()\n",
144
+ "except Exception as e:\n",
145
+ " raise e"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": null,
151
+ "metadata": {},
152
+ "outputs": [],
153
+ "source": []
154
+ }
155
+ ],
156
+ "metadata": {
157
+ "kernelspec": {
158
+ "display_name": "Python 3",
159
+ "language": "python",
160
+ "name": "python3"
161
+ },
162
+ "language_info": {
163
+ "codemirror_mode": {
164
+ "name": "ipython",
165
+ "version": 3
166
+ },
167
+ "file_extension": ".py",
168
+ "mimetype": "text/x-python",
169
+ "name": "python",
170
+ "nbconvert_exporter": "python",
171
+ "pygments_lexer": "ipython3",
172
+ "version": "3.11.0"
173
+ }
174
+ },
175
+ "nbformat": 4,
176
+ "nbformat_minor": 2
177
+ }
research/02_prepare_base_model.ipynb ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "os.chdir('../')"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "from dataclasses import dataclass\n",
20
+ "from pathlib import Path\n",
21
+ "\n",
22
+ "\n",
23
+ "@dataclass(frozen=True)\n",
24
+ "class PrepareBaseModelConfig:\n",
25
+ " root_dir: Path\n",
26
+ " base_model_path: Path\n",
27
+ " updated_base_model_path: Path\n",
28
+ " params_image_size: list\n",
29
+ " params_learning_rate: float\n",
30
+ " params_include_top: bool\n",
31
+ " params_weights: str\n",
32
+ " params_classes: int"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 3,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
42
+ "from cnnClassifier.constant import *\n",
43
+ "# Configuration\n",
44
+ "class ConfigurationManager:\n",
45
+ " def __init__(\n",
46
+ " self,\n",
47
+ " config_filepath = CONFIG_FILE_PATH,\n",
48
+ " params_filepath = PARAMS_FILE_PATH):\n",
49
+ "\n",
50
+ " self.config = read_yaml(config_filepath)\n",
51
+ " self.params = read_yaml(params_filepath)\n",
52
+ "\n",
53
+ " create_directories([self.config.atifacts_root])\n",
54
+ "\n",
55
+ " \n",
56
+ "\n",
57
+ " def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:\n",
58
+ " config = self.config.prepare_base_model\n",
59
+ " \n",
60
+ " create_directories([config.root_dir])\n",
61
+ "\n",
62
+ " prepare_base_model_config = PrepareBaseModelConfig(\n",
63
+ " root_dir=Path(config.root_dir),\n",
64
+ " base_model_path=Path(config.base_model_path),\n",
65
+ " updated_base_model_path=Path(config.updated_base_model_path),\n",
66
+ " params_image_size=self.params.IMAGE_SIZE,\n",
67
+ " params_learning_rate=self.params.LEARNING_RATE,\n",
68
+ " params_include_top=self.params.INCLUDE_TOP,\n",
69
+ " params_weights=self.params.WEIGHTS,\n",
70
+ " params_classes=self.params.CLASSES\n",
71
+ " )\n",
72
+ "\n",
73
+ " return prepare_base_model_config"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 4,
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "name": "stdout",
83
+ "output_type": "stream",
84
+ "text": [
85
+ "[2024-07-30 03:19:34,344: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
86
+ "]\n"
87
+ ]
88
+ }
89
+ ],
90
+ "source": [
91
+ "import os\n",
92
+ "import urllib.request as request\n",
93
+ "import tensorflow as tf\n",
94
+ "\n",
95
+ "class PrepareBaseModel:\n",
96
+ " def __init__(self, config: PrepareBaseModelConfig):\n",
97
+ " self.config = config\n",
98
+ "\n",
99
+ " \n",
100
+ " def get_base_model(self):\n",
101
+ " self.model = tf.keras.applications.vgg16.VGG16(\n",
102
+ " input_shape=self.config.params_image_size,\n",
103
+ " weights=self.config.params_weights,\n",
104
+ " include_top=self.config.params_include_top\n",
105
+ " )\n",
106
+ "\n",
107
+ " self.save_model(path=self.config.base_model_path, model=self.model)\n",
108
+ "\n",
109
+ " \n",
110
+ "\n",
111
+ " @staticmethod\n",
112
+ " def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):\n",
113
+ " if freeze_all:\n",
114
+ " for layer in model.layers:\n",
115
+ " model.trainable = False\n",
116
+ " elif (freeze_till is not None) and (freeze_till > 0):\n",
117
+ " for layer in model.layers[:-freeze_till]:\n",
118
+ " model.trainable = False\n",
119
+ "\n",
120
+ " flatten_in = tf.keras.layers.Flatten()(model.output)\n",
121
+ " prediction = tf.keras.layers.Dense(\n",
122
+ " units=classes,\n",
123
+ " activation=\"softmax\"\n",
124
+ " )(flatten_in)\n",
125
+ "\n",
126
+ " full_model = tf.keras.models.Model(\n",
127
+ " inputs=model.input,\n",
128
+ " outputs=prediction\n",
129
+ " )\n",
130
+ "\n",
131
+ " full_model.compile(\n",
132
+ " optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),\n",
133
+ " loss=tf.keras.losses.CategoricalCrossentropy(),\n",
134
+ " metrics=[\"accuracy\"]\n",
135
+ " )\n",
136
+ "\n",
137
+ " full_model.summary()\n",
138
+ " return full_model\n",
139
+ " \n",
140
+ " \n",
141
+ " def update_base_model(self):\n",
142
+ " self.full_model = self._prepare_full_model(\n",
143
+ " model=self.model,\n",
144
+ " classes=self.config.params_classes,\n",
145
+ " freeze_all=True,\n",
146
+ " freeze_till=None,\n",
147
+ " learning_rate=self.config.params_learning_rate\n",
148
+ " )\n",
149
+ "\n",
150
+ " self.save_model(path=self.config.updated_base_model_path, model=self.full_model)\n",
151
+ "\n",
152
+ " \n",
153
+ " \n",
154
+ " @staticmethod\n",
155
+ " def save_model(path: Path, model: tf.keras.Model):\n",
156
+ " model.save(path)"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 5,
162
+ "metadata": {},
163
+ "outputs": [
164
+ {
165
+ "name": "stdout",
166
+ "output_type": "stream",
167
+ "text": [
168
+ "[2024-07-30 03:19:36,293: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
169
+ "[2024-07-30 03:19:36,296: INFO: common: yaml file: params.yaml loaded successfully]\n",
170
+ "[2024-07-30 03:19:36,298: INFO: common: Created directory at: artifacts]\n",
171
+ "[2024-07-30 03:19:36,299: INFO: common: Created directory at: artifacts/prepare_base_model]\n",
172
+ "[2024-07-30 03:19:36,531: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.\n",
173
+ "]\n",
174
+ "[2024-07-30 03:19:36,660: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\layers\\pooling\\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n",
175
+ "]\n",
176
+ "[2024-07-30 03:19:37,174: WARNING: saving_utils: Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.]\n",
177
+ "Model: \"model\"\n",
178
+ "_________________________________________________________________\n",
179
+ " Layer (type) Output Shape Param # \n",
180
+ "=================================================================\n",
181
+ " input_1 (InputLayer) [(None, 224, 224, 3)] 0 \n",
182
+ " \n",
183
+ " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n",
184
+ " \n",
185
+ " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n",
186
+ " \n",
187
+ " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n",
188
+ " \n",
189
+ " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n",
190
+ " \n",
191
+ " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n",
192
+ " \n",
193
+ " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n",
194
+ " \n",
195
+ " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n",
196
+ " \n",
197
+ " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n",
198
+ " \n",
199
+ " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n",
200
+ " \n",
201
+ " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n",
202
+ " \n",
203
+ " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n",
204
+ " \n",
205
+ " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n",
206
+ " \n",
207
+ " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n",
208
+ " \n",
209
+ " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n"
210
+ ]
211
+ },
212
+ {
213
+ "name": "stderr",
214
+ "output_type": "stream",
215
+ "text": [
216
+ "c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\engine\\training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.\n",
217
+ " saving_api.save_model(\n"
218
+ ]
219
+ },
220
+ {
221
+ "name": "stdout",
222
+ "output_type": "stream",
223
+ "text": [
224
+ " \n",
225
+ " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n",
226
+ " \n",
227
+ " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n",
228
+ " \n",
229
+ " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n",
230
+ " \n",
231
+ " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n",
232
+ " \n",
233
+ " flatten (Flatten) (None, 25088) 0 \n",
234
+ " \n",
235
+ " dense (Dense) (None, 2) 50178 \n",
236
+ " \n",
237
+ "=================================================================\n",
238
+ "Total params: 14764866 (56.32 MB)\n",
239
+ "Trainable params: 50178 (196.01 KB)\n",
240
+ "Non-trainable params: 14714688 (56.13 MB)\n",
241
+ "_________________________________________________________________\n"
242
+ ]
243
+ }
244
+ ],
245
+ "source": [
246
+ "try:\n",
247
+ " config = ConfigurationManager()\n",
248
+ " prepare_base_model_config = config.get_prepare_base_model_config()\n",
249
+ " prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n",
250
+ " prepare_base_model.get_base_model()\n",
251
+ " prepare_base_model.update_base_model()\n",
252
+ "except Exception as e:\n",
253
+ " raise e"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": null,
259
+ "metadata": {},
260
+ "outputs": [],
261
+ "source": []
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "execution_count": null,
266
+ "metadata": {},
267
+ "outputs": [],
268
+ "source": []
269
+ }
270
+ ],
271
+ "metadata": {
272
+ "kernelspec": {
273
+ "display_name": "Python 3",
274
+ "language": "python",
275
+ "name": "python3"
276
+ },
277
+ "language_info": {
278
+ "codemirror_mode": {
279
+ "name": "ipython",
280
+ "version": 3
281
+ },
282
+ "file_extension": ".py",
283
+ "mimetype": "text/x-python",
284
+ "name": "python",
285
+ "nbconvert_exporter": "python",
286
+ "pygments_lexer": "ipython3",
287
+ "version": "3.11.0"
288
+ }
289
+ },
290
+ "nbformat": 4,
291
+ "nbformat_minor": 2
292
+ }
research/03_model_training.ipynb ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "os.chdir('../')\n"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "data": {
20
+ "text/plain": [
21
+ "'d:\\\\MLOps-Project\\\\Kidney-disease-classification-mlops'"
22
+ ]
23
+ },
24
+ "execution_count": 2,
25
+ "metadata": {},
26
+ "output_type": "execute_result"
27
+ }
28
+ ],
29
+ "source": [
30
+ "%pwd"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 4,
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "from dataclasses import dataclass\n",
40
+ "from pathlib import Path\n",
41
+ "\n",
42
+ "@dataclass(frozen=True)\n",
43
+ "class TrainingConfig:\n",
44
+ " root_dir : Path\n",
45
+ " training_model_path : Path\n",
46
+ " updata_base_model_path : Path\n",
47
+ " training_data: Path\n",
48
+ " params_epochs : int\n",
49
+ " params_is_augmentation : bool\n",
50
+ " params_batch_size : int\n",
51
+ " params_image_size : list\n",
52
+ " "
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 9,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
62
+ "from cnnClassifier.constant import *\n",
63
+ "\n",
64
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
65
+ "from cnnClassifier.constant import *\n",
66
+ "# Configuration\n",
67
+ "class ConfigurationManager:\n",
68
+ " def __init__(\n",
69
+ " self,\n",
70
+ " config_filepath = CONFIG_FILE_PATH,\n",
71
+ " params_filepath = PARAMS_FILE_PATH):\n",
72
+ "\n",
73
+ " self.config = read_yaml(config_filepath)\n",
74
+ " self.params = read_yaml(params_filepath)\n",
75
+ "\n",
76
+ " create_directories([self.config.atifacts_root])\n",
77
+ " \n",
78
+ " def get_training_config(self) -> TrainingConfig:\n",
79
+ " training = self.config.training\n",
80
+ " prepare_base_model =self.config.prepare_base_model\n",
81
+ " params = self.params\n",
82
+ " training_data = os.path.join(self.config.data_ingestion.unzip_dir, 'kidney-ct-scan-image') \n",
83
+ " \n",
84
+ " create_directories([\n",
85
+ " Path(training.root_dir)\n",
86
+ " ])\n",
87
+ " \n",
88
+ " training_config = TrainingConfig(\n",
89
+ " root_dir= Path(training.root_dir),\n",
90
+ " training_model_path=Path(training.trained_model_path),\n",
91
+ " updata_base_model_path=Path(prepare_base_model.updated_base_model_path),\n",
92
+ " training_data = Path(training_data),\n",
93
+ " params_epochs = params.EPOCHS, \n",
94
+ " params_batch_size= params.BATCH_SIZE,\n",
95
+ " params_is_augmentation= params.AUGMENTATION,\n",
96
+ " params_image_size= params.IMAGE_SIZE\n",
97
+ " )\n",
98
+ " \n",
99
+ " return training_config\n",
100
+ " "
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 10,
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "import tensorflow as tf"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": 19,
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "class Training:\n",
119
+ " def __init__(self, confg : TrainingConfig):\n",
120
+ " self.config = confg\n",
121
+ " \n",
122
+ " def get_base_model(self):\n",
123
+ " self.model = tf.keras.models.load_model(\n",
124
+ " self.config.updata_base_model_path\n",
125
+ " )\n",
126
+ " \n",
127
+ " \n",
128
+ " def train_vaid_generator(self):\n",
129
+ " datagenerator_kwargs = dict(\n",
130
+ " rescale = 1 / 255,\n",
131
+ " validation_split = 0.20\n",
132
+ " )\n",
133
+ " \n",
134
+ " dataflow_kwargs = dict(\n",
135
+ " target_size = self.config.params_image_size[:-1],\n",
136
+ " batch_size = self.config.params_batch_size,\n",
137
+ " interpolation = 'bilinear'\n",
138
+ " )\n",
139
+ " valid_datagernerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
140
+ " **datagenerator_kwargs\n",
141
+ " )\n",
142
+ " \n",
143
+ " self.valid_generator = valid_datagernerator.flow_from_directory(\n",
144
+ " directory = self.config.training_data,\n",
145
+ " subset = 'validation',\n",
146
+ " shuffle = True,\n",
147
+ " **dataflow_kwargs\n",
148
+ " )\n",
149
+ " \n",
150
+ " if self.config.params_is_augmentation:\n",
151
+ " train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
152
+ " \n",
153
+ " \n",
154
+ " rotation_range = 40,\n",
155
+ " horizontal_flip = True,\n",
156
+ " width_shift_range = 0.2,\n",
157
+ " height_shift_range = 0.2,\n",
158
+ " shear_range = 0.2,\n",
159
+ " zoom_range = 0.2,\n",
160
+ " **datagenerator_kwargs\n",
161
+ " )\n",
162
+ " \n",
163
+ " \n",
164
+ " else:\n",
165
+ " train_datagenerator = valid_datagernerator\n",
166
+ " self.train_generator = train_datagenerator.flow_from_directory(\n",
167
+ " directory = self.config.training_data,\n",
168
+ " subset = 'training',\n",
169
+ " shuffle = True,\n",
170
+ " **dataflow_kwargs\n",
171
+ " )\n",
172
+ " \n",
173
+ " @staticmethod\n",
174
+ " def save_model(path: Path, model: tf.keras.Model):\n",
175
+ " model.save(path)\n",
176
+ " \n",
177
+ " def train(self):\n",
178
+ " self.steps_per_epchs = self.train_generator.samples // self.train_generator.batch_size\n",
179
+ " self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n",
180
+ " \n",
181
+ " self.model.fit(\n",
182
+ " self.train_generator,\n",
183
+ " epochs = self.config.params_epochs,\n",
184
+ " steps_per_epoch = self.steps_per_epchs,\n",
185
+ " validation_steps = self.validation_steps,\n",
186
+ " validation_data = self.valid_generator\n",
187
+ " )\n",
188
+ " \n",
189
+ " self.save_model(\n",
190
+ " path = self.config.training_data,\n",
191
+ " model = self.model\n",
192
+ " )\n",
193
+ "\n",
194
+ " "
195
+ ]
196
+ },
197
+ {
198
+ "cell_type": "code",
199
+ "execution_count": 20,
200
+ "metadata": {},
201
+ "outputs": [
202
+ {
203
+ "name": "stdout",
204
+ "output_type": "stream",
205
+ "text": [
206
+ "[2024-07-31 20:16:53,704: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
207
+ "[2024-07-31 20:16:53,707: INFO: common: yaml file: params.yaml loaded successfully]\n",
208
+ "[2024-07-31 20:16:53,709: INFO: common: Created directory at: artifacts]\n",
209
+ "[2024-07-31 20:16:53,711: INFO: common: Created directory at: artifacts\\training]\n",
210
+ "Found 93 images belonging to 2 classes.\n",
211
+ "Found 372 images belonging to 2 classes.\n",
212
+ "Epoch 1/10\n",
213
+ "[2024-07-31 20:16:55,760: WARNING: module_wrapper: From c:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\keras\\src\\utils\\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.\n",
214
+ "]\n",
215
+ "23/23 [==============================] - 32s 1s/step - loss: 0.6976 - accuracy: 0.5983 - val_loss: 0.5528 - val_accuracy: 0.6750\n",
216
+ "Epoch 2/10\n",
217
+ "23/23 [==============================] - 18s 776ms/step - loss: 0.5961 - accuracy: 0.7022 - val_loss: 0.5576 - val_accuracy: 0.8250\n",
218
+ "Epoch 3/10\n",
219
+ "23/23 [==============================] - 18s 780ms/step - loss: 0.5489 - accuracy: 0.7612 - val_loss: 0.6042 - val_accuracy: 0.5250\n",
220
+ "Epoch 4/10\n",
221
+ "23/23 [==============================] - 18s 779ms/step - loss: 0.5166 - accuracy: 0.8006 - val_loss: 0.5593 - val_accuracy: 0.5750\n",
222
+ "Epoch 5/10\n",
223
+ "23/23 [==============================] - 18s 774ms/step - loss: 0.4863 - accuracy: 0.7949 - val_loss: 0.6155 - val_accuracy: 0.5250\n",
224
+ "Epoch 6/10\n",
225
+ "23/23 [==============================] - 18s 789ms/step - loss: 0.4486 - accuracy: 0.8062 - val_loss: 0.5774 - val_accuracy: 0.5250\n",
226
+ "Epoch 7/10\n",
227
+ "23/23 [==============================] - 18s 772ms/step - loss: 0.4574 - accuracy: 0.8034 - val_loss: 0.5751 - val_accuracy: 0.5125\n",
228
+ "Epoch 8/10\n",
229
+ "23/23 [==============================] - 18s 772ms/step - loss: 0.4493 - accuracy: 0.7949 - val_loss: 0.5814 - val_accuracy: 0.5125\n",
230
+ "Epoch 9/10\n",
231
+ "23/23 [==============================] - 18s 772ms/step - loss: 0.4414 - accuracy: 0.7921 - val_loss: 0.5636 - val_accuracy: 0.5125\n",
232
+ "Epoch 10/10\n",
233
+ "23/23 [==============================] - 18s 794ms/step - loss: 0.4290 - accuracy: 0.8090 - val_loss: 0.5743 - val_accuracy: 0.5000\n",
234
+ "[2024-07-31 20:20:09,590: INFO: builder_impl: Assets written to: artifacts\\data_ingestion\\unzip\\kidney-ct-scan-image\\assets]\n"
235
+ ]
236
+ }
237
+ ],
238
+ "source": [
239
+ "try:\n",
240
+ " config = ConfigurationManager()\n",
241
+ " training_config = config.get_training_config()\n",
242
+ " training = Training(confg=training_config)\n",
243
+ " training.get_base_model()\n",
244
+ " training.train_vaid_generator()\n",
245
+ " training.train()\n",
246
+ " \n",
247
+ "except Exception as e:\n",
248
+ " raise e"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "markdown",
253
+ "metadata": {},
254
+ "source": []
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": null,
259
+ "metadata": {},
260
+ "outputs": [],
261
+ "source": []
262
+ }
263
+ ],
264
+ "metadata": {
265
+ "kernelspec": {
266
+ "display_name": "Python 3",
267
+ "language": "python",
268
+ "name": "python3"
269
+ },
270
+ "language_info": {
271
+ "codemirror_mode": {
272
+ "name": "ipython",
273
+ "version": 3
274
+ },
275
+ "file_extension": ".py",
276
+ "mimetype": "text/x-python",
277
+ "name": "python",
278
+ "nbconvert_exporter": "python",
279
+ "pygments_lexer": "ipython3",
280
+ "version": "3.11.0"
281
+ }
282
+ },
283
+ "nbformat": 4,
284
+ "nbformat_minor": 2
285
+ }
research/model_evaluation.ipynb ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "os.chdir('../')"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "data": {
20
+ "text/plain": [
21
+ "'d:\\\\MLOps-Project\\\\Kidney-disease-classification-mlops'"
22
+ ]
23
+ },
24
+ "execution_count": 2,
25
+ "metadata": {},
26
+ "output_type": "execute_result"
27
+ }
28
+ ],
29
+ "source": [
30
+ "%pwd"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 3,
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "import tensorflow as tf"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 4,
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "model = tf.keras.models.load_model('artifacts/training/model.h5')"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 5,
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "from dataclasses import dataclass\n",
58
+ "from pathlib import Path\n",
59
+ "\n",
60
+ "@dataclass(frozen=True)\n",
61
+ "class EvaluationConfig:\n",
62
+ " path_of_model: Path\n",
63
+ " training_data: Path\n",
64
+ " all_params: dict\n",
65
+ " mlflow_uri: str\n",
66
+ " params_image_size: list\n",
67
+ " params_batch_size: int\n",
68
+ " "
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": 12,
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "from cnnClassifier.utils.common import read_yaml, create_directories,save_json\n",
78
+ "from cnnClassifier.constant import *\n",
79
+ "\n",
80
+ "from cnnClassifier.utils.common import read_yaml, create_directories\n",
81
+ "from cnnClassifier.constant import *\n",
82
+ "# Configuration\n",
83
+ "class ConfigurationManager:\n",
84
+ " def __init__(\n",
85
+ " self,\n",
86
+ " config_filepath = CONFIG_FILE_PATH,\n",
87
+ " params_filepath = PARAMS_FILE_PATH):\n",
88
+ "\n",
89
+ " self.config = read_yaml(config_filepath)\n",
90
+ " self.params = read_yaml(params_filepath)\n",
91
+ "\n",
92
+ " create_directories([self.config.atifacts_root])\n",
93
+ " \n",
94
+ " def get_evaluation_config(self) -> EvaluationConfig:\n",
95
+ " eval_config = EvaluationConfig(\n",
96
+ " path_of_model='artifacts/training/model.h5',\n",
97
+ " training_data='artifacts/data_ingestion/unzip/kidney-ct-scan-image',\n",
98
+ " mlflow_uri='https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow',\n",
99
+ " all_params= self.params,\n",
100
+ " params_image_size=self.params.IMAGE_SIZE,\n",
101
+ " params_batch_size=self.params.BATCH_SIZE\n",
102
+ " )\n",
103
+ " return eval_config\n",
104
+ " \n",
105
+ " "
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 13,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "import tensorflow as tf\n",
115
+ "from pathlib import Path\n",
116
+ "import mlflow\n",
117
+ "import mlflow.keras\n",
118
+ "from urllib.parse import urlparse"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": 17,
124
+ "metadata": {},
125
+ "outputs": [],
126
+ "source": [
127
+ "class Evaluation:\n",
128
+ " def __init__(self, config: EvaluationConfig):\n",
129
+ " self.config = config\n",
130
+ " \n",
131
+ " def _valid_generator(self):\n",
132
+ "\n",
133
+ " datagenerator_kwargs = dict(\n",
134
+ " rescale = 1./255,\n",
135
+ " validation_split=0.30\n",
136
+ " )\n",
137
+ "\n",
138
+ " dataflow_kwargs = dict(\n",
139
+ " target_size=self.config.params_image_size[:-1],\n",
140
+ " batch_size=self.config.params_batch_size,\n",
141
+ " interpolation=\"bilinear\"\n",
142
+ " )\n",
143
+ "\n",
144
+ " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n",
145
+ " **datagenerator_kwargs\n",
146
+ " )\n",
147
+ "\n",
148
+ " self.valid_generator = valid_datagenerator.flow_from_directory(\n",
149
+ " directory=self.config.training_data,\n",
150
+ " subset=\"validation\",\n",
151
+ " shuffle=False,\n",
152
+ " **dataflow_kwargs\n",
153
+ " )\n",
154
+ " \n",
155
+ " @staticmethod\n",
156
+ " def load_model(path: Path) -> tf.keras.Model:\n",
157
+ " return tf.keras.models.load_model(path)\n",
158
+ " \n",
159
+ " \n",
160
+ " def evaluation(self):\n",
161
+ " self.model = self.load_model(self.config.path_of_model)\n",
162
+ " self._valid_generator()\n",
163
+ " self.score = model.evaluate(self.valid_generator)\n",
164
+ " self.save_score()\n",
165
+ " \n",
166
+ " def save_score(self):\n",
167
+ " scores = {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n",
168
+ " save_json(path=Path(\"scores.json\"), data=scores)\n",
169
+ " \n",
170
+ " def log_into_mlflow(self):\n",
171
+ " mlflow.set_registry_uri(self.config.mlflow_uri)\n",
172
+ " tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n",
173
+ " \n",
174
+ " with mlflow.start_run():\n",
175
+ " mlflow.log_params(self.config.all_params)\n",
176
+ " mlflow.log_metrics(\n",
177
+ " {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n",
178
+ " )\n",
179
+ " # Model registry does not work with file store\n",
180
+ " if tracking_url_type_store != \"file\":\n",
181
+ "\n",
182
+ " # Register the model\n",
183
+ " # There are other ways to use the Model Registry, which depends on the use case,\n",
184
+ " # please refer to the doc for more information:\n",
185
+ " # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n",
186
+ " mlflow.keras.log_model(self.model, \"model\", registered_model_name=\"VGG16Model\")\n",
187
+ " else:\n",
188
+ " mlflow.keras.log_model(self.model, \"model\")\n",
189
+ " "
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 15,
195
+ "metadata": {},
196
+ "outputs": [
197
+ {
198
+ "name": "stdout",
199
+ "output_type": "stream",
200
+ "text": [
201
+ "[2024-08-01 23:40:02,445: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/repos/azizulhakim8291/Kidney-disease-classification-mlops \"HTTP/1.1 200 OK\"]\n"
202
+ ]
203
+ },
204
+ {
205
+ "data": {
206
+ "text/html": [
207
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Initialized MLflow to track repo <span style=\"color: #008000; text-decoration-color: #008000\">\"azizulhakim8291/Kidney-disease-classification-mlops\"</span>\n",
208
+ "</pre>\n"
209
+ ],
210
+ "text/plain": [
211
+ "Initialized MLflow to track repo \u001b[32m\"azizulhakim8291/Kidney-disease-classification-mlops\"\u001b[0m\n"
212
+ ]
213
+ },
214
+ "metadata": {},
215
+ "output_type": "display_data"
216
+ },
217
+ {
218
+ "name": "stdout",
219
+ "output_type": "stream",
220
+ "text": [
221
+ "[2024-08-01 23:40:02,453: INFO: helpers: Initialized MLflow to track repo \"azizulhakim8291/Kidney-disease-classification-mlops\"]\n"
222
+ ]
223
+ },
224
+ {
225
+ "data": {
226
+ "text/html": [
227
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Repository azizulhakim8291/Kidney-disease-classification-mlops initialized!\n",
228
+ "</pre>\n"
229
+ ],
230
+ "text/plain": [
231
+ "Repository azizulhakim8291/Kidney-disease-classification-mlops initialized!\n"
232
+ ]
233
+ },
234
+ "metadata": {},
235
+ "output_type": "display_data"
236
+ },
237
+ {
238
+ "name": "stdout",
239
+ "output_type": "stream",
240
+ "text": [
241
+ "[2024-08-01 23:40:02,458: INFO: helpers: Repository azizulhakim8291/Kidney-disease-classification-mlops initialized!]\n"
242
+ ]
243
+ }
244
+ ],
245
+ "source": [
246
+ "import dagshub\n",
247
+ "dagshub.init(repo_owner='azizulhakim8291', repo_name='Kidney-disease-classification-mlops', mlflow=True)\n",
248
+ "\n",
249
+ "import mlflow\n",
250
+ "with mlflow.start_run():\n",
251
+ " mlflow.log_param('parameter name', 'value')\n",
252
+ " mlflow.log_metric('metric name', 1)"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": 19,
258
+ "metadata": {},
259
+ "outputs": [
260
+ {
261
+ "name": "stdout",
262
+ "output_type": "stream",
263
+ "text": [
264
+ "[2024-08-01 23:44:03,754: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
265
+ "[2024-08-01 23:44:03,764: INFO: common: yaml file: params.yaml loaded successfully]\n",
266
+ "[2024-08-01 23:44:03,770: INFO: common: Created directory at: artifacts]\n",
267
+ "Found 139 images belonging to 2 classes.\n",
268
+ "9/9 [==============================] - 13s 1s/step - loss: 0.5003 - accuracy: 0.9568\n",
269
+ "[2024-08-01 23:44:17,498: INFO: common: Json file saved at: scores.json]\n"
270
+ ]
271
+ },
272
+ {
273
+ "name": "stderr",
274
+ "output_type": "stream",
275
+ "text": [
276
+ "2024/08/01 23:44:20 WARNING mlflow.tensorflow: You are saving a TensorFlow Core model or Keras model without a signature. Inference with mlflow.pyfunc.spark_udf() will not work unless the model's pyfunc representation accepts pandas DataFrames as inference inputs.\n"
277
+ ]
278
+ },
279
+ {
280
+ "name": "stdout",
281
+ "output_type": "stream",
282
+ "text": [
283
+ "[2024-08-01 23:44:22,432: WARNING: save: Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 14). These functions will not be directly callable after loading.]\n",
284
+ "INFO:tensorflow:Assets written to: C:\\Users\\User\\AppData\\Local\\Temp\\tmp8n0wc3k0\\model\\data\\model\\assets\n",
285
+ "[2024-08-01 23:44:24,256: INFO: builder_impl: Assets written to: C:\\Users\\User\\AppData\\Local\\Temp\\tmp8n0wc3k0\\model\\data\\model\\assets]\n"
286
+ ]
287
+ },
288
+ {
289
+ "name": "stderr",
290
+ "output_type": "stream",
291
+ "text": [
292
+ "Registered model 'VGG16Model' already exists. Creating a new version of this model...\n",
293
+ "2024/08/01 23:47:06 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2\n",
294
+ "Created version '2' of model 'VGG16Model'.\n"
295
+ ]
296
+ }
297
+ ],
298
+ "source": [
299
+ "try:\n",
300
+ " config = ConfigurationManager()\n",
301
+ " eval_config = config.get_evaluation_config()\n",
302
+ " evaluation = Evaluation(eval_config)\n",
303
+ " evaluation.evaluation()\n",
304
+ " evaluation.log_into_mlflow()\n",
305
+ "\n",
306
+ "except Exception as e:\n",
307
+ " raise e"
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": null,
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": []
316
+ }
317
+ ],
318
+ "metadata": {
319
+ "kernelspec": {
320
+ "display_name": "Python 3",
321
+ "language": "python",
322
+ "name": "python3"
323
+ },
324
+ "language_info": {
325
+ "codemirror_mode": {
326
+ "name": "ipython",
327
+ "version": 3
328
+ },
329
+ "file_extension": ".py",
330
+ "mimetype": "text/x-python",
331
+ "name": "python",
332
+ "nbconvert_exporter": "python",
333
+ "pygments_lexer": "ipython3",
334
+ "version": "3.11.0"
335
+ }
336
+ },
337
+ "nbformat": 4,
338
+ "nbformat_minor": 2
339
+ }
research/trials.ipynb ADDED
File without changes
scores.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "loss": 14.36583423614502,
3
+ "accuracy": 0.5179855823516846
4
+ }
setup.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import setuptools

# README.md contains non-ASCII characters (an emoji in its front matter), so
# the encoding is stated explicitly instead of relying on the platform default.
with open('README.md', 'r', encoding='utf-8') as f:
    long_description = f.read()

__version__ = '0.0.0'

REPO_NAME = "Kidney-disease-classification-mlops"
AUTHOR_USER_NAME = "HAKIM-ML"
SRC_REPO = "cnnClassifier"
AUTHOR_EMAIL = "[email protected]"


setuptools.setup(
    name=SRC_REPO,
    version=__version__,
    author=AUTHOR_USER_NAME,
    author_email=AUTHOR_EMAIL,
    description="A small python package for CNN app",
    long_description=long_description,
    # The setuptools keyword is `long_description_content_type`; the previous
    # `long_description_content` is not a recognised option.
    long_description_content_type="text/markdown",
    url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}",
    project_urls={
        "Bug Tracker": f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}/issues",
    },
    # Packages live under src/ (src-layout).
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
)
src/cnnClassifier/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import logging
4
+
5
# Record layout shared by every handler: timestamp, level, module, message.
logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"

# Log records are written to logs/running_logs.log, relative to the working directory.
log_dir = "logs"
log_filepath = os.path.join(log_dir, "running_logs.log")

# The directory has to exist before the FileHandler below opens the log file.
os.makedirs(log_dir, exist_ok=True)

_file_handler = logging.FileHandler(log_filepath)
_console_handler = logging.StreamHandler(sys.stdout)

# Root logging configuration: INFO and above, sent to both the file and stdout.
logging.basicConfig(
    level=logging.INFO,
    format=logging_str,
    handlers=[_file_handler, _console_handler],
)

# Package-wide logger imported by the other cnnClassifier modules.
logger = logging.getLogger("cnnClassifierLogger")
src/cnnClassifier/components/__init__.py ADDED
File without changes
src/cnnClassifier/components/data_ingestion.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # components
2
+
3
+ import os
4
+ import zipfile
5
+ import gdown
6
+ from cnnClassifier import logger
7
+ from cnnClassifier.utils.common import get_size
8
+ from cnnClassifier.entity.config_entity import DataIngestionConfig
9
+
10
class DataIngestion:
    """Download the dataset archive with gdown and unpack it locally."""

    def __init__(self, config: DataIngestionConfig):
        """Keep the ingestion settings.

        Args:
            config: Supplies ``source_URL``, ``local_data_file`` and ``unzip_dir``.
        """
        self.config = config

    def download_file(self) -> str:
        """Download the archive named by ``config.source_URL``.

        Returns:
            The local path (``config.local_data_file``) the archive was
            written to, as a string.

        Raises:
            Exception: Whatever the download or directory creation raises is
                propagated unchanged.
        """
        dataset_url = self.config.source_URL
        zip_download_dir = self.config.local_data_file
        os.makedirs('artifacts/datasets', exist_ok=True)
        logger.info(f'Downloading data from {dataset_url} into {zip_download_dir}')

        # The file id is taken from the second-to-last path segment of the URL
        # (assumes a share link shaped like .../d/<file_id>/view — TODO confirm).
        file_id = dataset_url.split('/')[-2]
        prefix = 'https://drive.google.com/uc?/export=download&id='
        gdown.download(prefix + file_id, zip_download_dir)
        logger.info(f'Downloaded data from {dataset_url} into {zip_download_dir}')

        # Honour the declared ``-> str`` return type (previously returned None).
        return str(zip_download_dir)

    def extract_zip_file(self):
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        The target directory is created if it does not exist yet.
        Returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
src/cnnClassifier/components/evaluation.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from pathlib import Path
3
+ import mlflow
4
+ import mlflow.keras
5
+ from urllib.parse import urlparse
6
+ from cnnClassifier.utils.common import save_json
7
+ from cnnClassifier.entity.config_entity import EvaluationConfig
8
+
9
+
10
# NOTE(review): executed at import time — the trained model file presumably has
# to exist already whenever this module is imported; confirm this is intended.
model = tf.keras.models.load_model('artifacts/training/model.h5')

import dagshub
# Initialises DagsHub's MLflow integration for this repository (mlflow=True).
dagshub.init(repo_owner='azizulhakim8291', repo_name='Kidney-disease-classification-mlops', mlflow=True)

import mlflow
# NOTE(review): opens an MLflow run and logs a placeholder parameter and metric
# on every import of this module — looks like leftover quick-start code; confirm
# whether it should stay.
with mlflow.start_run():
    mlflow.log_param('parameter name', 'value')
    mlflow.log_metric('metric name', 1)
19
+
20
+
21
class Evaluation:
    """Score a saved Keras model on the validation split and report to MLflow."""

    def __init__(self, config: EvaluationConfig):
        """Keep the evaluation settings.

        Args:
            config: Supplies the model path, data directory, MLflow URI, image
                size, batch size and the full parameter set to log.
        """
        self.config = config

    def _valid_generator(self):
        """Create ``self.valid_generator`` over the validation subset.

        Images are rescaled by 1/255, 30% of the directory is reserved as the
        validation subset, and shuffling is disabled.
        """
        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.30
        )

        dataflow_kwargs = dict(
            # Drop the last entry of the image size (presumably the channel
            # count) to obtain the target size.
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """Load the configured model, evaluate it, and save the scores.

        Sets ``self.model``, ``self.valid_generator`` and ``self.score``, then
        writes the scores via ``save_score``.
        """
        self.model = self.load_model(self.config.path_of_model)
        self._valid_generator()
        # Evaluate the model loaded from the configured path; this previously
        # used the unrelated module-level ``model`` and ignored ``self.model``.
        self.score = self.model.evaluate(self.valid_generator)
        self.save_score()

    def save_score(self):
        """Write the loss and accuracy from ``self.score`` to ``scores.json``."""
        scores = {"loss": self.score[0], "accuracy": self.score[1]}
        save_json(path=Path("scores.json"), data=scores)

    def log_into_mlflow(self):
        """Log parameters, metrics and the model to MLflow.

        Reads ``self.score`` and ``self.model``, so ``evaluation`` must have
        been called first.
        """
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(
                {"loss": self.score[0], "accuracy": self.score[1]}
            )
            # Model registry does not work with file store
            if tracking_url_type_store != "file":

                # Register the model
                # There are other ways to use the Model Registry, which depends on the use case,
                # please refer to the doc for more information:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.keras.log_model(self.model, "model", registered_model_name="VGG16Model")
            else:
                mlflow.keras.log_model(self.model, "model")
83
+
src/cnnClassifier/components/prepare_base_model.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ import tensorflow as tf
4
+ from cnnClassifier.config.configuration import PrepareBaseModelConfig
5
+
6
class PrepareBaseModel:
    """Create the VGG16 base network and the classifier built on top of it."""

    def __init__(self, config: PrepareBaseModelConfig):
        """Keep the base-model settings.

        Args:
            config: Supplies the save paths and the image size, weights,
                include-top, class-count and learning-rate parameters.
        """
        self.config = config

    def get_base_model(self):
        """Instantiate VGG16 from the configured parameters and save it.

        Sets ``self.model`` and writes it to ``config.base_model_path``.
        """
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=self.config.params_image_size,
            weights=self.config.params_weights,
            include_top=self.config.params_include_top
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Attach a softmax head to ``model`` and compile the result.

        Args:
            model: Base Keras model whose output feeds the new head.
            classes: Number of units in the final Dense layer.
            freeze_all: When truthy, mark every layer of ``model`` non-trainable.
            freeze_till: When ``freeze_all`` is falsy and this is a positive
                int, freeze every layer except the last ``freeze_till`` ones.
            learning_rate: Learning rate for the SGD optimizer.

        Returns:
            The compiled full model (its summary is printed as a side effect).
        """
        # Freeze per layer. Assigning ``model.trainable`` inside these loops
        # froze the entire network, so ``freeze_till`` could never leave the
        # trailing layers trainable.
        if freeze_all:
            for layer in model.layers:
                layer.trainable = False
        elif (freeze_till is not None) and (freeze_till > 0):
            for layer in model.layers[:-freeze_till]:
                layer.trainable = False

        flatten_in = tf.keras.layers.Flatten()(model.output)
        prediction = tf.keras.layers.Dense(
            units=classes,
            activation="softmax"
        )(flatten_in)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Build the fully frozen base + head model and save it.

        Requires ``get_base_model`` to have set ``self.model``. Sets
        ``self.full_model`` and writes it to ``config.updated_base_model_path``.
        """
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )

        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path``."""
        model.save(path)
src/cnnClassifier/components/training.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.entity.config_entity import TrainingConfig
2
+ import tensorflow as tf
3
+ from pathlib import Path
4
+
5
class Training:
    """Fit the updated base model on the image directory and save the result."""

    def __init__(self, config: TrainingConfig):
        """Keep the training settings in ``self.config``."""
        self.config = config

    def get_base_model(self):
        """Load the model stored at ``config.updated_base_model_path`` into ``self.model``."""
        source_path = self.config.updated_base_model_path
        self.model = tf.keras.models.load_model(source_path)

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator``.

        Both read from ``config.training_data`` with a 20% validation split and
        1/255 rescaling. The training generator adds augmentation only when
        ``config.params_is_augmentation`` is truthy.
        """
        cfg = self.config
        make_generator = tf.keras.preprocessing.image.ImageDataGenerator

        shared_options = {
            "rescale": 1./255,
            "validation_split": 0.20,
        }
        flow_options = {
            "target_size": cfg.params_image_size[:-1],
            "batch_size": cfg.params_batch_size,
            "interpolation": "bilinear",
        }

        # The validation stream is never augmented and keeps a fixed order.
        plain_generator = make_generator(**shared_options)
        self.valid_generator = plain_generator.flow_from_directory(
            directory=cfg.training_data,
            subset="validation",
            shuffle=False,
            **flow_options
        )

        # Without augmentation the training stream reuses the plain generator.
        train_source = plain_generator
        if cfg.params_is_augmentation:
            train_source = make_generator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **shared_options
            )

        self.train_generator = train_source.flow_from_directory(
            directory=cfg.training_data,
            subset="training",
            shuffle=True,
            **flow_options
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path``."""
        model.save(path)

    def train(self):
        """Fit ``self.model`` and save it to ``config.trained_model_path``.

        Step counts are the number of whole batches in each generator.
        """
        train_flow = self.train_generator
        valid_flow = self.valid_generator

        self.steps_per_epoch = train_flow.samples // train_flow.batch_size
        self.validation_steps = valid_flow.samples // valid_flow.batch_size

        self.model.fit(
            train_flow,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=valid_flow
        )

        self.save_model(path=self.config.trained_model_path, model=self.model)
83
+
84
+
85
+
86
+
src/cnnClassifier/config/__init__.py ADDED
File without changes
src/cnnClassifier/config/configuration.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.utils.common import read_yaml, create_directories
2
+ from cnnClassifier.constant import *
3
+ from cnnClassifier.entity.config_entity import (DataIngestionConfig,
4
+ PrepareBaseModelConfig,
5
+ TrainingConfig,EvaluationConfig)
6
+ import os
7
+
8
+
9
+
10
class ConfigurationManager:
    """Load the YAML config and params files and build per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH
    ):
        """Read both YAML files and create the root artifacts directory."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # NOTE(review): the key is spelled `atifacts_root` (not `artifacts_root`);
        # confirm it matches the key used in config.yaml.
        create_directories([self.config.atifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the ingestion root directory and return its settings."""
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            source_URL=section.source_URL,
            local_data_file=section.local_data_file,
            unzip_dir=section.unzip_dir
        )

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Create the base-model root directory and return its settings."""
        section = self.config.prepare_base_model
        create_directories([section.root_dir])

        return PrepareBaseModelConfig(
            root_dir=Path(section.root_dir),
            base_model_path=Path(section.base_model_path),
            updated_base_model_path=Path(section.updated_base_model_path),
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES
        )

    def get_training_config(self) -> TrainingConfig:
        """Create the training root directory and return the training settings."""
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyper_params = self.params
        # The image folder is expected inside the ingestion unzip directory.
        dataset_dir = os.path.join(
            self.config.data_ingestion.unzip_dir, "kidney-ct-scan-image"
        )
        create_directories([Path(training_section.root_dir)])

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(dataset_dir),
            params_epochs=hyper_params.EPOCHS,
            params_batch_size=hyper_params.BATCH_SIZE,
            params_is_augmentation=hyper_params.AUGMENTATION,
            params_image_size=hyper_params.IMAGE_SIZE
        )

    def get_evaluation_config(self) -> EvaluationConfig:
        """Return the evaluation settings.

        The model path, data directory and MLflow URI are fixed literals
        here rather than values read from the config file.
        """
        return EvaluationConfig(
            path_of_model='artifacts/training/model.h5',
            training_data='artifacts/data_ingestion/unzip/kidney-ct-scan-image',
            mlflow_uri='https://dagshub.com/azizulhakim8291/Kidney-disease-classification-mlops.mlflow',
            all_params=self.params,
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE
        )
88
+
src/cnnClassifier/constant/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
# Relative path of the main YAML configuration file.
CONFIG_FILE_PATH = Path("config/config.yaml")
# Relative path of the YAML hyper-parameter file.
PARAMS_FILE_PATH = Path("params.yaml")
src/cnnClassifier/entity/__init__.py ADDED
File without changes
src/cnnClassifier/entity/config_entity.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # entity
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage."""
    # Directory for this stage's outputs.
    root_dir: Path
    # Location the dataset is fetched from.
    source_URL: str
    # presumably the local path of the downloaded archive; verify against the component
    local_data_file: Path
    # presumably the directory the archive is extracted into; verify against the component
    unzip_dir: Path
11
+
12
@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for the base-model preparation stage."""
    # Directory for this stage's outputs.
    root_dir: Path
    # Path of the base model file.
    base_model_path: Path
    # Path of the updated base model file.
    updated_base_model_path: Path
    # Image size as a list; presumably [height, width, channels] (TODO confirm).
    params_image_size: list
    params_learning_rate: float
    params_include_top: bool
    params_weights: str
    params_classes: int
22
+
23
+
24
+
25
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the model-training stage."""
    # Directory for this stage's outputs.
    root_dir: Path
    # Path the trained model is saved to.
    trained_model_path: Path
    # Path of the updated base model file.
    updated_base_model_path: Path
    # Directory holding the training images.
    training_data: Path
    params_epochs: int
    params_batch_size: int
    # Whether training images are augmented.
    params_is_augmentation: bool
    # Image size as a list; presumably [height, width, channels] (TODO confirm).
    params_image_size: list
35
+
36
+
37
+
38
+
39
@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable settings for the evaluation stage."""
    # Path of the model to evaluate.
    path_of_model: Path
    # Directory holding the images used for evaluation.
    training_data: Path
    # The full set of loaded parameters.
    all_params: dict
    # MLflow tracking URI.
    mlflow_uri: str
    # Image size as a list; presumably [height, width, channels] (TODO confirm).
    params_image_size: list
    params_batch_size: int
src/cnnClassifier/pipeline/__init__.py ADDED
File without changes
src/cnnClassifier/pipeline/predict.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from tensorflow.keras.models import load_model
3
+ from tensorflow.keras.preprocessing import image
4
+ import os
5
+
6
+
7
+
8
class Prediction:
    """Classify a single image file with a saved Keras model.

    Args:
        filename: Path of the image to classify.
        model_path: Path of the saved model to load. Defaults to
            ``"model.h5"``, the value that was previously hard-coded.
        target_size: ``(height, width)`` the image is resized to before
            inference. Defaults to ``(224, 224)``, the previous fixed value.
    """

    def __init__(self, filename, model_path="model.h5", target_size=(224, 224)):
        self.filename = filename
        self.model_path = model_path
        self.target_size = target_size

    def predict(self):
        """Load the model, run it on the image and return the label.

        Returns:
            str: ``'Normal'`` when the arg-max class index is 1, otherwise
            ``'Tumor'``.
        """
        model = load_model(self.model_path)

        test_image = image.load_img(self.filename, target_size=self.target_size)
        test_image = image.img_to_array(test_image)
        # The model expects a batch, so add a leading batch axis.
        test_image = np.expand_dims(test_image, axis=0)
        # NOTE(review): pixel values are not rescaled here; if the model was
        # trained on inputs rescaled by 1/255, the same scaling is probably
        # needed at inference time - confirm before changing.
        result = np.argmax(model.predict(test_image), axis=1)
        print(result)

        # NOTE(review): verify that class index 1 really corresponds to
        # 'Normal' in the training generator's class_indices.
        if result[0] == 1:
            return 'Normal'
        return 'Tumor'
src/cnnClassifier/pipeline/stage_01_data_ingestion.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.data_ingestion import DataIngestion
3
+ from cnnClassifier import logger
4
+ STAGE_NAME = "Data Ingestion stage"
5
+
6
+
7
class DataIngestionTrainingPipeline:
    """Pipeline stage that runs the data-ingestion component."""

    def main(self):
        """Build the ingestion config, then run the download and unzip steps."""
        ingestion_settings = ConfigurationManager().get_data_ingestion_config()
        ingestion_step = DataIngestion(config=ingestion_settings)
        ingestion_step.download_file()
        ingestion_step.extract_zip_file()
17
+
18
+
19
+
20
if __name__ == '__main__':
    # Run the stage as a script; log start/finish banners and re-raise failures.
    try:
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        DataIngestionTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as err:
        logger.exception(err)
        raise
src/cnnClassifier/pipeline/stage_02_prepare_base_model.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.prepare_base_model import PrepareBaseModel
3
+ from cnnClassifier import logger
4
+
5
+
6
+ STAGE_NAME = "Prepare base model"
7
+
8
+
9
class PrepareBaseModelTrainingPipeline:
    """Pipeline stage that runs the base-model preparation component."""

    def main(self):
        """Build the stage config, then fetch and update the base model."""
        base_model_settings = ConfigurationManager().get_prepare_base_model_config()
        base_model_step = PrepareBaseModel(config=base_model_settings)
        base_model_step.get_base_model()
        base_model_step.update_base_model()
19
+
20
+
21
+
22
if __name__ == '__main__':
    # Run the stage as a script; log start/finish banners and re-raise failures.
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        PrepareBaseModelTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as err:
        logger.exception(err)
        raise
src/cnnClassifier/pipeline/stage_03_train_model.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.training import Training
3
+
4
+ from cnnClassifier import logger
5
+
6
+
7
+ STAGE_NAME = "Model Training"
8
+
9
+
10
class ModelTrainingPipeline:
    """Pipeline stage that runs the training component."""

    def main(self):
        """Build the training config, load the base model, prepare data and train."""
        training_settings = ConfigurationManager().get_training_config()
        trainer = Training(config=training_settings)
        trainer.get_base_model()
        trainer.train_valid_generator()
        trainer.train()
21
+
22
+
23
+
24
if __name__ == '__main__':
    # Run the stage as a script; log start/finish banners and re-raise failures.
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        ModelTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as err:
        logger.exception(err)
        raise
src/cnnClassifier/pipeline/stage_04_evaluation.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cnnClassifier.config.configuration import ConfigurationManager
2
+ from cnnClassifier.components.evaluation import Evaluation
3
+
4
+ from cnnClassifier import logger
5
+
6
+
7
+ STAGE_NAME = "Evaluation"
8
+
9
+
10
class EvaluationTrainingPipeline:
    """Pipeline stage that runs the evaluation component."""

    def main(self):
        """Build the evaluation config, evaluate, then log to MLflow."""
        evaluation_settings = ConfigurationManager().get_evaluation_config()
        evaluator = Evaluation(evaluation_settings)
        evaluator.evaluation()
        evaluator.log_into_mlflow()
20
+
21
+
22
+
23
if __name__ == '__main__':
    # Run the stage as a script; log start/finish banners and re-raise failures.
    try:
        logger.info("*******************")
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        EvaluationTrainingPipeline().main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as err:
        logger.exception(err)
        raise
src/cnnClassifier/utils/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
# NOTE(review): these two constants carry the same values as the ones in
# cnnClassifier/constant/__init__.py - confirm whether this copy is needed.
# Relative path of the main YAML configuration file.
CONFIG_FILE_PATH = Path("config/config.yaml")
# Relative path of the YAML hyper-parameter file.
PARAMS_FILE_PATH = Path("params.yaml")
src/cnnClassifier/utils/common.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from box.exceptions import BoxValueError
3
+ import yaml
4
+ from cnnClassifier import logger
5
+ import json
6
+ import joblib
7
+ from ensure import ensure_annotations
8
+ from box import ConfigBox
9
+ from pathlib import Path
10
+ from typing import Any
11
+ import base64
12
+
13
+
14
+
15
@ensure_annotations
def read_yaml(path_to_yaml: Path) -> ConfigBox:
    """Read a YAML file and wrap its content in a ConfigBox.

    Args:
        path_to_yaml (Path): location of the YAML file

    Raises:
        ValueError: if building the ConfigBox raises BoxValueError
            (reported as an empty yaml file)

    Returns:
        ConfigBox: the parsed content with attribute-style access
    """
    try:
        with open(path_to_yaml) as stream:
            parsed = yaml.safe_load(stream)
        logger.info(f"yaml file: {path_to_yaml} loaded successfully")
        return ConfigBox(parsed)
    except BoxValueError:
        raise ValueError("yaml file is empty")
38
+
39
+
40
+
41
@ensure_annotations
def create_directories(path_to_directories: list, verbose = True):
    """Create every directory in the given list (existing ones are left alone).

    Args:
        path_to_directories (list): directory paths to create
        verbose (bool, optional): log each created directory. Defaults to True.
    """
    for directory in path_to_directories:
        os.makedirs(directory, exist_ok=True)
        if not verbose:
            continue
        logger.info(f'Created directory at: {directory}')
54
+
55
@ensure_annotations
def save_json(path: Path, data: dict):
    """Write a dict to a JSON file with 4-space indentation.

    Args:
        path (Path): destination JSON file
        data (dict): content to serialize
    """
    with open(path, 'w') as json_file:
        json.dump(data, json_file, indent=4)

    logger.info(f'Json file saved at: {path}')
69
+
70
+
71
+
72
+
73
@ensure_annotations
def load_json(path: Path) -> ConfigBox:
    """Load a JSON file and wrap its content in a ConfigBox.

    Args:
        path (Path): path to json file

    Returns:
        ConfigBox: data as class attributes instead of dict
    """
    with open(path, 'r') as f:
        content = json.load(f)

    logger.info(f"Json file loaded successfully from: {path}")
    # Return an instance built from the parsed content; the bare ConfigBox
    # class was returned before, which discarded the loaded data.
    return ConfigBox(content)
89
+
90
+
91
@ensure_annotations
def save_bin(data: Any, path: Path):
    """Serialize an object to a binary file with joblib.

    Args:
        data (Any): object to store
        path (Path): destination file
    """
    joblib.dump(value=data, filename=path)
    logger.info(f'binary file saved at: {path}')
101
+
102
+
103
+
104
@ensure_annotations
def load_bin(path: Path) -> Any:
    """Load an object from a binary file with joblib.

    Args:
        path (Path): path to binary file

    Returns:
        Any: object stored in the file
    """
    # The return annotation is Any (not ConfigBox): joblib can return any
    # object type, and @ensure_annotations enforces the declared return type.
    data = joblib.load(path)
    logger.info(f'binary file has been loaded successfully from : {path}')
    return data
118
+
119
+
120
@ensure_annotations
def get_size(path: Path) -> str:
    """Return the file size, rounded to whole kilobytes, as text.

    Args:
        path (Path): path of the file

    Returns:
        str: size formatted as "~ <n> KB"
    """
    kilobytes = round(os.path.getsize(path) / 1024)
    return f"~ {kilobytes} KB"
133
+
134
+
135
def decodeImage(imgstring, fileName):
    """Decode a base64 payload and write the raw bytes to ``fileName``."""
    # Decode before opening so a bad payload never creates the output file.
    raw_bytes = base64.b64decode(imgstring)
    with open(fileName, 'wb') as sink:
        sink.write(raw_bytes)
140
+
141
+
142
+
143
def encodeImageIntoBase64(croppedImagePath):
    """Read a file and return its content encoded as base64 bytes.

    Args:
        croppedImagePath: path of the file to encode

    Returns:
        bytes: base64 encoding of the file's raw content
    """
    with open(croppedImagePath, 'rb') as f:
        # Encode (not decode): the raw file bytes are turned into base64.
        return base64.b64encode(f.read())
template.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ import logging
4
+
5
# Project scaffold: create the directory tree and empty placeholder files.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:')

project_name = 'cnnClassifier'

# Files that must exist in a fresh checkout, relative to the working directory.
list_of_files = [
    '.github/workflows/.gitkeep',
    f"src/{project_name}/__init__.py",
    f"src/{project_name}/components/__init__.py",
    f"src/{project_name}/utils/__init__.py",
    f"src/{project_name}/config/__init__.py",
    f"src/{project_name}/config/configuration.py",
    f"src/{project_name}/pipeline/__init__.py",
    f"src/{project_name}/entity/__init__.py",
    f"src/{project_name}/constant/__init__.py",
    'config/config.yaml',
    'dvc.yaml',
    'params.yaml',
    'requirements.txt',
    'setup.py',
    'research/trials.ipynb',
]

for filepath in map(Path, list_of_files):
    filedir, filename = os.path.split(filepath)

    # Top-level files have no parent directory to create.
    if filedir:
        os.makedirs(filedir, exist_ok=True)
        logging.info(f'Creating directory; {filedir} for the file: {filename}')

    # Files that already hold content are left untouched.
    if os.path.exists(filepath) and os.path.getsize(filepath) != 0:
        logging.info(f'File: {filename} already exists')
        continue

    with open(filepath, 'w'):
        pass
    logging.info(f'Creating an empty file: {filename}')