KingAsiedu committed
Commit 70b7c0d · 1 Parent(s): e300d41

Upload 11 files
.gitignore ADDED
@@ -0,0 +1,5 @@
+ venv
+ venv/
+ .venv/
+ src/__pycache__
+ src/__pycache__/app.cpython-39.pyc
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+
+ WORKDIR /app
+
+ COPY ./requirements.txt /requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]
Models/.gitkeep ADDED
File without changes
Models/Sepssis_in_ICU.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
Binary file (1.64 kB)
 
src/Sepsis_App/__pycache__/main.cpython-310.pyc ADDED
Binary file (4.35 kB)
 
src/Sepsis_App/gradient_boosting_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11bc712d26e3044165144ece900b92eed6eb41f80396f3ce7d53704133400684
+ size 1089788
src/Sepsis_App/main.py ADDED
@@ -0,0 +1,154 @@
+ from fastapi import FastAPI  # Import the FastAPI framework for building APIs
+ from typing import List, Literal  # Import typing hints for function annotations
+ from pydantic import BaseModel  # Import BaseModel for creating data models
+ import uvicorn  # Import uvicorn for running the FastAPI app
+ import pandas as pd  # Import the pandas library for data manipulation
+ import pickle, os  # Import pickle and os for file handling and data serialization
+
+ # Define a function to load machine learning components
+ def load_ml_components(fp):
+     '''Load pickled machine learning components to re-use in the app.'''
+     with open(fp, 'rb') as f:
+         object = pickle.load(f)  # Load the pickled object (machine learning pipeline)
+         return object  # Return the loaded object
+
+ # Define a Pydantic model for the input data
+ class Sepsis(BaseModel):
+     """
+     Represents the input data for the model prediction.
+
+     Attributes:
+         PlasmaGlucose (int): The plasma glucose level of the individual.
+         BloodWorkResult_1 (int): The result of blood work test 1.
+         BloodPressure (int): The blood pressure reading of the individual.
+         BloodWorkResult_2 (int): The result of blood work test 2.
+         BloodWorkResult_3 (int): The result of blood work test 3.
+         BodyMassIndex (float): The body mass index of the individual.
+         BloodWorkResult_4 (float): The result of blood work test 4.
+         Age (int): The age of the individual.
+
+     'sepsis' is the target feature, which holds 0 = Negative and 1 = Positive.
+     """
+     # Define the input features as class attributes
+
+     PlasmaGlucose: int
+     BloodWorkResult_1: int
+     BloodPressure: int
+     BloodWorkResult_2: int
+     BloodWorkResult_3: int
+     BodyMassIndex: float
+     BloodWorkResult_4: float
+     Age: int
+
+ # Setup
+ """
+ Get the absolute path of the current module file,
+ then extract the directory path from that absolute path.
+ This is useful when we need to locate the model file
+ relative to our script's location.
+ """
+ # Get the absolute path of the current directory
+ DIRPATH = os.path.dirname(os.path.realpath(__file__))
+
+ # Join the directory path with the model file name
+ ml_core_fp = os.path.join(DIRPATH, 'gradient_boosting_model.pkl')
+
+ # Define the labels manually
+ labels = ['Negative', 'Positive']
+
+ # Load the machine learning components
+ end2end_pipeline = load_ml_components(fp=ml_core_fp)  # Load the machine learning pipeline from the file
+
+ # Access the model step of the pipeline
+ model = end2end_pipeline.named_steps['model']  # Access the model component from the pipeline
+
+ # Create a dictionary to map indices to labels
+ idx_to_labels = {i: l for (i, l) in enumerate(labels)}
+
+ # Print predictable labels and the index-to-label mapping
+ print(f'\n[Info] Predictable labels: {labels}')
+ print(f'\n[Info] Indices to labels: {idx_to_labels}')
+
+ # Print information about the loaded model
+ print(f'\n[Info] ML components loaded - Model: {model}')
+
+ # Create the FastAPI application instance
+ app = FastAPI(title='Sepsis Prediction API')  # Create a FastAPI instance with a title
+
+ # Define a route to handle the root endpoint
+ @app.get('/')
+ async def root():
+     return {
+         "info": "Sepsis Prediction API: This interface is about the prediction of sepsis disease of patients in ICU."
+     }
+
+
+ # Define a route to handle the prediction
+ @app.post('/classify')
+ async def sepsis_classification(sepsis: Sepsis):
+     # Define symbols for printing status marks
+     red_x = u"\u274C"
+     green_checkmark = "\033[32m" + u"\u2713" + "\033[0m"
+
+     try:
+         # Create a dataframe from the input data
+         df = pd.DataFrame(
+             {
+                 'PlasmaGlucose': [sepsis.PlasmaGlucose],
+                 'BloodWorkResult_1(U/ml)': [sepsis.BloodWorkResult_1],
+                 'BloodPressure(mm Hg)': [sepsis.BloodPressure],
+                 'BloodWorkResult_2(mm)': [sepsis.BloodWorkResult_2],
+                 'BloodWorkResult_3(U/ml)': [sepsis.BloodWorkResult_3],
+                 'BodyMassIndex(kg/m)^2': [sepsis.BodyMassIndex],
+                 'BloodWorkResult_4(U/ml)': [sepsis.BloodWorkResult_4],
+                 'Age (years)': [sepsis.Age]}
+         )
+         # Print input data as a dataframe
+         print(f'[Info] Input data as dataframe:\n{df.to_markdown()}')
+
+         # Predict using the loaded model
+         output = model.predict(df)
+         confidence_scores = model.predict_proba(df)  # Predict the probabilities for each class
+         print(f'Considering the best confidence score, the output is: {output}')
+         print(f'Confidence scores: {confidence_scores}')
+
+         # Get the index of the predicted class
+         predicted_idx = output
+
+         # Store the index, then replace it with the matching label
+         df['Predicted label'] = predicted_idx
+         predicted_label = df['Predicted label'].replace(idx_to_labels)
+         df['Predicted label'] = predicted_label
+
+         # Map predicted indices to labels
+         predicted_labels = [idx_to_labels[idx] for idx in output]
+
+         # Store the predicted probabilities for each class in the dataframe
+         for i, label in enumerate(labels):
+             df[f'Confidence_{label}'] = confidence_scores[:, i] * 100  # Convert to percentage
+
+         # Print the result with confidence scores as percentages
+         if predicted_labels:
+             i = 0
+             label = predicted_labels[0]  # Get the first predicted label
+             confidence_score_percentage = max(confidence_scores[i]) * 100
+             print(f"{green_checkmark} This patient in ICU has been classified as Sepsis {label} with confidence of: {confidence_score_percentage:.1f}%")
+
+         msg = "Execution went fine"
+         code = 1
+         pred = df.to_dict("records")
+
+
+     except Exception as e:
+         print(f"\033[91m{red_x} An exception occurred: {str(e)}")
+         msg = "Execution did not go well"
+         code = 0
+         pred = None
+
+     # Create the API response
+     result = {"Execution_msg": msg, "execution_code": code, "prediction": pred}
+     return result
+
+ # Run the FastAPI application using uvicorn
+ if __name__ == "__main__":
+     uvicorn.run("main:app", reload=True)
src/Sepsis_App/readme.md ADDED
@@ -0,0 +1,45 @@
+ # Sepsis Classification App Explained
+
+ ## Importing Required Modules:
+ At the beginning, the script imports the essential modules used throughout the program. The FastAPI class is imported from the fastapi framework and is used to create the API. The List and Literal classes are imported from the typing module to define annotations for function arguments. The BaseModel class is imported from the pydantic module, allowing us to create data models with defined attributes. The uvicorn module is imported for running the FastAPI app, while the pandas module is imported for data manipulation. Finally, the pickle and os modules are imported for handling file operations and data serialization.
+
+ ## Defining a Function to Load Machine Learning Components:
+ A function named load_ml_components is defined to load machine learning components from a file. This function takes a file path as an argument, opens the file in binary read mode ('rb'), and uses the pickle.load() method to load the pickled object (the machine learning model) from the file. The loaded object is returned to the caller.
+
+ ## Defining the Pydantic Model for Input Data:
+ A Pydantic data model named Sepsis is defined to represent the input data required for model prediction. This model subclasses the BaseModel class. It defines attributes that correspond to the features required for the model prediction, such as PlasmaGlucose, BloodWorkResult_1, BloodPressure, and so on. Additionally, the class attributes are documented in the docstring, which explains their purpose and data types.
+
+ ## Setting Up the Application:
+ This section deals with setting up the initial environment for the application. It first obtains the absolute path of the current directory using os.path.dirname(os.path.realpath(__file__)). The absolute path of the machine learning model file is then formed by joining the directory path with the filename (gradient_boosting_model.pkl). The labels list is defined manually to hold the possible target labels, i.e., 'Negative' and 'Positive'.
+
+ ## Loading Machine Learning Components:
+ Here, the script loads the machine learning components by calling the load_ml_components function with the previously obtained model file path (ml_core_fp). The loaded pipeline is stored in the end2end_pipeline variable, and the actual machine learning model step is extracted and stored in the model variable using end2end_pipeline.named_steps['model'].
+
+ ## Creating Index-to-Label Mapping:
+ An index-to-label mapping is created using a dictionary comprehension. The dictionary, named idx_to_labels, maps each index to the corresponding label. This mapping is used later to convert predicted indices back to their corresponding labels.
+
+ ## Printing Information About the Loaded Model:
+ This part prints informative messages about the loaded model. The script prints the predictable labels taken from the labels list, as well as the index-to-label mapping. It then prints information about the loaded model using the model variable, indicating that the machine learning components have been successfully loaded.
+
+ ## Creating the FastAPI Application Instance:
+ The FastAPI application instance is created using the FastAPI class. The title argument is set to 'Sepsis Prediction API', giving the API a title.
+
+ ## Defining the Root Endpoint:
+ A route is defined using the @app.get('/') decorator, which handles the root endpoint. This route returns a JSON response containing information about the Sepsis Prediction API, explaining its purpose.
+
+ ## Defining the Prediction Route:
+ Another route is defined using the @app.post('/classify') decorator. This route handles predictions for sepsis classification. Inside the function sepsis_classification, symbols for a green checkmark and a red "X" are defined using Unicode escape codes. The function processes the incoming data according to the defined Pydantic model and performs the following steps:
+
+ 1. Creates a pandas DataFrame named df containing the input data attributes from the request.
+ 2. Prints the input data in DataFrame format.
+ 3. Uses the loaded machine learning model to predict the class labels for the input data. Additionally, it predicts the class probabilities using the predict_proba() method.
+ 4. Retrieves the predicted class index and replaces it with the corresponding label in the DataFrame.
+ 5. Maps the predicted indices to their corresponding labels.
+ 6. Calculates and stores the confidence scores (probabilities) for each class in the DataFrame.
+ 7. Prints the prediction results along with confidence scores as percentages.
+ 8. Handles exceptions that may occur during the prediction process, printing error messages and returning a response indicating the execution status.
+
+ ## Running the FastAPI Application:
+ Finally, the script checks whether it is being run as the main module and then starts the FastAPI application using uvicorn. The argument "main:app" specifies the module name and the FastAPI application instance. The reload argument is set to True to allow automatic reloading of the server.
+
+ This code represents a complete workflow for setting up a FastAPI-based API for sepsis prediction using a pre-trained machine learning model. It handles requests for classifying sepsis cases and provides informative responses, including predicted labels and confidence scores.
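The readme above walks through the Sepsis input model and the classification flow. As a quick illustration of the validation step it describes, here is a minimal sketch of the same kind of Pydantic model being populated from a raw payload; the values are made up, and in the app itself this parsing is performed automatically by FastAPI:

```python
from pydantic import BaseModel, ValidationError

class Sepsis(BaseModel):
    PlasmaGlucose: int
    BloodWorkResult_1: int
    BloodPressure: int
    BloodWorkResult_2: int
    BloodWorkResult_3: int
    BodyMassIndex: float
    BloodWorkResult_4: float
    Age: int

# Illustrative raw payload; numeric strings are coerced to the declared types
raw = {
    "PlasmaGlucose": "120",
    "BloodWorkResult_1": 80,
    "BloodPressure": 70,
    "BloodWorkResult_2": 25,
    "BloodWorkResult_3": 100,
    "BodyMassIndex": 28.5,
    "BloodWorkResult_4": 0.45,
    "Age": 45,
}

try:
    record = Sepsis(**raw)  # validation and type coercion happen here
    print(record.dict())    # validated, typed fields (pydantic v1-style API)
except ValidationError as err:
    print(err)              # raised when a field is missing or cannot be coerced
```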
src/gradient_boosting_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11bc712d26e3044165144ece900b92eed6eb41f80396f3ce7d53704133400684
+ size 1089788
src/main.py ADDED
@@ -0,0 +1,161 @@
+ from fastapi import FastAPI  # Import the FastAPI framework for building APIs
+ from typing import List, Literal  # Import typing hints for function annotations
+ from pydantic import BaseModel  # Import BaseModel for creating data models
+ import uvicorn  # Import uvicorn for running the FastAPI app
+ import pandas as pd  # Import the pandas library for data manipulation
+ import pickle, os  # Import pickle and os for file handling and data serialization
+
+ # Define a function to load machine learning components
+ def load_ml_components(fp):
+     '''Load pickled machine learning components to re-use in the app.'''
+     with open(fp, 'rb') as f:
+         object = pickle.load(f)  # Load the pickled object (machine learning pipeline)
+         return object  # Return the loaded object
+
+ # Define a Pydantic model for the input data
+ class Sepsis(BaseModel):
+     """
+     Represents the input data for the model prediction.
+
+     Attributes:
+         PlasmaGlucose (int): The plasma glucose level of the individual.
+         BloodWorkResult_1 (int): The result of blood work test 1.
+         BloodPressure (int): The blood pressure reading of the individual.
+         BloodWorkResult_2 (int): The result of blood work test 2.
+         BloodWorkResult_3 (int): The result of blood work test 3.
+         BodyMassIndex (float): The body mass index of the individual.
+         BloodWorkResult_4 (float): The result of blood work test 4.
+         Age (int): The age of the individual.
+
+     'sepsis' is the target feature, which holds 0 = Negative and 1 = Positive.
+     """
+     # Define the input features as class attributes
+
+     # PlasmaGlucose : int
+     # BloodWorkResult_1 : int
+     # BloodPressure : int
+     # BloodWorkResult_2 : int
+     # BloodWorkResult_3 : int
+     # BodyMassIndex : float
+     # BloodWorkResult_4 : float
+     # Age : int
+
+ # Setup
+ """
+ Get the absolute path of the current module file,
+ then extract the directory path from that absolute path.
+ This is useful when we need to locate the model file
+ relative to our script's location.
+ """
+ # Get the absolute path of the current directory
+ DIRPATH = os.path.dirname(os.path.realpath(__file__))
+
+ # Join the directory path with the model file name
+ ml_core_fp = os.path.join(DIRPATH, 'gradient_boosting_model.pkl')
+
+ # Define the labels manually
+ labels = ['Negative', 'Positive']
+
+ # Load the machine learning components
+ end2end_pipeline = load_ml_components(fp=ml_core_fp)  # Load the machine learning pipeline from the file
+
+ # Access the model step of the pipeline
+ model = end2end_pipeline.named_steps['model']  # Access the model component from the pipeline
+
+ # Create a dictionary to map indices to labels
+ idx_to_labels = {i: l for (i, l) in enumerate(labels)}
+
+ # Print predictable labels and the index-to-label mapping
+ print(f'\n[Info] Predictable labels: {labels}')
+ print(f'\n[Info] Indices to labels: {idx_to_labels}')
+
+ # Print information about the loaded model
+ print(f'\n[Info] ML components loaded - Model: {model}')
+
+ # Create the FastAPI application instance
+ app = FastAPI(title='Sepsis Prediction API')  # Create a FastAPI instance with a title
+
+ # Define a route to handle the root endpoint
+ @app.get('/')
+ def root():
+     return {
+         "info": "Sepsis Prediction API: This interface is about the prediction of sepsis disease of patients in ICU."
+     }
+
+
+ # Define a route to handle the prediction
+ @app.post('/classify')
+ def sepsis_classification(PlasmaGlucose: int,
+                           BloodWorkResult_1: int,
+                           BloodPressure: int,
+                           BloodWorkResult_2: int,
+                           BloodWorkResult_3: int,
+                           BodyMassIndex: float,
+                           BloodWorkResult_4: float,
+                           Age: int):
+     # Define symbols for printing status marks
+     red_x = u"\u274C"
+     green_checkmark = "\033[32m" + u"\u2713" + "\033[0m"
+
+     try:
+         # Create a dataframe from the input data; to fix the indexing issue, wrap the dict in a list
+         df = pd.DataFrame(
+             [{
+                 'PlasmaGlucose': PlasmaGlucose,
+                 'BloodWorkResult_1(U/ml)': BloodWorkResult_1,
+                 'BloodPressure(mm Hg)': BloodPressure,
+                 'BloodWorkResult_2(mm)': BloodWorkResult_2,
+                 'BloodWorkResult_3(U/ml)': BloodWorkResult_3,
+                 'BodyMassIndex(kg/m)^2': BodyMassIndex,
+                 'BloodWorkResult_4(U/ml)': BloodWorkResult_4,
+                 'Age (years)': Age}]
+         )
+         # Print input data as a dataframe
+         print(f'[Info] Input data as dataframe:\n{df.to_markdown()}')
+
+         # Predict using the loaded model
+         output = model.predict(df)
+         confidence_scores = model.predict_proba(df)  # Predict the probabilities for each class
+         print(f'Considering the best confidence score, the output is: {output}')
+         print(f'Confidence scores: {confidence_scores}')
+
+         # Get the index of the predicted class
+         predicted_idx = output
+
+         # Store the index, then replace it with the matching label
+         df['Predicted label'] = predicted_idx
+         predicted_label = df['Predicted label'].replace(idx_to_labels)
+         df['Predicted label'] = predicted_label
+
+         # Map predicted indices to labels
+         predicted_labels = [idx_to_labels[idx] for idx in output]
+
+         # Store the predicted probabilities for each class in the dataframe
+         for i, label in enumerate(labels):
+             df[f'Confidence_{label}'] = confidence_scores[:, i] * 100  # Convert to percentage
+
+         # Print the result with confidence scores as percentages
+         if predicted_labels:
+             i = 0
+             label = predicted_labels[0]  # Get the first predicted label
+             confidence_score_percentage = max(confidence_scores[i]) * 100
+             print(f"{green_checkmark} This patient in ICU has been classified as Sepsis {label} with confidence of: {confidence_score_percentage:.1f}%")
+
+         msg = "Execution went fine"
+         code = 1
+         pred = df.to_dict("records")
+
+
+     except Exception as e:
+         print(f"\033[91m{red_x} An exception occurred: {str(e)}")
+         msg = "Execution did not go well"
+         code = 0
+         pred = None
+
+     # Create the API response
+     result = {"Execution_msg": msg, "execution_code": code, "prediction": pred}
+     return result
+
+ # Run the FastAPI application using uvicorn
+ if __name__ == "__main__":
+     uvicorn.run("main:app", reload=False)