KingAsiedu committed
Commit 70b7c0d · 1 Parent(s): e300d41

Upload 11 files
.gitignore ADDED
@@ -0,0 +1,5 @@
+ venv
+ venv/
+ .venv/
+ src/__pycache__
+ src/__pycache__/app.cpython-39.pyc
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+
+ WORKDIR /app
+
+ COPY ./requirements.txt /requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /requirements.txt
+
+ COPY . .
+
+ CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "7860"]
Models/.gitkeep ADDED
File without changes
Models/Sepssis_in_ICU.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
Binary file (1.64 kB)
 
src/Sepsis_App/__pycache__/main.cpython-310.pyc ADDED
Binary file (4.35 kB)
 
src/Sepsis_App/gradient_boosting_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11bc712d26e3044165144ece900b92eed6eb41f80396f3ce7d53704133400684
+ size 1089788
src/Sepsis_App/main.py ADDED
@@ -0,0 +1,154 @@
+ from fastapi import FastAPI  # Import the FastAPI framework for building APIs
+ from typing import List, Literal  # Import typing hints for function annotations
+ from pydantic import BaseModel  # Import BaseModel for creating data models
+ import uvicorn  # Import uvicorn for running the FastAPI app
+ import pandas as pd  # Import the pandas library for data manipulation
+ import pickle, os  # Import pickle and os for file handling and data serialization
+
+ # Define a function to load machine learning components
+ def load_ml_components(fp):
+     '''Load pickled machine learning components to re-use in the app.'''
+     with open(fp, 'rb') as f:
+         object = pickle.load(f)  # Load the pickled object (machine learning pipeline)
+         return object  # Return the loaded object
+
+ # Define a Pydantic model for the input data
+ class Sepsis(BaseModel):
+     """
+     Represents the input data for the model prediction.
+
+     Attributes:
+         PlasmaGlucose (int): The plasma glucose level of the individual.
+         BloodWorkResult_1 (int): The result of blood work test 1.
+         BloodPressure (int): The blood pressure reading of the individual.
+         BloodWorkResult_2 (int): The result of blood work test 2.
+         BloodWorkResult_3 (int): The result of blood work test 3.
+         BodyMassIndex (float): The body mass index of the individual.
+         BloodWorkResult_4 (float): The result of blood work test 4.
+         Age (int): The age of the individual.
+
+     'sepsis' is the target feature, which holds 0 = Negative and 1 = Positive.
+     """
+     # Define the input features as class attributes
+
+     PlasmaGlucose: int
+     BloodWorkResult_1: int
+     BloodPressure: int
+     BloodWorkResult_2: int
+     BloodWorkResult_3: int
+     BodyMassIndex: float
+     BloodWorkResult_4: float
+     Age: int
+
+ # Setup
+ """
+ Get the absolute path of the current module file,
+ then extract the directory path from that absolute path.
+ This is useful when we need to locate the model file
+ relative to our script's location.
+ """
+ # Get the absolute path of the current directory
+ DIRPATH = os.path.dirname(os.path.realpath(__file__))
+
+ # Join the directory path with the model file name
+ ml_core_fp = os.path.join(DIRPATH, 'gradient_boosting_model.pkl')
+
+ # Define the labels manually
+ labels = ['Negative', 'Positive']
+
+ # Load the machine learning components
+ end2end_pipeline = load_ml_components(fp=ml_core_fp)  # Load the machine learning pipeline from the file
+
+ # Access the model step of the pipeline
+ model = end2end_pipeline.named_steps['model']  # Access the model component from the pipeline
+
+ # Create a dictionary to map indices to labels
+ idx_to_labels = {i: l for (i, l) in enumerate(labels)}
+
+ # Print predictable labels and the index-to-label mapping
+ print(f'\n[Info] Predictable labels: {labels}')
+ print(f'\n[Info] Indices to labels: {idx_to_labels}')
+
+ # Print information about the loaded model
+ print(f'\n[Info] ML components loaded - Model: {model}')
+
+ # Create the FastAPI application instance
+ app = FastAPI(title='Sepsis Prediction API')  # Create a FastAPI instance with a title
+
+ # Define a route to handle the root endpoint
+ @app.get('/')
+ async def root():
+     return {
+         "info": "Sepsis Prediction API: This interface is about the prediction of sepsis disease of patients in ICU."
+     }
+
+
+ # Define a route to handle the prediction
+ @app.post('/classify')
+ async def sepsis_classification(sepsis: Sepsis):
+     # Define symbols for printing status marks
+     red_x = u"\u274C"
+     green_checkmark = "\033[32m" + u"\u2713" + "\033[0m"
+
+     try:
+         # Create a dataframe from the input data
+         df = pd.DataFrame(
+             {
+                 'PlasmaGlucose': [sepsis.PlasmaGlucose],
+                 'BloodWorkResult_1(U/ml)': [sepsis.BloodWorkResult_1],
+                 'BloodPressure(mm Hg)': [sepsis.BloodPressure],
+                 'BloodWorkResult_2(mm)': [sepsis.BloodWorkResult_2],
+                 'BloodWorkResult_3(U/ml)': [sepsis.BloodWorkResult_3],
+                 'BodyMassIndex(kg/m)^2': [sepsis.BodyMassIndex],
+                 'BloodWorkResult_4(U/ml)': [sepsis.BloodWorkResult_4],
+                 'Age (years)': [sepsis.Age]}
+         )
+         # Print input data as a dataframe
+         print(f'[Info] Input data as dataframe:\n{df.to_markdown()}')
+
+         # Predict using the loaded model
+         output = model.predict(df)
+         confidence_scores = model.predict_proba(df)  # Predict the probabilities for each class
+         print(f'Considering the best confidence score, the output is: {output}')
+         print(f'Confidence scores: {confidence_scores}')
+
+         # Get the index of the predicted class
+         predicted_idx = output
+
+         # Store the index, then replace it with the matching label
+         df['Predicted label'] = predicted_idx
+         predicted_label = df['Predicted label'].replace(idx_to_labels)
+         df['Predicted label'] = predicted_label
+
+         # Map predicted indices to labels
+         predicted_labels = [idx_to_labels[idx] for idx in output]
+
+         # Store the predicted probabilities for each class in the dataframe
+         for i, label in enumerate(labels):
+             df[f'Confidence_{label}'] = confidence_scores[:, i] * 100  # Convert to percentage
+
+         # Print the result with confidence scores as percentages
+         if predicted_labels:
+             i = 0
+             label = predicted_labels[0]  # Get the first predicted label
+             confidence_score_percentage = max(confidence_scores[i]) * 100
+             print(f"{green_checkmark} This patient in ICU has been classified as Sepsis {label} with confidence of: {confidence_score_percentage:.1f}%")
+
+         msg = "Execution went fine"
+         code = 1
+         pred = df.to_dict("records")
+
+
+     except Exception as e:
+         print(f"\033[91m{red_x} An exception occurred: {str(e)}")
+         msg = "Execution did not go well"
+         code = 0
+         pred = None
+
+     # Create the API response
+     result = {"Execution_msg": msg, "execution_code": code, "prediction": pred}
+     return result
+
+ # Run the FastAPI application using uvicorn
+ if __name__ == "__main__":
+     uvicorn.run("main:app", reload=True)
src/Sepsis_App/readme.md ADDED
@@ -0,0 +1,45 @@
+ # Sepsis Classification App Explained
+
+ ## Importing Required Modules:
+ At the beginning, the script imports the essential modules used throughout the program. The FastAPI class is imported from the fastapi framework and is used to create the API. The List and Literal classes are imported from the typing module to define annotations for function arguments. The BaseModel class is imported from the pydantic module, allowing us to create data models with defined attributes. The uvicorn module is imported for running the FastAPI app, while the pandas module is imported for data manipulation. Finally, the pickle and os modules are imported for handling file operations and data serialization.
+
+ ## Defining a Function to Load Machine Learning Components:
+ A function named load_ml_components is defined to load machine learning components from a file. This function takes a file path as an argument, opens the file in binary read mode ('rb'), and uses the pickle.load() method to load the pickled object (the machine learning model) from the file. The loaded object is returned to the caller.
+
+ ## Defining the Pydantic Model for Input Data:
+ A Pydantic data model named Sepsis is defined to represent the input data required for model prediction. This model subclasses the BaseModel class. It defines attributes that correspond to the features required for the model prediction, such as PlasmaGlucose, BloodWorkResult_1, BloodPressure, and so on. Additionally, the class attributes are documented in the docstring, which explains their purpose and data types.
+
+ ## Setting Up the Application:
+ This section deals with setting up the initial environment for the application. It first obtains the absolute path of the current directory using os.path.dirname(os.path.realpath(__file__)). The absolute path of the machine learning model file is then formed by joining the directory path with the filename (gradient_boosting_model.pkl). The labels list is defined manually to hold the possible target labels, i.e., 'Negative' and 'Positive'.
+
+ ## Loading Machine Learning Components:
+ Here, the script loads the machine learning components by calling the load_ml_components function with the previously obtained model file path (ml_core_fp). The loaded pipeline is stored in the end2end_pipeline variable, and the actual machine learning model step is extracted and stored in the model variable using end2end_pipeline.named_steps['model'].
+
+ ## Creating Index-to-Label Mapping:
+ An index-to-label mapping is created using a dictionary comprehension. The dictionary, named idx_to_labels, maps each index to the corresponding label. This mapping is used later to convert predicted indices back to their corresponding labels.
+
+ ## Printing Information About the Loaded Model:
+ This part prints informative messages about the loaded model. The script prints the predictable labels taken from the labels list, as well as the index-to-label mapping. It then prints information about the loaded model using the model variable, indicating that the machine learning components have been successfully loaded.
+
+ ## Creating the FastAPI Application Instance:
+ The FastAPI application instance is created using the FastAPI class. The title argument is set to 'Sepsis Prediction API', giving the API a title.
+
+ ## Defining the Root Endpoint:
+ A route is defined using the @app.get('/') decorator, which handles the root endpoint. This route returns a JSON response containing information about the Sepsis Prediction API, explaining its purpose.
+
+ ## Defining the Prediction Route:
+ Another route is defined using the @app.post('/classify') decorator. This route handles predictions for sepsis classification. Inside the function sepsis_classification, symbols for a green checkmark and a red "X" are defined using Unicode escape codes. The function processes the incoming data according to the defined Pydantic model and performs the following steps:
+
+ 1. Creates a pandas DataFrame named df containing the input data attributes from the request.
+ 2. Prints the input data in DataFrame format.
+ 3. Uses the loaded machine learning model to predict the class labels for the input data. Additionally, it predicts the class probabilities using the predict_proba() method.
+ 4. Retrieves the predicted class index and replaces it with the corresponding label in the DataFrame.
+ 5. Maps the predicted indices to their corresponding labels.
+ 6. Calculates and stores the confidence scores (probabilities) for each class in the DataFrame.
+ 7. Prints the prediction results along with confidence scores as percentages.
+ 8. Handles exceptions that may occur during the prediction process, printing error messages and returning a response indicating the execution status.
+
+ ## Running the FastAPI Application:
+ Finally, the script checks whether it is being run as the main module and then starts the FastAPI application using uvicorn. The argument "main:app" specifies the module name and the FastAPI application instance. The reload argument is set to True to allow automatic reloading of the server.
+
+ This code represents a complete workflow for setting up a FastAPI-based API for sepsis prediction using a pre-trained machine learning model. It handles requests for classifying sepsis cases and provides informative responses, including predicted labels and confidence scores.
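The readme above walks through the Sepsis input model and the classification flow. As a quick illustration of the validation step it describes, here is a minimal sketch of the same kind of Pydantic model being populated from a raw payload; the values are made up, and in the app itself this parsing is performed automatically by FastAPI:

```python
from pydantic import BaseModel, ValidationError

class Sepsis(BaseModel):
    PlasmaGlucose: int
    BloodWorkResult_1: int
    BloodPressure: int
    BloodWorkResult_2: int
    BloodWorkResult_3: int
    BodyMassIndex: float
    BloodWorkResult_4: float
    Age: int

# Illustrative raw payload; numeric strings are coerced to the declared types
raw = {
    "PlasmaGlucose": "120",
    "BloodWorkResult_1": 80,
    "BloodPressure": 70,
    "BloodWorkResult_2": 25,
    "BloodWorkResult_3": 100,
    "BodyMassIndex": 28.5,
    "BloodWorkResult_4": 0.45,
    "Age": 45,
}

try:
    record = Sepsis(**raw)  # validation and type coercion happen here
    print(record.dict())    # validated, typed fields (pydantic v1-style API)
except ValidationError as err:
    print(err)              # raised when a field is missing or cannot be coerced
```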
src/gradient_boosting_model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11bc712d26e3044165144ece900b92eed6eb41f80396f3ce7d53704133400684
+ size 1089788
src/main.py ADDED
@@ -0,0 +1,161 @@
+ from fastapi import FastAPI  # Import the FastAPI framework for building APIs
+ from typing import List, Literal  # Import typing hints for function annotations
+ from pydantic import BaseModel  # Import BaseModel for creating data models
+ import uvicorn  # Import uvicorn for running the FastAPI app
+ import pandas as pd  # Import the pandas library for data manipulation
+ import pickle, os  # Import pickle and os for file handling and data serialization
+
+ # Define a function to load machine learning components
+ def load_ml_components(fp):
+     '''Load pickled machine learning components to re-use in the app.'''
+     with open(fp, 'rb') as f:
+         object = pickle.load(f)  # Load the pickled object (machine learning pipeline)
+         return object  # Return the loaded object
+
+ # Define a Pydantic model for the input data
+ class Sepsis(BaseModel):
+     """
+     Represents the input data for the model prediction.
+
+     Attributes:
+         PlasmaGlucose (int): The plasma glucose level of the individual.
+         BloodWorkResult_1 (int): The result of blood work test 1.
+         BloodPressure (int): The blood pressure reading of the individual.
+         BloodWorkResult_2 (int): The result of blood work test 2.
+         BloodWorkResult_3 (int): The result of blood work test 3.
+         BodyMassIndex (float): The body mass index of the individual.
+         BloodWorkResult_4 (float): The result of blood work test 4.
+         Age (int): The age of the individual.
+
+     'sepsis' is the target feature, which holds 0 = Negative and 1 = Positive.
+     """
+     # Define the input features as class attributes
+
+     # PlasmaGlucose : int
+     # BloodWorkResult_1 : int
+     # BloodPressure : int
+     # BloodWorkResult_2 : int
+     # BloodWorkResult_3 : int
+     # BodyMassIndex : float
+     # BloodWorkResult_4 : float
+     # Age : int
+
+ # Setup
+ """
+ Get the absolute path of the current module file,
+ then extract the directory path from that absolute path.
+ This is useful when we need to locate the model file
+ relative to our script's location.
+ """
+ # Get the absolute path of the current directory
+ DIRPATH = os.path.dirname(os.path.realpath(__file__))
+
+ # Join the directory path with the model file name
+ ml_core_fp = os.path.join(DIRPATH, 'gradient_boosting_model.pkl')
+
+ # Define the labels manually
+ labels = ['Negative', 'Positive']
+
+ # Load the machine learning components
+ end2end_pipeline = load_ml_components(fp=ml_core_fp)  # Load the machine learning pipeline from the file
+
+ # Access the model step of the pipeline
+ model = end2end_pipeline.named_steps['model']  # Access the model component from the pipeline
+
+ # Create a dictionary to map indices to labels
+ idx_to_labels = {i: l for (i, l) in enumerate(labels)}
+
+ # Print predictable labels and the index-to-label mapping
+ print(f'\n[Info] Predictable labels: {labels}')
+ print(f'\n[Info] Indices to labels: {idx_to_labels}')
+
+ # Print information about the loaded model
+ print(f'\n[Info] ML components loaded - Model: {model}')
+
+ # Create the FastAPI application instance
+ app = FastAPI(title='Sepsis Prediction API')  # Create a FastAPI instance with a title
+
+ # Define a route to handle the root endpoint
+ @app.get('/')
+ def root():
+     return {
+         "info": "Sepsis Prediction API: This interface is about the prediction of sepsis disease of patients in ICU."
+     }
+
+
+ # Define a route to handle the prediction
+ @app.post('/classify')
+ def sepsis_classification(PlasmaGlucose: int,
+                           BloodWorkResult_1: int,
+                           BloodPressure: int,
+                           BloodWorkResult_2: int,
+                           BloodWorkResult_3: int,
+                           BodyMassIndex: float,
+                           BloodWorkResult_4: float,
+                           Age: int):
+     # Define symbols for printing status marks
+     red_x = u"\u274C"
+     green_checkmark = "\033[32m" + u"\u2713" + "\033[0m"
+
+     try:
+         # Create a dataframe from the input data; to fix the indexing issue, wrap the dict in a list
+         df = pd.DataFrame(
+             [{
+                 'PlasmaGlucose': PlasmaGlucose,
+                 'BloodWorkResult_1(U/ml)': BloodWorkResult_1,
+                 'BloodPressure(mm Hg)': BloodPressure,
+                 'BloodWorkResult_2(mm)': BloodWorkResult_2,
+                 'BloodWorkResult_3(U/ml)': BloodWorkResult_3,
+                 'BodyMassIndex(kg/m)^2': BodyMassIndex,
+                 'BloodWorkResult_4(U/ml)': BloodWorkResult_4,
+                 'Age (years)': Age}]
+         )
+         # Print input data as a dataframe
+         print(f'[Info] Input data as dataframe:\n{df.to_markdown()}')
+
+         # Predict using the loaded model
+         output = model.predict(df)
+         confidence_scores = model.predict_proba(df)  # Predict the probabilities for each class
+         print(f'Considering the best confidence score, the output is: {output}')
+         print(f'Confidence scores: {confidence_scores}')
+
+         # Get the index of the predicted class
+         predicted_idx = output
+
+         # Store the index, then replace it with the matching label
+         df['Predicted label'] = predicted_idx
+         predicted_label = df['Predicted label'].replace(idx_to_labels)
+         df['Predicted label'] = predicted_label
+
+         # Map predicted indices to labels
+         predicted_labels = [idx_to_labels[idx] for idx in output]
+
+         # Store the predicted probabilities for each class in the dataframe
+         for i, label in enumerate(labels):
+             df[f'Confidence_{label}'] = confidence_scores[:, i] * 100  # Convert to percentage
+
+         # Print the result with confidence scores as percentages
+         if predicted_labels:
+             i = 0
+             label = predicted_labels[0]  # Get the first predicted label
+             confidence_score_percentage = max(confidence_scores[i]) * 100
+             print(f"{green_checkmark} This patient in ICU has been classified as Sepsis {label} with confidence of: {confidence_score_percentage:.1f}%")
+
+         msg = "Execution went fine"
+         code = 1
+         pred = df.to_dict("records")
+
+
+     except Exception as e:
+         print(f"\033[91m{red_x} An exception occurred: {str(e)}")
+         msg = "Execution did not go well"
+         code = 0
+         pred = None
+
+     # Create the API response
+     result = {"Execution_msg": msg, "execution_code": code, "prediction": pred}
+     return result
+
+ # Run the FastAPI application using uvicorn
+ if __name__ == "__main__":
+     uvicorn.run("main:app", reload=False)