File size: 1,800 Bytes
b7d3682
 
 
 
 
 
 
 
 
 
 
fabb2c0
 
 
 
 
 
 
 
b7d3682
 
 
7f67122
b7d3682
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fabb2c0
b7d3682
1cea5f0
b7d3682
 
 
 
 
 
 
 
 
 
6195581
 
 
f364b27
 
 
1cea5f0
b7d3682
 
7f67122
b7d3682
7f67122
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from joblib import load
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

class SmokerModel:
    """
    Smoker Model Class that can predict new instances

    Holds a model and the scaler loaded from disk, scales incoming data
    with that scaler, and maps the model's predicted class onto one of
    the entries of ``labels``.

    INPUTS
    ------
    model_path: the path to the model file
    scaler_path: the path to the min max scaler file
    """
    def __init__(self, model_path, scaler_path):
        # NOTE(security): joblib.load unpickles the file and can therefore
        # execute arbitrary code -- only point these paths at trusted files.
        self.model = load(model_path)
        self.scaler = load(scaler_path)
        # predict() indexes this list with the class value returned by the
        # model, so position 0 -> "non-smoker" and position 1 -> "smoker".
        self.labels = ["non-smoker", "smoker"]

    def scale(self, X):
        """
        Apply the scaler used to train the model to the new data

        INPUT
        -----
        X: the data to be scaled

        OUTPUT
        ------
        returns the scaled data (whatever ``self.scaler.transform`` returns)
        """
        return self.scaler.transform(X)

    def predict(self, X: pd.DataFrame) -> str:
        """
        Make a prediction on one sample using the loaded model.

        INPUT
        -----
        X: the data to predict a label for; must contain exactly one row

        OUTPUT
        ------
        predicted label, taken from ``self.labels``

        RAISES
        ------
        ValueError: if X does not contain exactly one sample
        """
        # Validate first: rejecting bad input before it reaches the scaler
        # avoids wasted work and guarantees the caller sees this error
        # rather than one raised from inside the scaler.
        n_samples = X.shape[0]
        if n_samples != 1:
            raise ValueError(
                "Input array must contain only one sample, but {} samples were found".format(n_samples)
            )

        # Scale with the loaded scaler, then predict on the scaled sample.
        X_scaled = self.scale(X)
        predictions = self.model.predict(X_scaled)

        # Exactly one sample went in, so the first entry is the prediction;
        # it is used as an index into the label list.
        predicted_label = predictions[0]
        return self.labels[predicted_label]