Spaces:

FranciscoLozDataScience
/

smoker_model

Sleeping

File size: 1,851 Bytes

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from joblib import load
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

class SmokerModel:
    """
    Wrap a persisted classifier and its scaler to label a single sample
    as "non-smoker" or "smoker".
    """

    def __init__(self, model_path, scaler_path):
        """
        Load the model and the scaler from disk.

        INPUT
        -----
        model_path: path of the joblib file holding the model
        scaler_path: path of the joblib file holding the scaler
        """
        # NOTE: joblib.load unpickles the files, which can execute arbitrary
        # code -- only pass paths to artifacts that come from a trusted source.
        self.model = load(model_path)
        self.scaler = load(scaler_path)
        # Position i holds the text label for predicted class i.
        self.labels = ["non-smoker", "smoker"]

    def scale(self, X):
        """
        Apply the scaler used to train the model to the new data

        INPUT
        -----
        X: the data to be scaled; it is handed unchanged to the scaler's
           transform method

        OUTPUT
        ------
        returns the scaled data
        """
        return self.scaler.transform(X)

    def predict(self, X: np.ndarray) -> str:
        """
        Make a prediction on one sample using the loaded model.

        INPUT
        -----
        X: the data to predict a label for; either a one-dimensional
           array-like of feature values or a two-dimensional one holding
           exactly one row

        OUTPUT
        ------
        predicted label ("non-smoker" or "smoker")

        RAISES
        ------
        ValueError: if X does not describe exactly one sample
        """
        sample = np.asarray(X)

        # Validate and reshape BEFORE scaling: the scaler's transform expects
        # a 2-D (n_samples, n_features) array, so a bare 1-D sample has to
        # become a single row first.
        if sample.ndim == 1:
            sample = sample.reshape(1, -1)
        elif sample.ndim != 2 or sample.shape[0] != 1:
            raise ValueError("Input array must be one-dimensional (one sample), but got a shape of {}".format(sample.shape))

        # scale the data
        X_scaled = self.scale(sample)

        # Now, use the scaled data to make predictions using the loaded model
        predictions = self.model.predict(X_scaled)

        # Only one row was submitted, so the first entry is the answer; cast
        # to a plain int so it is always a valid index into self.labels.
        return self.labels[int(predictions[0])]