# samlam111
# Added seed to Predict class
# c2fc6c3
import os
import hopsworks
import numpy as np
from catboost import CatBoostRegressor
class Predict:
    """Predictor that serves a registered CatBoost regressor.

    On construction it logs in to Hopsworks, downloads the best
    ``grailed_prediction_catboost_model`` from the model registry together
    with the ``Resources/feature_engineering.py`` helper, and loads both.
    ``predict`` then runs the feature engineering on a single input record
    and returns the model's prediction for it.
    """

    def __init__(self):
        """Download the model artifact and feature-engineering code, then load them.

        Side effects: seeds NumPy's global RNG, logs in to Hopsworks,
        downloads files, may append a directory to ``sys.path``, and prints
        the imported feature-engineering callable.
        """
        # sys is only needed during initialization, so it is imported locally.
        import sys

        # Add seed for reproducibility.
        np.random.seed(42)

        project = hopsworks.login()
        mr = project.get_model_registry()
        dataset_api = project.get_dataset_api()

        # Download model. RMSLE is an error metric (lower is better), so the
        # best model is the one with the *minimum* value; asking for 'max'
        # would select the worst-scoring registered model.
        my_model = mr.get_best_model(
            "grailed_prediction_catboost_model", "RMSLE", "min"
        )
        model_dir = my_model.download()

        # Download feature engineering file.
        feature_engineering_file = dataset_api.download(
            "Resources/feature_engineering.py", overwrite=True
        )

        # Make the downloaded module importable. download() is expected to
        # return the local path of the file (TODO confirm for the installed
        # hopsworks version); fall back to the working directory otherwise.
        if feature_engineering_file:
            module_dir = os.path.dirname(
                os.path.abspath(feature_engineering_file)
            )
        else:
            module_dir = os.getcwd()
        if module_dir not in sys.path:
            sys.path.append(module_dir)

        # This import must happen here: the module only exists on disk after
        # the download above.
        from feature_engineering import engineering_all_features

        self.engineering_all_features = engineering_all_features
        # Debug output: shows which callable was imported.
        print(self.engineering_all_features)

        # Load the saved model from the downloaded artifact directory.
        self.model = CatBoostRegressor()
        self.model.load_model(os.path.join(model_dir, "model.json"))

        # Embedding feature names (per the original note: same as in
        # training). Not read by any other code in this class.
        self.embedding_features = [
            "designer_names",
            "hashtags",
            "description",
            "title",
        ]

    def predict(self, inputs):
        """Serve a prediction for one input record using the trained model.

        Expects ``inputs`` as a dictionary with the same features used in
        training:

        - designer_names (list of strings, in original API it's just a string)
        - category_path (string)
        - color (string)
        - condition (string)
        - followerno (int)
        - hashtags (list of strings)
        - description (string)
        - title (string)
        - size (passed to feature engineering; the resulting ``size``
          column is dropped before the model is called)

        Returns:
            The first prediction produced by the model, as a ``float``.
        """
        labels = [
            "designer_names",
            "category_path",
            "color",
            "condition",
            "followerno",
            "hashtags",
            "description",
            "title",
            "size",
        ]

        # The feature-engineering helper takes a list of records; wrap the
        # single record and use the first element of its result.
        transformed_features = self.engineering_all_features([inputs], labels)
        polars_df = transformed_features[0]
        pandas_df = polars_df.to_pandas()
        print(f"pandas_df={pandas_df}")

        # Drop size column: it is not passed to the model.
        pandas_df = pandas_df.drop(columns=["size"])

        # Make prediction and return the single value as a plain float.
        prediction = self.model.predict(pandas_df)
        return float(prediction[0])