import os
import sys

import hopsworks
import numpy as np
from catboost import CatBoostRegressor


class Predict:
    """Predictor that loads a registered CatBoost model and serves predictions.

    On construction it logs in to Hopsworks, downloads the best registered
    ``grailed_prediction_catboost_model`` together with the shared
    ``feature_engineering.py`` resource, and keeps both in memory so that
    ``predict`` only has to transform the input and call the model.
    """

    def __init__(self):
        """Download the model artifact and feature-engineering code, then load them.

        Side effects: logs in to Hopsworks, downloads files to the local
        filesystem, extends ``sys.path`` and seeds NumPy's global RNG.
        """
        # Add seed for reproducibility
        np.random.seed(42)

        project = hopsworks.login()
        mr = project.get_model_registry()
        dataset_api = project.get_dataset_api()

        # Download model.
        # RMSLE is an error metric (lower is better), so the best model is the
        # one with the *minimum* value; 'max' would pick the worst model.
        my_model = mr.get_best_model(
            "grailed_prediction_catboost_model", "RMSLE", "min"
        )
        model_dir = my_model.download()

        # Download the feature engineering file and make the directory it
        # landed in importable. Use the path reported by the download call
        # rather than assuming it is the current working directory.
        feature_engineering_file = dataset_api.download(
            "Resources/feature_engineering.py", overwrite=True
        )
        if feature_engineering_file:
            module_dir = os.path.dirname(os.path.abspath(feature_engineering_file))
        else:
            # No path reported: fall back to the current working directory.
            module_dir = os.getcwd()
        if module_dir not in sys.path:
            sys.path.append(module_dir)

        # Imported here (not at module top) because the file only exists
        # after the download above has completed.
        from feature_engineering import engineering_all_features

        self.engineering_all_features = engineering_all_features

        # Debug output: shows which feature engineering callable was loaded
        print(self.engineering_all_features)

        # Load the saved model
        # NOTE(review): load_model is called without an explicit format;
        # confirm this matches how model.json was saved during training.
        self.model = CatBoostRegressor()
        self.model.load_model(os.path.join(model_dir, "model.json"))

        # Define the embedding features (same as in training)
        self.embedding_features = [
            "designer_names",
            "hashtags",
            "description",
            "title",
        ]

    def predict(self, inputs):
        """Serve a prediction for a single listing using the trained model.

        Expects ``inputs`` as a dictionary with the same features used in
        training:

        - designer_names (list of strings, in original API it's just a string)
        - category_path (string)
        - color (string)
        - condition (string)
        - followerno (int)
        - hashtags (list of strings)
        - description (string)
        - title (string)
        - size (passed to feature engineering, then dropped before the model
          is called; type not visible here -- TODO confirm)

        Returns:
            The model's prediction for the single input row, as a float.
        """
        labels = [
            "designer_names",
            "category_path",
            "color",
            "condition",
            "followerno",
            "hashtags",
            "description",
            "title",
            "size",
        ]

        # The feature engineering helper takes a batch, so wrap the single
        # record in a list; its first result is the transformed frame
        # (presumably a Polars DataFrame, given the to_pandas() call).
        transformed_features = self.engineering_all_features([inputs], labels)
        polars_df = transformed_features[0]
        pandas_df = polars_df.to_pandas()

        # Debug output of the transformed feature frame
        print(f"pandas_df={pandas_df!r}")

        # Drop size column: it is not fed to the model
        pandas_df = pandas_df.drop(columns=["size"])

        # Make prediction
        prediction = self.model.predict(pandas_df)

        # Return single prediction value
        return float(prediction[0])