# Spaces: Sleeping
# (The line above appears to be page-capture residue, not part of the program.)
import os
import sys

import hopsworks
import numpy as np
from catboost import CatBoostRegressor
class Predict:
    """Serve predictions from a CatBoost regressor stored in Hopsworks.

    ``__init__`` downloads the model and the feature-engineering module;
    ``predict`` turns one raw input dictionary into a single float.
    """

    # Model-registry lookup settings used by ``__init__``.
    MODEL_NAME = "grailed_prediction_catboost_model"
    MODEL_METRIC = "RMSLE"
    # RMSLE is an error metric: the best model is the one with the LOWEST
    # value, so the registry must be searched with direction "min".
    MODEL_METRIC_DIRECTION = "min"

    def __init__(self):
        """Download the model artifacts and load them into memory.

        Steps:
        - Log in to Hopsworks and fetch the best registered model.
        - Download ``Resources/feature_engineering.py`` and import
          ``engineering_all_features`` from it.
        - Load the CatBoost model from ``model.json`` in the downloaded
          model directory.
        """
        # Add seed for reproducibility
        np.random.seed(42)

        project = hopsworks.login()
        mr = project.get_model_registry()
        dataset_api = project.get_dataset_api()

        # Download model
        my_model = mr.get_best_model(
            self.MODEL_NAME, self.MODEL_METRIC, self.MODEL_METRIC_DIRECTION
        )
        model_dir = my_model.download()

        # Download feature engineering file
        feature_engineering_file = dataset_api.download(
            "Resources/feature_engineering.py", overwrite=True
        )

        # Make the directory that actually holds the downloaded file
        # importable. Fall back to the working directory if the download
        # call reports no path.
        if feature_engineering_file:
            module_dir = os.path.dirname(os.path.abspath(feature_engineering_file))
        else:
            module_dir = os.getcwd()
        if module_dir not in sys.path:
            sys.path.append(module_dir)

        # This import must stay local: the module only exists on disk after
        # the download above.
        from feature_engineering import engineering_all_features

        self.engineering_all_features = engineering_all_features
        # Log which feature-engineering callable was loaded
        print(self.engineering_all_features)

        # Load the saved model
        # NOTE(review): load_model is called without an explicit format;
        # confirm this matches how model.json was saved during training.
        self.model = CatBoostRegressor()
        self.model.load_model(f"{model_dir}/model.json")

        # Define the embedding features (same as in training)
        self.embedding_features = ['designer_names', 'hashtags', 'description', 'title']

    def predict(self, inputs):
        """Serve a prediction using the trained model.

        Expects ``inputs`` as a dictionary with the same features used in
        training:
        - designer_names (list of strings, in original API it's just a string)
        - category_path (string)
        - color (string)
        - condition (string)
        - followerno (int)
        - hashtags (list of strings)
        - description (string)
        - title (string)

        ``size`` is also passed to the feature-engineering step as a label,
        but the resulting ``size`` column is dropped before the model is
        called.

        Returns:
            The model's prediction for the single input row, as a float.
        """
        labels = [
            "designer_names",
            "category_path",
            "color",
            "condition",
            "followerno",
            "hashtags",
            "description",
            "title",
            "size",
        ]
        transformed_features = self.engineering_all_features([inputs], labels)

        # The first element of the result is converted with ``to_pandas()``
        # before being handed to the model.
        polars_df = transformed_features[0]
        pandas_df = polars_df.to_pandas()
        print(f"pandas_df={pandas_df}")

        # Drop size column
        pandas_df = pandas_df.drop(columns=["size"])

        # Make prediction
        prediction = self.model.predict(pandas_df)
        return float(prediction[0])  # Return single prediction value