Spaces:
Sleeping
Sleeping
File size: 2,587 Bytes
916ff05 c2fc6c3 916ff05 c2fc6c3 916ff05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os
import hopsworks
import numpy as np
from catboost import CatBoostRegressor
class Predict:
    """Predictor that serves the Grailed CatBoost regression model.

    On construction the model artifact and the feature-engineering module
    are downloaded from Hopsworks and loaded; ``predict`` then turns one raw
    input record into a single float prediction.
    """

    def __init__(self):
        """Download and load the model and the feature-engineering code.

        Steps:
        - Log in to Hopsworks and fetch the best registered model.
        - Download ``Resources/feature_engineering.py`` and import
          ``engineering_all_features`` from it.
        - Load the CatBoost model from the downloaded artifact directory.
        """
        # Function-scope import: ``sys`` is only needed to extend the import path.
        import sys

        # Seed NumPy's global RNG for reproducibility.
        np.random.seed(42)

        project = hopsworks.login()
        mr = project.get_model_registry()
        dataset_api = project.get_dataset_api()

        # RMSLE is an error metric (lower is better), so the best model is the
        # one with the *minimum* value; 'max' would select the worst version.
        my_model = mr.get_best_model(
            "grailed_prediction_catboost_model", "RMSLE", "min"
        )
        model_dir = my_model.download()

        # Download the feature-engineering module that accompanies the model.
        feature_engineering_file = dataset_api.download(
            "Resources/feature_engineering.py", overwrite=True
        )

        # Make the downloaded module importable. Prefer the directory the file
        # was actually written to; fall back to the working directory if the
        # API did not report a path.
        if feature_engineering_file:
            module_dir = os.path.dirname(os.path.abspath(feature_engineering_file))
        else:
            module_dir = os.getcwd()
        if module_dir not in sys.path:
            sys.path.append(module_dir)

        # This import must happen after the download above, hence it is local.
        from feature_engineering import engineering_all_features

        self.engineering_all_features = engineering_all_features
        # Debug output: shows which function object was loaded.
        print(self.engineering_all_features)

        # Load the saved model from the downloaded artifact directory.
        self.model = CatBoostRegressor()
        self.model.load_model(os.path.join(model_dir, "model.json"))

        # Embedding features (same as in training).
        self.embedding_features = ['designer_names', 'hashtags', 'description', 'title']

    def predict(self, inputs):
        """Serve a single prediction using the trained model.

        Args:
            inputs: Dictionary with the same features used in training:
                - designer_names (list of strings; in the original API it is
                  just a string)
                - category_path (string)
                - color (string)
                - condition (string)
                - followerno (int)
                - hashtags (list of strings)
                - description (string)
                - title (string)
                - size (passed to feature engineering, then dropped before
                  the model is called)

        Returns:
            The model's prediction for the record, as a Python float.
        """
        labels = [
            "designer_names",
            "category_path",
            "color",
            "condition",
            "followerno",
            "hashtags",
            "description",
            "title",
            "size",
        ]

        # The feature-engineering helper works on a batch, so wrap the record
        # in a one-element list and take the first returned item.
        # NOTE(review): presumably a Polars DataFrame - only ``to_pandas()``
        # is relied on here.
        transformed_features = self.engineering_all_features([inputs], labels)
        polars_df = transformed_features[0]
        pandas_df = polars_df.to_pandas()
        print(f"{pandas_df=}")

        # Drop the size column before prediction; tolerate frames where the
        # feature-engineering step did not emit it.
        pandas_df = pandas_df.drop(columns=["size"], errors="ignore")

        # Make the prediction and return the single value.
        prediction = self.model.predict(pandas_df)
        return float(prediction[0])