Spaces:

samlama111
/

fashion-price-predictor

Sleeping

samlam111

Added seed to Predict class

c2fc6c3 6 months ago

2.59 kB

	import os
	import hopsworks
	import numpy as np
	from catboost import CatBoostRegressor

	class Predict:
	    """Hopsworks-style predictor that serves price predictions from a CatBoost model.

	    On construction it downloads the model artifact and the shared
	    ``feature_engineering.py`` module from the Hopsworks project; ``predict``
	    then runs that feature engineering on a single input record and returns
	    the model's prediction.
	    """

	    # Name and selection metric of the model in the Hopsworks model registry.
	    MODEL_NAME = "grailed_prediction_catboost_model"
	    MODEL_METRIC = "RMSLE"
	    # RMSLE is an error metric (lower is better), so the best model is the
	    # one with the *minimum* value. The previous 'max' selected the worst one.
	    # NOTE(review): assumes the registered metric is a real error value.
	    MODEL_METRIC_DIRECTION = "min"

	    # Labels handed to the feature engineering step, in this order.
	    FEATURE_LABELS = [
	        "designer_names",
	        "category_path",
	        "color",
	        "condition",
	        "followerno",
	        "hashtags",
	        "description",
	        "title",
	        "size",
	    ]

	    def __init__(self):
	        """Download the model and feature engineering code, then load the model.

	        Sets:
	            self.engineering_all_features: function imported from the
	                downloaded ``feature_engineering`` module.
	            self.model: ``CatBoostRegressor`` loaded from the artifact.
	            self.embedding_features: names of the embedding features.
	        """
	        # Local import kept from the original code (sys is not imported at
	        # the top of the file).
	        import sys

	        # Add seed for reproducibility
	        np.random.seed(42)

	        project = hopsworks.login()
	        mr = project.get_model_registry()
	        dataset_api = project.get_dataset_api()

	        # Download the best registered model according to the metric above.
	        my_model = mr.get_best_model(
	            self.MODEL_NAME, self.MODEL_METRIC, self.MODEL_METRIC_DIRECTION
	        )
	        model_dir = my_model.download()

	        # Download the feature engineering file shared with training.
	        feature_engineering_file = dataset_api.download(
	            "Resources/feature_engineering.py", overwrite=True
	        )

	        # Make the downloaded module importable. Use the directory of the
	        # path returned by the download rather than assuming it is the
	        # current working directory; fall back to the cwd if no path came back.
	        if feature_engineering_file:
	            module_dir = os.path.dirname(
	                os.path.abspath(feature_engineering_file)
	            )
	        else:
	            module_dir = os.getcwd()
	        if module_dir not in sys.path:
	            sys.path.append(module_dir)

	        # Import only now: the module does not exist before the download.
	        from feature_engineering import engineering_all_features
	        self.engineering_all_features = engineering_all_features
	        # Print the loaded feature engineering function (debug aid)
	        print(self.engineering_all_features)

	        # Load the saved model.
	        # NOTE(review): load_model defaults to the 'cbm' format; confirm the
	        # artifact was saved that way despite its .json file name.
	        self.model = CatBoostRegressor()
	        self.model.load_model(os.path.join(model_dir, "model.json"))

	        # Define the embedding features (same as in training)
	        self.embedding_features = ['designer_names', 'hashtags', 'description', 'title']

	    def predict(self, inputs):
	        """Serve a single prediction using the trained model.

	        Args:
	            inputs: dictionary with the same features used in training:
	                - designer_names (list of strings, in original API it's
	                  just a string)
	                - category_path (string)
	                - color (string)
	                - condition (string)
	                - followerno (int)
	                - hashtags (list of strings)
	                - description (string)
	                - title (string)
	                ``size`` is also listed in the labels passed to feature
	                engineering, but the resulting ``size`` column is dropped
	                before prediction. Whether ``inputs`` must contain it
	                depends on ``engineering_all_features`` (not visible here).

	        Returns:
	            The model's prediction for the record as a ``float``.
	        """
	        # Feature engineering works on a batch, so wrap the record in a list.
	        transformed_features = self.engineering_all_features(
	            [inputs], self.FEATURE_LABELS
	        )
	        # The first element of the result is converted with to_pandas()
	        # (a polars DataFrame, judging by the original variable name).
	        pandas_df = transformed_features[0].to_pandas()
	        print(f"pandas_df={pandas_df!r}")

	        # Drop size column: it is not passed to the model.
	        pandas_df = pandas_df.drop(columns=["size"])

	        # Make prediction
	        prediction = self.model.predict(pandas_df)

	        return float(prediction[0])  # Return single prediction value