showcase / app /core.py
rufimelo's picture
yolo
bd7b6e0
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> list:
    """Return the names of the five wines most similar to ``wine_name``.

    Similarity is the cosine similarity between rows of ``df`` after the
    ``"NAME"`` and ``"cluster"`` columns are dropped; every remaining column
    is passed to ``cosine_similarity`` as a feature.

    Args:
        wine_name: Value to look up in the ``"NAME"`` column. If several rows
            share the name, the first one is used as the query.
        df: Frame with a ``"NAME"`` column, a ``"cluster"`` column and the
            feature columns.

    Returns:
        A list of up to five wine names ordered from least to most similar
        (the closest match is last). Fewer names are returned when ``df``
        has fewer than six rows.

    Raises:
        ValueError: If ``wine_name`` does not occur in ``df["NAME"]``.
    """
    is_query = (df["NAME"] == wine_name).to_numpy()
    if not is_query.any():
        raise ValueError(f"Wine {wine_name!r} not found in the 'NAME' column")
    # argmax on a boolean mask yields the position of the first match.
    query_pos = int(is_query.argmax())

    features = df.drop(columns=["NAME", "cluster"])
    similarities = cosine_similarity(features.iloc[[query_pos]], features)[0]

    # Remove the query row by position instead of assuming it sorts last:
    # a wine with a proportional feature vector also scores 1.0, and with
    # such a tie (or float rounding) the query could otherwise be returned
    # as its own recommendation.
    ranked = similarities.argsort()
    top_5_positions = ranked[ranked != query_pos][-5:]
    return df.iloc[top_5_positions]["NAME"].tolist()
def recommend_wine_from_users(df: pd.DataFrame, user: str, n: int = 5) -> list:
    """Recommend wines the user has not rated, ranked by their cluster's mean rating.

    Args:
        df: Frame indexed by user label. It must contain a ``'cluster'``
            column and a ``'user'`` column; every other column is treated as
            the ratings for one wine.
        user: Index label of the user to recommend for.
        n: Maximum number of wine names to return.

    Returns:
        A list of up to ``n`` column names, ordered by descending mean rating
        among the users in the same cluster, restricted to the columns where
        ``user``'s own value is 0 (0 is treated as "not rated").

    Raises:
        KeyError: If ``user`` is not in the index or the ``'cluster'`` /
            ``'user'`` columns are missing.
    """
    user_cluster = df.loc[user, "cluster"]

    # Drop 'user' together with 'cluster' so that only rating entries are
    # compared with 0; otherwise a 'user' value of 0 would be mistaken for an
    # unrated wine and the lookup in the cluster averages would fail.
    user_ratings = df.loc[user].drop(["cluster", "user"])
    user_unrated = user_ratings[user_ratings == 0].index

    # Non-inplace drop: the filtered frame is a derived object, and mutating
    # it in place triggers pandas' SettingWithCopyWarning.
    cluster_ratings = df[df["cluster"] == user_cluster].drop(
        columns=["cluster", "user"]
    )
    cluster_avg = cluster_ratings.mean()[user_unrated]
    return cluster_avg.sort_values(ascending=False).index[:n].tolist()
def get_most_similar_user_clust(df2: pd.DataFrame, new_user: str) -> int:
    """Return the cluster of the existing user whose ratings are closest to ``new_user``.

    Closeness is the mean squared difference between two users' rating rows,
    ignoring positions where either value is NaN.

    Args:
        df2: Frame indexed by user label. It must contain a ``'cluster'``
            column and a ``'user'`` column; every other column is treated as
            a rating. The frame is not modified.
        new_user: Index label of the user to find a match for.

    Returns:
        The ``'cluster'`` value, as an ``int``, of the most similar user other
        than ``new_user``.

    Raises:
        KeyError: If ``new_user`` is not in the index or a required column is
            missing.
        IndexError: If ``df2`` holds no user other than ``new_user``.
    """
    ratings = df2.drop(columns=["cluster", "user"])
    target = ratings.loc[new_user]

    # Mean squared error of every user against the target; mean() skips NaN,
    # so missing ratings do not count towards the distance.
    mse = ratings.sub(target, axis=1).pow(2).mean(axis=1)

    # Remove the new user by label rather than assuming they sort first:
    # another user with identical ratings also has distance 0, and with such
    # a tie the new user's own cluster could otherwise be returned.
    others = mse.drop(new_user)
    if others.empty:
        raise IndexError("no other user to compare against")

    most_similar_user = others.idxmin()
    return int(df2.loc[most_similar_user, "cluster"])