# NOTE: page-status text captured along with this file ("Spaces: Sleeping"); it is not part of the program.
import pandas as pd | |
import numpy as np | |
from sklearn.metrics.pairwise import cosine_similarity | |
def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> list:
    """Return the names of the wines most similar to ``wine_name``.

    Similarity is the cosine similarity between the query wine's row and
    every row of ``df``, computed over all columns except ``"NAME"`` and
    ``"cluster"``.

    Args:
        wine_name: Value to look up in the ``"NAME"`` column.
        df: Frame with a ``"NAME"`` column, a ``"cluster"`` column and the
            feature columns used for the comparison.

    Returns:
        A list of up to five wine names, ordered from the least to the most
        similar of the selected wines. Rows whose ``"NAME"`` equals
        ``wine_name`` are never returned.

    Raises:
        ValueError: If ``wine_name`` does not occur in ``df["NAME"]``.
    """
    is_query = (df["NAME"] == wine_name).to_numpy()
    if not is_query.any():
        raise ValueError(f"Wine {wine_name!r} not found in the 'NAME' column.")

    features = df.drop(columns=["NAME", "cluster"])
    # If several rows share the name, the first one is the query vector.
    query = features[is_query].iloc[[0]]
    scores = cosine_similarity(query, features)[0]

    # Exclude the query wine explicitly instead of assuming it is ranked
    # last: ties or rounding can place another row above it.
    ranked = scores.argsort()
    candidates = ranked[~is_query[ranked]]
    top_5_positions = candidates[-5:]

    return df.iloc[top_5_positions]["NAME"].tolist()
def recommend_wine_from_users(df: pd.DataFrame, user: str, n: int = 5) -> list:
    """Recommend wines the user has not rated, based on the user's cluster.

    A rating of ``0`` is treated as "not rated". The wines the user has not
    rated are ranked by their mean rating among all rows that share the
    user's ``"cluster"`` value (the user's own row included).

    Args:
        df: Ratings frame indexed by user label, with one column per wine
            plus a ``"cluster"`` column and, optionally, a ``"user"`` column.
        user: Index label of the user to recommend for.
        n: Maximum number of wines to return.

    Returns:
        Up to ``n`` wine column names, highest cluster mean first.

    Raises:
        KeyError: If ``user`` is not in the index or ``"cluster"`` is missing.
    """
    non_rating_columns = ["cluster", "user"]

    user_cluster = df.loc[user, "cluster"]
    # Drop the bookkeeping columns from the user's row as well, so a "user"
    # value of 0 is never mistaken for an unrated wine.
    user_ratings = df.loc[user].drop(non_rating_columns, errors="ignore")
    unrated = user_ratings[user_ratings == 0].index

    # Non-inplace drop: the filtered frame is a selection of ``df`` and must
    # not be modified in place.
    cluster_ratings = df[df["cluster"] == user_cluster].drop(
        columns=non_rating_columns, errors="ignore"
    )
    cluster_avg = cluster_ratings.mean()

    ranked = cluster_avg[unrated].sort_values(ascending=False)
    return ranked.index[:n].tolist()
def get_most_similar_user_clust(df2: pd.DataFrame, new_user: str) -> int:
    """Return the cluster of the existing user closest to ``new_user``.

    Closeness is the mean squared difference between rating rows, computed
    over every column except ``"cluster"`` and ``"user"``; missing values
    are skipped. ``new_user`` is excluded from the candidates.

    Args:
        df2: Ratings frame indexed by user label, with one column per wine
            plus a ``"cluster"`` column and, optionally, a ``"user"`` column.
            It is not modified.
        new_user: Index label of the user to find a neighbour for.

    Returns:
        The ``"cluster"`` value of the closest other user, as an ``int``.

    Raises:
        KeyError: If ``new_user`` is not in the index.
        ValueError: If ``df2`` contains no user other than ``new_user``.
    """
    ratings = df2.drop(columns=["cluster", "user"], errors="ignore")
    target = ratings.loc[new_user]

    # Mean squared difference per user; ``mean`` skips NaN by default.
    distance = ratings.sub(target, axis="columns").pow(2).mean(axis=1)

    # Remove the queried user explicitly rather than assuming it sorts
    # first: another user with identical ratings also has distance 0.
    others = distance.drop(index=new_user)
    if others.empty:
        raise ValueError(f"No user other than {new_user!r} to compare against.")

    most_similar_user = others.idxmin()
    return int(df2.loc[most_similar_user, "cluster"])