import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> list:
    """Return the names of up to five wines most similar to ``wine_name``.

    Similarity is the cosine similarity between feature rows, where the
    features are every column of ``df`` except ``"NAME"`` and ``"cluster"``.

    Args:
        wine_name: Value to look up in the ``"NAME"`` column. If several rows
            carry this name, the first one is used as the query.
        df: Frame with a ``"NAME"`` column, a ``"cluster"`` column and the
            feature columns.

    Returns:
        A list of at most five names ordered by ascending similarity, i.e.
        the best match is the last element. Rows named ``wine_name`` are
        never part of the result.

    Raises:
        ValueError: If no row of ``df`` is named ``wine_name``.
    """
    is_query = (df["NAME"] == wine_name).to_numpy()
    if not is_query.any():
        raise ValueError(f"wine {wine_name!r} not found in the 'NAME' column")

    features = df.drop(columns=["NAME", "cluster"])
    query_pos = np.flatnonzero(is_query)[0]
    similarities = cosine_similarity(features.iloc[[query_pos]], features)[0]

    # Drop the query wine explicitly instead of assuming it sorts last:
    # another wine with a collinear feature vector also scores exactly 1.0
    # and could otherwise push the query itself into the result.
    ranked = similarities.argsort(kind="stable")
    candidates = ranked[~is_query[ranked]]
    return df["NAME"].iloc[candidates[-5:]].tolist()


def recommend_wine_from_users(df: pd.DataFrame, user: str, n=5) -> list:
    """Recommend wines ``user`` has not rated, ranked by their cluster's mean.

    Args:
        df: Frame indexed by user label, containing a ``"cluster"`` column,
            a ``"user"`` column and one rating column per wine. A rating of
            0 is treated as "not rated" for ``user``.
        user: Index label of the user to recommend for.
        n: Maximum number of wines to return.

    Returns:
        Up to ``n`` wine column names, highest cluster-average rating first.
        The average is taken over every user in the same cluster as ``user``
        and includes 0 entries.

    Raises:
        KeyError: If ``user`` is not in the index, or ``df`` lacks the
            ``"cluster"`` or ``"user"`` column.
    """
    user_cluster = df.loc[user, "cluster"]

    # Non-inplace drop: returns a new frame, so neither ``df`` nor a slice of
    # it is mutated, and the id column cannot be mistaken for a rating.
    ratings = df.drop(columns=["cluster", "user"])

    own_ratings = ratings.loc[user]
    unrated = own_ratings[own_ratings == 0].index

    same_cluster = df["cluster"] == user_cluster
    cluster_avg = ratings.loc[same_cluster, unrated].mean()
    return cluster_avg.sort_values(ascending=False).index[:n].tolist()


def get_most_similar_user_clust(df2: pd.DataFrame, new_user: str):
    """Return the cluster of the existing user closest to ``new_user``.

    Closeness is the mean squared difference between rating rows, ignoring
    positions where the difference is NaN; the smallest value wins.

    Args:
        df2: Frame indexed by user label, containing a ``"cluster"`` column,
            a ``"user"`` column and one rating column per wine. It is not
            modified.
        new_user: Index label of the user to match.

    Returns:
        The ``"cluster"`` value of the closest other user, as an ``int``.

    Raises:
        KeyError: If ``new_user`` is not in the index.
        IndexError: If there is no other user with a comparable rating row.
    """
    ratings = df2.drop(columns=["cluster", "user"])
    target = ratings.loc[new_user]

    # Row-wise mean squared difference; ``mean`` skips NaN by default.
    distances = ratings.sub(target, axis="columns").pow(2).mean(axis=1)

    # Exclude the user itself by label rather than by sorted position, so a
    # second user with identical ratings cannot make the user match itself.
    others = distances.drop(index=new_user).dropna()
    if others.empty:
        raise IndexError(f"no other user to compare with {new_user!r}")

    most_similar_user = others.idxmin()
    return int(df2.loc[most_similar_user, "cluster"])