File size: 1,569 Bytes
8724d8b
bd7b6e0
8724d8b
 
 
 
 
41173b4
8724d8b
 
 
 
41173b4
a729a18
 
a53f165
 
 
a729a18
a53f165
 
 
 
a729a18
 
bd7b6e0
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> list:
    """Return the names of the (up to) five wines most similar to *wine_name*.

    Similarity is the cosine similarity between feature rows, where the
    features are every column of *df* except ``"NAME"`` and ``"cluster"``.

    Args:
        wine_name: Value to look up in the ``"NAME"`` column. If several rows
            carry this name, the first one is used as the query vector.
        df: Frame with a ``"NAME"`` column, a ``"cluster"`` column and the
            feature columns (presumably all numeric -- they are passed
            straight to ``cosine_similarity``).

    Returns:
        A list of wine names ordered by increasing similarity, i.e. the most
        similar wine is the LAST element. Fewer than five names are returned
        when *df* holds fewer than five other wines.

    Raises:
        ValueError: If *wine_name* does not occur in ``df["NAME"]``.
    """
    is_query = (df["NAME"] == wine_name).to_numpy()
    if not is_query.any():
        # Fail early with a readable message instead of letting the
        # similarity routine choke on an empty query matrix.
        raise ValueError(f"wine {wine_name!r} not found in the 'NAME' column")

    features = df.drop(columns=["NAME", "cluster"])
    query_pos = int(is_query.argmax())  # position of the first matching row
    similarities = cosine_similarity(features.iloc[[query_pos]], features)[0]

    ranked = similarities.argsort()  # ascending: best candidates at the end
    # Remove the queried wine by identity rather than assuming it sorts last:
    # a row with an identical/collinear feature vector ties at similarity 1.0
    # and could otherwise push the query itself into the result.
    ranked = ranked[~is_query[ranked]]
    top_5_positions = ranked[-5:]

    return df["NAME"].iloc[top_5_positions].tolist()

def recommend_wine_from_users(df: pd.DataFrame, user: str, n=5):
    """Recommend wines the user has not rated, ranked by their cluster's mean.

    Args:
        df: Ratings frame whose index holds the user labels. Must contain a
            ``"cluster"`` column; an optional ``"user"`` column is ignored.
            Every remaining column is treated as one wine, and a rating of
            ``0`` is interpreted as "not rated".
        user: Index label of the user to recommend for.
        n: Maximum number of wine names to return.

    Returns:
        A list of at most *n* column names (wines) that *user* rated ``0``,
        ordered by descending mean rating among all users of the same
        cluster.

    Raises:
        KeyError: If *user* is not in the index or ``"cluster"`` is missing.
    """
    # Columns that describe the row rather than hold a wine rating.
    meta_columns = ["cluster", "user"]

    user_cluster = df.loc[user, "cluster"]
    # Drop *both* metadata columns from the user's row. Leaving "user" in
    # would let a numeric user id of 0 be mistaken for an unrated wine and
    # trigger a KeyError in the lookup below.
    user_ratings = df.loc[user].drop(meta_columns, errors="ignore")
    user_unrated = user_ratings[user_ratings == 0].index

    # Non-inplace drop: the boolean-filtered frame is a derived copy, and
    # mutating it in place raises pandas' SettingWithCopyWarning.
    cluster_ratings = df[df["cluster"] == user_cluster].drop(
        columns=meta_columns, errors="ignore"
    )

    # NOTE(review): zeros from other users are included in the mean, so the
    # score mixes "how well rated" with "how often rated" -- confirm intended.
    cluster_avg = cluster_ratings.mean()[user_unrated]
    return cluster_avg.sort_values(ascending=False).index[:n].tolist()

def get_most_similar_user_clust(df2: pd.DataFrame, new_user: str):
    """Return the cluster of the existing user whose ratings are closest.

    Closeness is the mean squared difference between rating rows (NaN
    entries are skipped), so a LOWER score means a more similar user.

    Args:
        df2: Ratings frame whose index holds the user labels. Must contain a
            ``"cluster"`` column; an optional ``"user"`` column is ignored.
            Every remaining column is treated as a rating (assumed numeric).
            The frame is not modified.
        new_user: Index label of the user to find a neighbour for.

    Returns:
        The ``"cluster"`` value of the closest *other* user, as an ``int``.

    Raises:
        KeyError: If *new_user* is not in the index.
        IndexError: If *df2* contains no user other than *new_user*.
    """
    ratings = df2.drop(columns=["cluster", "user"], errors="ignore")
    target = ratings.loc[new_user]

    # Vectorised mean squared difference of every row against the target row;
    # DataFrame.mean skips NaN by default.
    distances = ratings.sub(target, axis="columns").pow(2).mean(axis=1)

    # Exclude the new user explicitly. Relying on it sorting first (distance
    # 0) breaks when another user has identical ratings: the tie could place
    # the new user second and return its own cluster.
    others = distances.drop(new_user).sort_values()
    most_similar_user = others.index[0]

    return int(df2.loc[most_similar_user, "cluster"])