Spaces:

SmitaGautam
/

GNN-Recommendation-System-Demo

Sleeping

App Files Files Community

SmitaGautam committed on Nov 24, 2024

Commit

5b1d214

verified ·

1 Parent(s): 029531e

Upload 5 files

Browse files

Files changed (5) hide show

app.py +134 -0
model_GCN.pth +3 -0
recommend.py +169 -0
requirements.txt +3 -0
u.item +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import gradio as gr
+from recommend import get_genres_movies_for_user, test_users
+label_value_mapping = {f"User {user_id}": user_id for user_id in test_users}
+# Define a function to generate colors based on genre
+def get_genre_color(movie_genres, top_genres):
+    genre_colors = {
+        'Action': '#F44336',        # Red: Energetic and intense
+        'Adventure': '#FF5722',     # Deep Orange: Exciting and daring
+        'Thriller': '#9C27B0',      # Purple: Suspenseful and mysterious
+        'Animation': '#FFEB3B',     # Yellow: Bright and cheerful
+        'Children': '#4CAF50',      # Green: Fresh, youthful, and playful
+        'Fantasy': '#673AB7',       # Deep Purple: Magical and imaginative
+        'Comedy': '#FFC107',        # Amber: Lighthearted and funny
+        'Musical': '#E91E63',       # Pink: Romantic and artistic
+        'Romance': '#FF4081',       # Light Pink: Love and tenderness
+        'Crime': '#607D8B',         # Blue Grey: Gritty and dark
+        'Mystery': '#8BC34A',       # Lime Green: Curious and investigative
+        'Film-Noir': '#212121',     # Black: Dark and moody
+        'Documentary': '#009688',   # Teal: Informative and grounded
+        'Drama': '#3F51B5',         # Blue: Serious and emotional
+        'Horror': '#B71C1C',        # Dark Red: Fearful and intense
+        'Sci-Fi': '#00BCD4',        # Cyan: Futuristic and techy
+        'War': '#795548',           # Brown: Rugged and historical
+        'Western': '#D7A87E',       # Tan: Rustic and wild
+    }
+    # Default color if no genre matches
+    default_color = '#9E9E9E'
+    if not top_genres:
+        return genre_colors.get(movie_genres[0], default_color)
+    # Check for the first genre in movie_genres that is also in top_genres
+    for genre in movie_genres:
+        if genre in top_genres and genre in genre_colors:
+            return genre_colors[genre]
+    # Check for the first valid genre in movie_genres that has a color
+    for genre in movie_genres:
+        if genre in genre_colors:
+            return genre_colors[genre]
+    # Return the default color if no matches
+    return default_color
+def get_movie_recommendations(user_id):
+    # Fetch the top genres and recommended movies based on user_id
+    user_id = label_value_mapping[user_id]
+    top_genres, movies = get_genres_movies_for_user(user_id)
+    genre_cards = "".join([
+        f"<div class='card' style='background-color:{get_genre_color([genre], None)};'>{genre}</div>"
+        for genre in top_genres
+    ])
+    movie_cards = "".join([
+        f"<div class='card movie-card' style='background-color:{get_genre_color(movie['genres'], top_genres)};'>"
+            f"<strong>{movie['title']}</strong><br><span>{', '.join(movie['genres'])}</span>"
+        "</div>"
+        for movie in movies
+    ])
+    return genre_cards, movie_cards
+# Define the Gradio interface
+with gr.Blocks() as demo:
+    gr.HTML("""
+        <style>
+            .gradio-container {
+                background-color: #FAFAFA;
+                font-family: 'Arial', sans-serif;
+                color: #333;
+            }
+            #center-title {
+                text-align: center;
+                font-size: 36px;
+                margin-bottom: 20px;
+                color: #2C3E50;
+            }
+            h1 {
+                font-size: 36px;
+                text-align: center;
+                margin-bottom: 20px;
+                color: #2C3E50;
+            }
+            h2 {
+                font-size: 16px;
+                margin-bottom: 10px;
+                color: #2C3E50;
+                text-align: center;
+            }
+            .card {
+                border: 1px solid #BBDEFB;
+                border-radius: 8px;
+                padding: 10px 15px;
+                margin: 5px;
+                box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
+                text-align: center;
+                font-size: 16px;
+                color: #FFFFFF;
+                width: 180px;
+                display: inline-block;
+            }
+            .movie-card {
+            }
+        </style>
+    """)
+    with gr.Row():
+        gr.Markdown("### Movie Recommendation System", elem_id="center-title")
+    with gr.Row():
+        user_id_input = gr.Dropdown(
+        label="Select a User",
+        choices=list(label_value_mapping.keys()),
+        value=list(label_value_mapping.keys())[0],  # Default value
+        interactive=True
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.HTML("<h2>Top Genres</h2>")
+            genres_output = gr.HTML(label="Top Genres")
+        with gr.Column(scale=1):
+            gr.HTML("<h2>Recommended Movies</h2>")
+            movies_output = gr.HTML(label="Recommended Movies")
+    user_id_input.change(get_movie_recommendations, inputs=user_id_input, outputs=[genres_output, movies_output])
+demo.launch()

model_GCN.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99f98f923bd3d833afe4ee48afa4c627ff6be09cd99a87f4dd4711cf33a9702a
+size 62353

recommend.py ADDED Viewed

	@@ -0,0 +1,169 @@

+from torch_geometric.datasets import MovieLens100K
+from sklearn.model_selection import train_test_split
+import torch.nn as nn
+import torch
+import torch.nn.functional as F
+import numpy as np
+import pandas as pd
+from torch_geometric.nn import GCNConv, GATConv
+import logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', filename='metrics.log')
+class GNN(torch.nn.Module):
+    def __init__(self, model_type, in_channels, hidden_channels, out_channels):
+        super(GNN, self).__init__()
+        self.model_type = model_type
+        self.fc1 = nn.Linear(user_features.shape[1], in_channels)
+        self.fc2 = nn.Linear(movie_features.shape[1], in_channels)
+        if model_type == 'GCN':
+            self.conv1 = GCNConv(in_channels, hidden_channels)
+            self.conv2 = GCNConv(hidden_channels, out_channels)
+            self.bn1= torch.nn.BatchNorm1d(hidden_channels)
+        elif model_type == 'GAT':
+            self.conv1 = GATConv(in_channels, hidden_channels, heads=2, concat=True)
+            self.conv2 = GATConv(hidden_channels * 2, out_channels, heads=2, concat=True)
+            self.bn1= torch.nn.BatchNorm1d(hidden_channels*2)
+    def forward(self, x, y, edge_index):
+        x = self.fc1(x)
+        y = self.fc2(y)
+        z = torch.cat((x, y), dim=0)
+        z = F.relu(self.bn1(self.conv1(z, edge_index)))
+        z = self.conv2(z, edge_index)
+        return z
+# genres = {
+#     0 :['Action', 'Adventure', 'Thriller'],  # Action-packed and exciting
+#     1: ['Animation', 'Children', 'Fantasy'],  # Family-friendly and imaginative
+#     2: ['Comedy', 'Musical', 'Romance'],  # Lighthearted and feel-good
+#     3: ['Crime', 'Mystery', 'Film-Noir'],  # Dark, investigative, and gritty
+#     4: ['Documentary', 'Drama'],  # Realistic and serious storytelling
+#     5: ['Horror', 'Sci-Fi'],  # Fearful and futuristic
+#     6: ['War', 'Western']  # Historical and culturally specific
+# }
+gtypes = ['Action', 'Adventure', 'Animation', 'Children', 'Comedy',
+    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
+    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
+genres = {ix: i for ix, i in enumerate(gtypes)}
+test_users = [327, 388, 404, 449, 707, 310, 605, 832, 850, 302, 523, 626, 774, 853, 522, 542, 680, 703, 929, 254, 526, 588, 884, 210, 275, 497, 507, 598, 825, 937, 311, 380, 448, 541, 885, 938, 409, 429, 433, 451, 534, 551, 585, 896, 33, 109, 120, 215, 261, 412, 425, 559, 615, 617, 829, 49, 78, 137, 192, 198, 281, 305, 394, 528, 669]
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+file_path = 'u.item'
+df = pd.read_csv(file_path, sep='|', header=None, encoding='latin-1')
+last_19_cols = df.columns[-19:]
+genre_columns = [
+    'Unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
+    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
+    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
+]
+df.rename(columns=dict(zip(last_19_cols, genre_columns)), inplace=True)
+df.rename(columns = {1: "info"}, inplace=True)
+df['Year'] = df['info'].str.extract(r'\((\d{4})\)')
+id_movie_map = df["info"].to_dict()
+# Load the MovieLens100K dataset from ./data/movie_lens and take its first graph.
+movie_lens = MovieLens100K('./data/movie_lens')[0]
+movie_features = movie_lens["movie"]["x"]
+user_features = movie_lens["user"]["x"]
+data = movie_lens[("user", "rates", "movie")]
+# Keep only the interactions whose rating is 3 or higher.
+mask = data["rating"] >= 3
+data_edge_index = data["edge_index"][:, mask]
+data_edge_label = data["rating"][mask]
+user_num_nodes = user_features.shape[0]
+# Seeded 80/20 split of user node ids; the 20% is then halved into
+# validation users and test users.
+train_nodes, testing_nodes = train_test_split(range(user_num_nodes), test_size=0.2, random_state=42)
+val_nodes, test_nodes = testing_nodes[:len(testing_nodes)//2], testing_nodes[len(testing_nodes)//2: ]
+# Row 0 of the edge index is matched against the user splits below
+# (presumably the user endpoint of each edge -- verify against the dataset).
+Y = data_edge_index[0]
+val_mask = torch.isin(Y, torch.tensor(val_nodes))
+val_edge_index = data_edge_index[:, val_mask]
+test_mask = torch.isin(Y, torch.tensor(test_nodes))
+test_edge_index = data_edge_index[:, test_mask]
+user_features = user_features.to(device)
+movie_features = movie_features.to(device)
+val_edge_index = val_edge_index.to(device)
+test_edge_index = test_edge_index.to(device)
+# Shift row 1 by the user count: GNN.forward stacks user rows before movie
+# rows, so movie nodes are numbered after all users in the combined graph.
+val_edge_index[1] += (user_features.shape[0])
+test_edge_index[1] += (user_features.shape[0])
+model_type = "GCN"
+model = GNN(model_type, in_channels= 32, hidden_channels=128, out_channels=64)
+optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
+model=model.to(device)
+model.load_state_dict(torch.load('model_GCN.pth', map_location=torch.device('cpu')))
+model.eval()
+with torch.no_grad():
+  embeddings = model(user_features, movie_features, test_edge_index)
+cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
+users = test_edge_index[0].unique()
+def display_scores(top_k, test_edges_q_indices):
+    p_10 =  sum(1 for i in top_k if i in test_edges_q_indices)
+    # p_5 =  sum(1 for i in top_k[:5] if i in test_edges_q_indices)
+    # p_1 =  1 if top_k[0] in test_edges_q_indices else 0
+    r_10 = p_10/len(test_edges_q_indices)
+    for rank, node in enumerate(top_k):
+        if node in test_edges_q_indices:
+            mrr = 1 / (rank + 1)
+            break
+    dcg = 0.0
+    for rank, node in enumerate(top_k, start=1):
+        if node in test_edges_q_indices:
+            dcg += 1 / np.log2(rank + 1)
+    ideal_relevant = min(len(test_edges_q_indices), 10)
+    idcg = sum(1 / np.log2(rank + 1) for rank in range(1, ideal_relevant + 1))
+    ndcg = dcg / idcg if idcg > 0 else 0.0
+    logging.info(f"Precision@10: {p_10}, Recall@10: {r_10}, MRR: {mrr}, nDCG: {ndcg}")
+    print(f"Precision@10: {p_10}, Recall@10: {r_10}, MRR: {mrr}, nDCG: {ndcg}")
+def get_genres_movies_for_user(user_id):
+    curr_node = torch.tensor(user_id)
+    self_emb = embeddings[curr_node]
+    itm = embeddings[user_features.shape[0]:]
+    similarities = cos(self_emb, itm)
+    sorted_indices = torch.argsort(similarities, descending=True)
+    sorted_indices = [i + user_features.shape[0] for i in sorted_indices]
+    test_edges_q_indices = test_edge_index[1][test_edge_index[0] == curr_node]
+    top_k = sorted_indices[:10]
+    display_scores(top_k, test_edges_q_indices)
+    pred_mids = [(i - user_features.shape[0]).item()  for i in top_k]
+    actual_mids = [(i - user_features.shape[0]).item()  for i in test_edges_q_indices]
+    # liked_movies = [id_movie_map[i] for i in actual_mids]
+    # pred_movies = [id_movie_map[i] for i in pred_mids]
+    user_genre = {i:0 for i in range(len(genres))}
+    for amid in actual_mids:
+        for gr_id, g in genres.items():
+            # for g in grp:
+            if df.iloc[amid][g] == 1:
+                user_genre[gr_id] += 1
+    sort_user_genre = sorted(user_genre.items(), key=lambda item: item[1], reverse=True)
+    sort_user_genre = [i for i, _ in sort_user_genre]
+    top_genre_user = [genres[sort_user_genre[0]], genres[sort_user_genre[1]], genres[sort_user_genre[2]]]
+    our_movies = []
+    for pmid in pred_mids:
+        mname = df.iloc[pmid]["info"]
+        movie = {}
+        movie["title"] = mname
+        tmp = []
+        for gr_id, g in genres.items():
+            # for g in grp:
+            if df.iloc[pmid][g] == 1:
+                tmp.append(g)
+        movie["genres"] = tmp
+        our_movies.append(movie)
+    return top_genre_user, our_movies

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+numpy
+pandas
+scikit-learn
+torch
+torch-geometric

u.item ADDED Viewed

The diff for this file is too large to render. See raw diff