SmitaGautam committed on
Commit
5b1d214
·
verified ·
1 Parent(s): 029531e

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +134 -0
  2. model_GCN.pth +3 -0
  3. recommend.py +169 -0
  4. requirements.txt +3 -0
  5. u.item +0 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from recommend import get_genres_movies_for_user, test_users
3
+
4
+ label_value_mapping = {f"User {user_id}": user_id for user_id in test_users}
5
+
6
+ # Define a function to generate colors based on genre
7
def get_genre_color(movie_genres, top_genres):
    """Pick the background colour for a genre or movie card.

    Args:
        movie_genres: Sequence of genre names attached to the card. May be
            empty (e.g. a movie with no recognised genre).
        top_genres: The user's preferred genre names, or a falsy value
            (``None`` / empty) when no preference should be applied.

    Returns:
        A hex colour string. Selection order:
        1. no ``movie_genres`` at all -> the default grey;
        2. no ``top_genres`` -> the colour of the first movie genre
           (default grey if that genre has no colour);
        3. the first movie genre that is also a top genre and has a colour;
        4. the first movie genre that has a colour;
        5. the default grey.
    """
    genre_colors = {
        'Action': '#F44336',       # Red: Energetic and intense
        'Adventure': '#FF5722',    # Deep Orange: Exciting and daring
        'Thriller': '#9C27B0',     # Purple: Suspenseful and mysterious
        'Animation': '#FFEB3B',    # Yellow: Bright and cheerful
        'Children': '#4CAF50',     # Green: Fresh, youthful, and playful
        'Fantasy': '#673AB7',      # Deep Purple: Magical and imaginative
        'Comedy': '#FFC107',       # Amber: Lighthearted and funny
        'Musical': '#E91E63',      # Pink: Romantic and artistic
        'Romance': '#FF4081',      # Light Pink: Love and tenderness
        'Crime': '#607D8B',        # Blue Grey: Gritty and dark
        'Mystery': '#8BC34A',      # Lime Green: Curious and investigative
        'Film-Noir': '#212121',    # Black: Dark and moody
        'Documentary': '#009688',  # Teal: Informative and grounded
        'Drama': '#3F51B5',        # Blue: Serious and emotional
        'Horror': '#B71C1C',       # Dark Red: Fearful and intense
        'Sci-Fi': '#00BCD4',       # Cyan: Futuristic and techy
        'War': '#795548',          # Brown: Rugged and historical
        'Western': '#D7A87E',      # Tan: Rustic and wild
    }

    # Default color if no genre matches
    default_color = '#9E9E9E'

    # Guard: indexing movie_genres[0] below would raise IndexError on an
    # empty genre list.
    if not movie_genres:
        return default_color

    if not top_genres:
        return genre_colors.get(movie_genres[0], default_color)

    # Prefer the first movie genre that the user also favours.
    for genre in movie_genres:
        if genre in top_genres and genre in genre_colors:
            return genre_colors[genre]

    # Otherwise fall back to the first movie genre that has any colour.
    for genre in movie_genres:
        if genre in genre_colors:
            return genre_colors[genre]

    return default_color
48
+
49
+
50
def get_movie_recommendations(user_id):
    """Build the HTML for the "Top Genres" and "Recommended Movies" panels.

    Args:
        user_id: The dropdown label of the selected user (a key of
            ``label_value_mapping``), not the numeric id itself.

    Returns:
        A ``(genre_cards, movie_cards)`` tuple of HTML strings: one coloured
        ``<div class='card'>`` per top genre, and one
        ``<div class='card movie-card'>`` per recommended movie showing its
        title and comma-separated genres.

    Raises:
        KeyError: If ``user_id`` is not a known dropdown label.
    """
    # Local import keeps this block self-contained; titles and genres come
    # from a data file and must be escaped before being embedded in markup.
    from html import escape

    # Translate the dropdown label into the numeric user id.
    numeric_id = label_value_mapping[user_id]
    top_genres, movies = get_genres_movies_for_user(numeric_id)

    # Genre cards are coloured by their own genre (no preference applied).
    genre_cards = "".join(
        f"<div class='card' style='background-color:{get_genre_color([genre], None)};'>"
        f"{escape(genre)}</div>"
        for genre in top_genres
    )

    # Movie cards are coloured by whichever of their genres the user prefers.
    movie_cards = "".join(
        f"<div class='card movie-card' style='background-color:{get_genre_color(movie['genres'], top_genres)};'>"
        f"<strong>{escape(movie['title'])}</strong><br>"
        f"<span>{escape(', '.join(movie['genres']))}</span>"
        "</div>"
        for movie in movies
    )

    return genre_cards, movie_cards
68
+
69
+ # Define the Gradio interface
70
# Page layout: a style sheet, a centred title, a user dropdown, and two
# side-by-side HTML panels (top genres / recommended movies).
with gr.Blocks() as demo:
    gr.HTML("""
    <style>
        .gradio-container {
            background-color: #FAFAFA;
            font-family: 'Arial', sans-serif;
            color: #333;
        }
        #center-title {
            text-align: center;
            font-size: 36px;
            margin-bottom: 20px;
            color: #2C3E50;
        }
        h1 {
            font-size: 36px;
            text-align: center;
            margin-bottom: 20px;
            color: #2C3E50;
        }
        h2 {
            font-size: 16px;
            margin-bottom: 10px;
            color: #2C3E50;
            text-align: center;
        }
        .card {
            border: 1px solid #BBDEFB;
            border-radius: 8px;
            padding: 10px 15px;
            margin: 5px;
            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
            text-align: center;
            font-size: 16px;
            color: #FFFFFF;
            width: 180px;
            display: inline-block;
        }
        .movie-card {

        }
    </style>
    """)
    with gr.Row():
        gr.Markdown("### Movie Recommendation System", elem_id="center-title")

    # Dropdown labels are the keys of label_value_mapping ("User <id>").
    user_labels = list(label_value_mapping.keys())

    with gr.Row():
        user_id_input = gr.Dropdown(
            label="Select a User",
            choices=user_labels,
            # Default value: the first user, if there is one.
            value=user_labels[0] if user_labels else None,
            interactive=True
        )

    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("<h2>Top Genres</h2>")
            genres_output = gr.HTML(label="Top Genres")
        with gr.Column(scale=1):
            gr.HTML("<h2>Recommended Movies</h2>")
            movies_output = gr.HTML(label="Recommended Movies")

    # Refresh both panels whenever a different user is selected.
    user_id_input.change(get_movie_recommendations, inputs=user_id_input, outputs=[genres_output, movies_output])

    # `change` does not fire for the pre-selected default value, so also
    # populate both panels once when the page loads.
    demo.load(get_movie_recommendations, inputs=user_id_input, outputs=[genres_output, movies_output])

demo.launch()
model_GCN.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99f98f923bd3d833afe4ee48afa4c627ff6be09cd99a87f4dd4711cf33a9702a
3
+ size 62353
recommend.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch_geometric.datasets import MovieLens100K
2
+ from sklearn.model_selection import train_test_split
3
+ import torch.nn as nn
4
+ import torch
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+ import pandas as pd
8
+ from torch_geometric.nn import GCNConv, GATConv
9
+ import logging
10
+
11
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', filename='metrics.log')
12
+
13
class GNN(torch.nn.Module):
    """Two-layer graph network over a joint user + movie node set.

    User and movie features are first projected to a common width by two
    separate linear layers, stacked into one node matrix (users first, then
    movies), and passed through two graph convolutions with batch norm and
    ReLU in between.

    Args:
        model_type: ``'GCN'`` or ``'GAT'``; selects the convolution type.
        in_channels: Width both feature kinds are projected to.
        hidden_channels: Output width of the first convolution (per head
            for GAT, which uses 2 concatenated heads).
        out_channels: Output width of the second convolution (per head for
            GAT).
        user_in_dim: Raw user feature width. Defaults to the module-level
            ``user_features.shape[1]``.
        movie_in_dim: Raw movie feature width. Defaults to the module-level
            ``movie_features.shape[1]``.

    Raises:
        ValueError: If ``model_type`` is neither ``'GCN'`` nor ``'GAT'``.
    """

    def __init__(self, model_type, in_channels, hidden_channels, out_channels,
                 user_in_dim=None, movie_in_dim=None):
        super().__init__()
        # Fail early: otherwise conv1/conv2/bn1 would be missing and the
        # error would only surface as an AttributeError inside forward().
        if model_type not in ('GCN', 'GAT'):
            raise ValueError(
                f"Unsupported model_type {model_type!r}; expected 'GCN' or 'GAT'"
            )
        # Fall back to the module-level feature tensors for the input widths.
        if user_in_dim is None:
            user_in_dim = user_features.shape[1]
        if movie_in_dim is None:
            movie_in_dim = movie_features.shape[1]

        self.model_type = model_type
        self.fc1 = nn.Linear(user_in_dim, in_channels)
        self.fc2 = nn.Linear(movie_in_dim, in_channels)
        if model_type == 'GCN':
            self.conv1 = GCNConv(in_channels, hidden_channels)
            self.conv2 = GCNConv(hidden_channels, out_channels)
            self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        else:  # 'GAT'
            # Two concatenated heads double the width seen by later layers.
            self.conv1 = GATConv(in_channels, hidden_channels, heads=2, concat=True)
            self.conv2 = GATConv(hidden_channels * 2, out_channels, heads=2, concat=True)
            self.bn1 = torch.nn.BatchNorm1d(hidden_channels * 2)

    def forward(self, x, y, edge_index):
        """Return one embedding row per node.

        Args:
            x: User feature matrix (fed to ``fc1``).
            y: Movie feature matrix (fed to ``fc2``).
            edge_index: Edges over the stacked node set; because users are
                stacked before movies, movie nodes are expected to be
                indexed after all user rows.

        Returns:
            Node embeddings with user rows first, followed by movie rows.
        """
        x = self.fc1(x)
        y = self.fc2(y)
        z = torch.cat((x, y), dim=0)
        z = F.relu(self.bn1(self.conv1(z, edge_index)))
        z = self.conv2(z, edge_index)
        return z
35
+
36
+ # genres = {
37
+ # 0 :['Action', 'Adventure', 'Thriller'], # Action-packed and exciting
38
+ # 1: ['Animation', 'Children', 'Fantasy'], # Family-friendly and imaginative
39
+ # 2: ['Comedy', 'Musical', 'Romance'], # Lighthearted and feel-good
40
+ # 3: ['Crime', 'Mystery', 'Film-Noir'], # Dark, investigative, and gritty
41
+ # 4: ['Documentary', 'Drama'], # Realistic and serious storytelling
42
+ # 5: ['Horror', 'Sci-Fi'], # Fearful and futuristic
43
+ # 6: ['War', 'Western'] # Historical and culturally specific
44
+ # }
45
# Genre names, in the same order as the genre flag columns of u.item
# (minus the leading 'Unknown' flag).
gtypes = [
    'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
# Integer id -> genre name.
genres = dict(enumerate(gtypes))

# User ids offered in the demo dropdown.
test_users = [
    327, 388, 404, 449, 707, 310, 605, 832, 850, 302, 523, 626, 774, 853,
    522, 542, 680, 703, 929, 254, 526, 588, 884, 210, 275, 497, 507, 598,
    825, 937, 311, 380, 448, 541, 885, 938, 409, 429, 433, 451, 534, 551,
    585, 896, 33, 109, 120, 215, 261, 412, 425, 559, 615, 617, 829, 49, 78,
    137, 192, 198, 281, 305, 394, 528, 669,
]

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---- Movie metadata table -------------------------------------------------
file_path = 'u.item'
df = pd.read_csv(file_path, sep='|', header=None, encoding='latin-1')
# The last 19 columns are the genre flags; give them readable names.
last_19_cols = df.columns[-19:]
genre_columns = ['Unknown', *gtypes]

df = df.rename(columns=dict(zip(last_19_cols, genre_columns)))
df = df.rename(columns={1: "info"})
# Pull the four-digit year out of the "(YYYY)" part of the title column.
df['Year'] = df['info'].str.extract(r'\((\d{4})\)')
id_movie_map = df["info"].to_dict()

# ---- Rating graph ---------------------------------------------------------
movie_lens = MovieLens100K('./data/movie_lens')[0]
movie_features = movie_lens["movie"]["x"]
user_features = movie_lens["user"]["x"]
data = movie_lens[("user", "rates", "movie")]
# Keep only edges whose rating is at least 3.
mask = data["rating"] >= 3
data_edge_index = data["edge_index"][:, mask]
data_edge_label = data["rating"][mask]

# ---- User split: 80% train, remaining 20% halved into val / test ----------
user_num_nodes = user_features.shape[0]
train_nodes, testing_nodes = train_test_split(
    range(user_num_nodes), test_size=0.2, random_state=42
)
_half = len(testing_nodes) // 2
val_nodes, test_nodes = testing_nodes[:_half], testing_nodes[_half:]

# Select the edges whose source user belongs to each held-out split.
Y = data_edge_index[0]
val_mask = torch.isin(Y, torch.tensor(val_nodes))
test_mask = torch.isin(Y, torch.tensor(test_nodes))
val_edge_index = data_edge_index[:, val_mask].to(device)
test_edge_index = data_edge_index[:, test_mask].to(device)

user_features = user_features.to(device)
movie_features = movie_features.to(device)

# Movie nodes are placed after all user nodes in the joint node set, so
# shift the movie ids by the number of users.
val_edge_index[1] += user_num_nodes
test_edge_index[1] += user_num_nodes

# ---- Model: restore trained weights and embed every node once -------------
model_type = "GCN"
model = GNN(model_type, in_channels=32, hidden_channels=128, out_channels=64)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
model = model.to(device)

model.load_state_dict(torch.load('model_GCN.pth', map_location=torch.device('cpu')))

model.eval()
with torch.no_grad():
    embeddings = model(user_features, movie_features, test_edge_index)

# Row-wise cosine similarity used to rank movies against a user embedding.
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)

users = test_edge_index[0].unique()
109
+
110
def display_scores(top_k, test_edges_q_indices):
    """Log and print ranking metrics for one recommendation list.

    Args:
        top_k: Ranked recommended node ids, best first. Elements may be
            plain ints or 0-dim tensors (anything ``int()`` accepts).
        test_edges_q_indices: Node ids that count as relevant for the user;
            same element types as ``top_k``. May be empty.

    Returns:
        None. The metrics are written through ``logging.info`` and ``print``:
        precision (hits / list length), recall (hits / number of relevant
        ids), MRR (reciprocal rank of the first hit, 0.0 when there is none)
        and nDCG with binary relevance.
    """
    ranked = [int(node) for node in top_k]
    # A set makes each membership test O(1) and removes duplicate ids.
    relevant = {int(node) for node in test_edges_q_indices}

    # hits[i] is True when the item at rank i + 1 is relevant.
    hits = [node in relevant for node in ranked]
    n_hits = sum(hits)

    # Guard both ratios against empty inputs instead of dividing by zero.
    p_10 = n_hits / len(ranked) if ranked else 0.0
    r_10 = n_hits / len(relevant) if relevant else 0.0

    # Reciprocal rank of the first hit; 0.0 when nothing relevant was ranked
    # (previously `mrr` was left unbound in that case).
    mrr = next((1 / rank for rank, hit in enumerate(hits, start=1) if hit), 0.0)

    dcg = sum(1 / np.log2(rank + 1) for rank, hit in enumerate(hits, start=1) if hit)
    # The ideal ranking puts every relevant item first, capped at list length.
    ideal_relevant = min(len(relevant), len(ranked))
    idcg = sum(1 / np.log2(rank + 1) for rank in range(1, ideal_relevant + 1))
    ndcg = dcg / idcg if idcg > 0 else 0.0

    logging.info(f"Precision@10: {p_10}, Recall@10: {r_10}, MRR: {mrr}, nDCG: {ndcg}")
    print(f"Precision@10: {p_10}, Recall@10: {r_10}, MRR: {mrr}, nDCG: {ndcg}")
128
+
129
def get_genres_movies_for_user(user_id):
    """Recommend movies for a user and summarise the genres they like.

    Movies are ranked by cosine similarity between the user's embedding and
    every movie embedding; the 10 most similar are recommended. Ranking
    metrics against the user's held-out edges are reported as a side effect
    through ``display_scores``.

    Args:
        user_id: Integer user node id (row index into ``embeddings``).

    Returns:
        A ``(top_genre_user, our_movies)`` tuple:
        * ``top_genre_user``: up to three genre names, most frequent first,
          counted over the movies on the user's held-out edges. Genres the
          user has no such movie in are left out, so the list may be shorter
          than three (or empty).
        * ``our_movies``: one dict per recommended movie with keys
          ``"title"`` (the ``info`` column of ``df``) and ``"genres"``
          (list of genre names flagged for that movie).
    """
    num_users = user_features.shape[0]
    user_id = int(user_id)

    # Movie rows follow the user rows in the joint embedding matrix.
    self_emb = embeddings[user_id]
    movie_emb = embeddings[num_users:]
    similarities = cos(self_emb.unsqueeze(0), movie_emb)

    # Only the best 10 are needed, so use top-k instead of a full sort.
    k = min(10, similarities.shape[0])
    pred_mids = torch.topk(similarities, k).indices.tolist()

    # Node ids of the movies on this user's held-out edges.
    relevant_nodes = test_edge_index[1][test_edge_index[0] == user_id].tolist()
    top_k = [mid + num_users for mid in pred_mids]

    display_scores(top_k, relevant_nodes)

    # Convert node ids back to row positions in the movie table.
    actual_mids = [node - num_users for node in relevant_nodes]

    # Count, per genre, how many of the user's movies carry that genre flag.
    genre_names = list(genres.values())
    genre_counts = df.iloc[actual_mids][genre_names].eq(1).sum()
    # sorted() is stable, so ties keep the original genre order.
    ranked_genres = sorted(genre_names, key=lambda g: genre_counts[g], reverse=True)
    top_genre_user = [g for g in ranked_genres[:3] if genre_counts[g] > 0]

    our_movies = []
    for pmid in pred_mids:
        row = df.iloc[pmid]  # fetch the row once rather than per genre
        our_movies.append({
            "title": row["info"],
            "genres": [g for g in genre_names if row[g] == 1],
        })

    return top_genre_user, our_movies
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ numpy
3
+ torch-geometric
u.item ADDED
The diff for this file is too large to render. See raw diff