SmitaGautam
committed on
Upload 5 files
Browse files- app.py +134 -0
- model_GCN.pth +3 -0
- recommend.py +169 -0
- requirements.txt +3 -0
- u.item +0 -0
app.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from recommend import get_genres_movies_for_user, test_users
|
3 |
+
|
4 |
+
# Dropdown label ("User <id>") -> numeric user id passed to the recommender.
label_value_mapping = dict(
    zip((f"User {user_id}" for user_id in test_users), test_users)
)
|
5 |
+
|
6 |
+
# Define a function to generate colors based on genre
|
7 |
+
def get_genre_color(movie_genres, top_genres):
    """Pick the background colour for a genre card or movie card.

    Args:
        movie_genres: Sequence of genre names attached to the card.
        top_genres: The user's preferred genres, or a falsy value
            (``None`` / empty) when no preference should be considered.

    Returns:
        A hex colour string. With no ``top_genres`` the colour of the first
        entry of ``movie_genres`` is used. Otherwise the first genre that is
        both preferred and has a colour wins, then the first genre that has
        any colour. The neutral grey default is returned when nothing matches
        or ``movie_genres`` is empty.
    """
    genre_colors = {
        'Action': '#F44336',       # Red: Energetic and intense
        'Adventure': '#FF5722',    # Deep Orange: Exciting and daring
        'Thriller': '#9C27B0',     # Purple: Suspenseful and mysterious
        'Animation': '#FFEB3B',    # Yellow: Bright and cheerful
        'Children': '#4CAF50',     # Green: Fresh, youthful, and playful
        'Fantasy': '#673AB7',      # Deep Purple: Magical and imaginative
        'Comedy': '#FFC107',       # Amber: Lighthearted and funny
        'Musical': '#E91E63',      # Pink: Romantic and artistic
        'Romance': '#FF4081',      # Light Pink: Love and tenderness
        'Crime': '#607D8B',        # Blue Grey: Gritty and dark
        'Mystery': '#8BC34A',      # Lime Green: Curious and investigative
        'Film-Noir': '#212121',    # Black: Dark and moody
        'Documentary': '#009688',  # Teal: Informative and grounded
        'Drama': '#3F51B5',        # Blue: Serious and emotional
        'Horror': '#B71C1C',       # Dark Red: Fearful and intense
        'Sci-Fi': '#00BCD4',       # Cyan: Futuristic and techy
        'War': '#795548',          # Brown: Rugged and historical
        'Western': '#D7A87E',      # Tan: Rustic and wild
    }

    # Default color if no genre matches
    default_color = '#9E9E9E'

    # A card without any genre has nothing to look up; indexing
    # movie_genres[0] below would otherwise raise IndexError.
    if not movie_genres:
        return default_color

    if not top_genres:
        return genre_colors.get(movie_genres[0], default_color)

    # Prefer the first genre the user likes that also has a colour.
    for genre in movie_genres:
        if genre in top_genres and genre in genre_colors:
            return genre_colors[genre]

    # Otherwise fall back to the first genre that has a colour at all.
    for genre in movie_genres:
        if genre in genre_colors:
            return genre_colors[genre]

    return default_color
|
48 |
+
|
49 |
+
|
50 |
+
def get_movie_recommendations(user_id):
    """Render the top-genre and recommended-movie cards for a dropdown choice.

    Args:
        user_id: Dropdown label; must be a key of ``label_value_mapping``.

    Returns:
        Tuple ``(genre_cards, movie_cards)`` of HTML fragments, one ``div``
        per genre / movie, coloured via ``get_genre_color``.

    Raises:
        KeyError: If ``user_id`` is not a known dropdown label.
    """
    # Function-scope import so the block stays self-contained.
    from html import escape

    # Fetch the top genres and recommended movies based on user_id
    user_id = label_value_mapping[user_id]
    top_genres, movies = get_genres_movies_for_user(user_id)

    # Titles and genre names come from the data file and may contain
    # characters such as '&' or '<', so escape them before embedding in HTML.
    genre_cards = "".join(
        f"<div class='card' style='background-color:{get_genre_color([genre], None)};'>{escape(str(genre))}</div>"
        for genre in top_genres
    )

    movie_cards = "".join(
        f"<div class='card movie-card' style='background-color:{get_genre_color(movie['genres'], top_genres)};'>"
        f"<strong>{escape(str(movie['title']))}</strong><br><span>{escape(', '.join(movie['genres']))}</span>"
        "</div>"
        for movie in movies
    )

    return genre_cards, movie_cards
|
68 |
+
|
69 |
+
# Define the Gradio interface
|
70 |
+
# Gradio UI: a user dropdown on top, with the user's top genres and the
# recommended movies rendered side by side as coloured HTML cards.
with gr.Blocks() as demo:
    # Page-wide styling for the title, section headings and the cards.
    gr.HTML("""
    <style>
    .gradio-container {
        background-color: #FAFAFA;
        font-family: 'Arial', sans-serif;
        color: #333;
    }
    #center-title {
        text-align: center;
        font-size: 36px;
        margin-bottom: 20px;
        color: #2C3E50;
    }
    h1 {
        font-size: 36px;
        text-align: center;
        margin-bottom: 20px;
        color: #2C3E50;
    }
    h2 {
        font-size: 16px;
        margin-bottom: 10px;
        color: #2C3E50;
        text-align: center;
    }
    .card {
        border: 1px solid #BBDEFB;
        border-radius: 8px;
        padding: 10px 15px;
        margin: 5px;
        box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
        text-align: center;
        font-size: 16px;
        color: #FFFFFF;
        width: 180px;
        display: inline-block;
    }
    .movie-card {

    }
    </style>
    """)
    with gr.Row():
        gr.Markdown("### Movie Recommendation System", elem_id="center-title")

    with gr.Row():
        user_id_input = gr.Dropdown(
            label="Select a User",
            choices=list(label_value_mapping.keys()),
            value=list(label_value_mapping.keys())[0],  # Default value
            interactive=True
        )

    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("<h2>Top Genres</h2>")
            genres_output = gr.HTML(label="Top Genres")
        with gr.Column(scale=1):
            gr.HTML("<h2>Recommended Movies</h2>")
            movies_output = gr.HTML(label="Recommended Movies")

    # Refresh both panes whenever a different user is picked.
    user_id_input.change(get_movie_recommendations, inputs=user_id_input, outputs=[genres_output, movies_output])

    # The change event does not fire for the preselected default, so also
    # populate both panes once when the page loads.
    demo.load(get_movie_recommendations, inputs=user_id_input, outputs=[genres_output, movies_output])

demo.launch()
|
model_GCN.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99f98f923bd3d833afe4ee48afa4c627ff6be09cd99a87f4dd4711cf33a9702a
|
3 |
+
size 62353
|
recommend.py
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch_geometric.datasets import MovieLens100K
|
2 |
+
from sklearn.model_selection import train_test_split
|
3 |
+
import torch.nn as nn
|
4 |
+
import torch
|
5 |
+
import torch.nn.functional as F
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
from torch_geometric.nn import GCNConv, GATConv
|
9 |
+
import logging
|
10 |
+
|
11 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', filename='metrics.log')
|
12 |
+
|
13 |
+
class GNN(torch.nn.Module):
    """Two-layer graph network over a joint user + movie node set.

    User and movie features are first projected to a common width by two
    separate linear layers, stacked into one node matrix (users first, then
    movies) and passed through two graph convolutions with batch norm and
    ReLU in between.

    Args:
        model_type: ``'GCN'`` or ``'GAT'``; selects the convolution layers.
        in_channels: Width both feature sets are projected to.
        hidden_channels: Output width of the first convolution (per head for
            GAT).
        out_channels: Output width of the second convolution (per head for
            GAT).
        user_dim: Raw user feature width. Defaults to the module-level
            ``user_features.shape[1]``.
        movie_dim: Raw movie feature width. Defaults to the module-level
            ``movie_features.shape[1]``.

    Raises:
        ValueError: If ``model_type`` is not ``'GCN'`` or ``'GAT'``.
    """

    def __init__(self, model_type, in_channels, hidden_channels, out_channels,
                 user_dim=None, movie_dim=None):
        super().__init__()
        # Fail early: previously an unknown type produced a model without
        # conv1/conv2/bn1 that only broke inside forward().
        if model_type not in ('GCN', 'GAT'):
            raise ValueError(
                f"Unsupported model_type {model_type!r}; expected 'GCN' or 'GAT'"
            )
        self.model_type = model_type

        # Fall back to the module-level feature tensors when no explicit
        # widths are given (the original behaviour).
        if user_dim is None:
            user_dim = user_features.shape[1]
        if movie_dim is None:
            movie_dim = movie_features.shape[1]

        # Attribute names are kept as-is: a saved state dict is loaded into
        # this class elsewhere in the file and is keyed by these names.
        self.fc1 = nn.Linear(user_dim, in_channels)
        self.fc2 = nn.Linear(movie_dim, in_channels)
        if model_type == 'GCN':
            self.conv1 = GCNConv(in_channels, hidden_channels)
            self.conv2 = GCNConv(hidden_channels, out_channels)
            self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        else:  # 'GAT'
            # Two concatenated heads double the width fed to bn1 and conv2.
            self.conv1 = GATConv(in_channels, hidden_channels, heads=2, concat=True)
            # NOTE(review): conv2 also uses heads=2 with concat=True, so its
            # output is presumably 2 * out_channels wide -- confirm intended.
            self.conv2 = GATConv(hidden_channels * 2, out_channels, heads=2, concat=True)
            self.bn1 = torch.nn.BatchNorm1d(hidden_channels * 2)

    def forward(self, x, y, edge_index):
        """Return one embedding row per node (users first, then movies).

        Args:
            x: User feature matrix, projected by ``fc1``.
            y: Movie feature matrix, projected by ``fc2``.
            edge_index: Edges over the stacked node matrix; because users and
                movies are concatenated along dim 0, movie node ids must be
                offset by the number of users.
        """
        x = self.fc1(x)
        y = self.fc2(y)
        z = torch.cat((x, y), dim=0)
        z = F.relu(self.bn1(self.conv1(z, edge_index)))
        z = self.conv2(z, edge_index)
        return z
|
35 |
+
|
36 |
+
# genres = {
|
37 |
+
# 0 :['Action', 'Adventure', 'Thriller'], # Action-packed and exciting
|
38 |
+
# 1: ['Animation', 'Children', 'Fantasy'], # Family-friendly and imaginative
|
39 |
+
# 2: ['Comedy', 'Musical', 'Romance'], # Lighthearted and feel-good
|
40 |
+
# 3: ['Crime', 'Mystery', 'Film-Noir'], # Dark, investigative, and gritty
|
41 |
+
# 4: ['Documentary', 'Drama'], # Realistic and serious storytelling
|
42 |
+
# 5: ['Horror', 'Sci-Fi'], # Fearful and futuristic
|
43 |
+
# 6: ['War', 'Western'] # Historical and culturally specific
|
44 |
+
# }
|
45 |
+
# Genre vocabulary: the genre column names applied to the item table below,
# minus the leading 'Unknown' flag.
gtypes = [
    'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
genres = dict(enumerate(gtypes))

# User node ids offered in the demo dropdown.
test_users = [327, 388, 404, 449, 707, 310, 605, 832, 850, 302, 523, 626, 774, 853, 522, 542, 680, 703, 929, 254, 526, 588, 884, 210, 275, 497, 507, 598, 825, 937, 311, 380, 448, 541, 885, 938, 409, 429, 433, 451, 534, 551, 585, 896, 33, 109, 120, 215, 261, 412, 425, 559, 615, 617, 829, 49, 78, 137, 192, 198, 281, 305, 394, 528, 669]

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Movie metadata table ---------------------------------------------------
file_path = 'u.item'
df = pd.read_csv(file_path, sep='|', header=None, encoding='latin-1')
last_19_cols = df.columns[-19:]
genre_columns = [
    'Unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
]

# Name the trailing 19 flag columns after their genre and column 1 "info".
df = df.rename(columns=dict(zip(last_19_cols, genre_columns)))
df = df.rename(columns={1: "info"})
df['Year'] = df['info'].str.extract(r'\((\d{4})\)')
id_movie_map = df["info"].to_dict()

# --- Graph data -------------------------------------------------------------
movie_lens = MovieLens100K('./data/movie_lens')[0]
movie_features = movie_lens["movie"]["x"]
user_features = movie_lens["user"]["x"]
data = movie_lens[("user", "rates", "movie")]

# Keep only the edges whose rating is at least 3.
mask = data["rating"] >= 3
data_edge_index = data["edge_index"][:, mask]
data_edge_label = data["rating"][mask]

# Split user nodes 80/20, then halve the 20% into validation and test users.
user_num_nodes = user_features.shape[0]
train_nodes, testing_nodes = train_test_split(range(user_num_nodes), test_size=0.2, random_state=42)
_half = len(testing_nodes) // 2
val_nodes = testing_nodes[:_half]
test_nodes = testing_nodes[_half:]

# Select the edges whose source user belongs to each split.
Y = data_edge_index[0]
val_mask = torch.isin(Y, torch.tensor(val_nodes))
val_edge_index = data_edge_index[:, val_mask]
test_mask = torch.isin(Y, torch.tensor(test_nodes))
test_edge_index = data_edge_index[:, test_mask]

user_features = user_features.to(device)
movie_features = movie_features.to(device)

val_edge_index = val_edge_index.to(device)
test_edge_index = test_edge_index.to(device)

# Movie nodes follow the user nodes in the stacked node matrix, so shift the
# movie ids by the number of users.
val_edge_index[1] += user_num_nodes
test_edge_index[1] += user_num_nodes

# --- Model restore and embedding computation --------------------------------
model_type = "GCN"
model = GNN(model_type, in_channels=32, hidden_channels=128, out_channels=64)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
model = model.to(device)

model.load_state_dict(torch.load('model_GCN.pth', map_location=torch.device('cpu')))

# Embed every node once, up front, using the test edges as the graph.
model.eval()
with torch.no_grad():
    embeddings = model(user_features, movie_features, test_edge_index)

cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)

users = test_edge_index[0].unique()
|
109 |
+
|
110 |
+
def display_scores(top_k, test_edges_q_indices):
    """Log and print ranking metrics for one recommendation list.

    Args:
        top_k: Ranked recommended node ids, best first.
        test_edges_q_indices: Node ids the user actually interacted with;
            any container supporting ``len`` and ``in``.

    Returns:
        None. The metrics are written to the log and to stdout.

    Note:
        The value labelled ``Precision@10`` is the raw number of hits in
        ``top_k``; it is not divided by the list length.
    """
    num_relevant = len(test_edges_q_indices)
    # One flag per ranked item: was it among the user's actual items?
    hits = [node in test_edges_q_indices for node in top_k]

    p_10 = sum(1 for hit in hits if hit)
    # No held-out items means recall is undefined; report 0.0 rather than
    # dividing by zero.
    r_10 = p_10 / num_relevant if num_relevant else 0.0

    # Reciprocal rank of the first hit. Defaults to 0.0 so the variable is
    # always bound (previously unbound when nothing in top_k was relevant).
    mrr = 0.0
    for rank, hit in enumerate(hits, start=1):
        if hit:
            mrr = 1 / rank
            break

    dcg = 0.0
    for rank, hit in enumerate(hits, start=1):
        if hit:
            dcg += 1 / np.log2(rank + 1)

    # Ideal DCG: every relevant item (capped at 10) ranked at the top.
    ideal_relevant = min(num_relevant, 10)
    idcg = sum(1 / np.log2(rank + 1) for rank in range(1, ideal_relevant + 1))
    ndcg = dcg / idcg if idcg > 0 else 0.0

    logging.info(f"Precision@10: {p_10}, Recall@10: {r_10}, MRR: {mrr}, nDCG: {ndcg}")
    print(f"Precision@10: {p_10}, Recall@10: {r_10}, MRR: {mrr}, nDCG: {ndcg}")
|
128 |
+
|
129 |
+
def get_genres_movies_for_user(user_id):
    """Return a user's three top genres and ten recommended movies.

    Movies are ranked by cosine similarity between the user's precomputed
    embedding and every movie embedding. The top genres are counted over the
    movies on the user's edges in ``test_edge_index``.

    Args:
        user_id: Index of the user node in ``embeddings``.

    Returns:
        Tuple ``(top_genre_user, our_movies)``: a list of three genre names
        (most frequent first) and a list of ``{"title", "genres"}`` dicts,
        one per recommended movie, best match first.
    """
    num_users = user_features.shape[0]  # movie nodes start at this offset
    curr_node = torch.tensor(user_id)

    # Rank all movies for this user and keep only the best ten, instead of
    # materialising the full ranking as a Python list.
    self_emb = embeddings[curr_node]
    itm = embeddings[num_users:]
    similarities = cos(self_emb, itm)
    ranked_movies = torch.argsort(similarities, descending=True)[:10]
    top_k = ranked_movies + num_users  # back to global node ids

    test_edges_q_indices = test_edge_index[1][test_edge_index[0] == curr_node]

    display_scores(top_k, test_edges_q_indices)

    # Convert node ids to row positions in the movie table.
    pred_mids = ranked_movies.tolist()
    actual_mids = (test_edges_q_indices - num_users).tolist()

    genre_names = list(genres.values())

    # Count, per genre, how many of the user's actual movies carry its flag.
    liked_flags = df.iloc[actual_mids][genre_names]
    genre_counts = (liked_flags == 1).sum(axis=0)
    # sorted() is stable, so ties keep the vocabulary order.
    ranked_genres = sorted(genre_names, key=lambda g: genre_counts[g], reverse=True)
    top_genre_user = ranked_genres[:3]

    our_movies = []
    for pmid in pred_mids:
        row = df.iloc[pmid]  # fetch the row once, not once per genre
        our_movies.append({
            "title": row["info"],
            "genres": [g for g in genre_names if row[g] == 1],
        })

    return top_genre_user, our_movies
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
numpy
|
3 |
+
torch-geometric
|
u.item
ADDED
The diff for this file is too large to render.
See raw diff
|
|