Update app.py
Browse files
app.py
CHANGED
@@ -1,64 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
-
|
3 |
|
4 |
-
""
|
5 |
-
|
6 |
-
"""
|
7 |
-
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
8 |
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
history: list[tuple[str, str]],
|
13 |
-
system_message,
|
14 |
-
max_tokens,
|
15 |
-
temperature,
|
16 |
-
top_p,
|
17 |
-
):
|
18 |
-
messages = [{"role": "system", "content": system_message}]
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
if val[1]:
|
24 |
-
messages.append({"role": "assistant", "content": val[1]})
|
25 |
|
26 |
-
|
|
|
27 |
|
28 |
-
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
"""
|
44 |
-
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
45 |
-
"""
|
46 |
demo = gr.ChatInterface(
|
47 |
respond,
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
value=0.95,
|
56 |
-
step=0.05,
|
57 |
-
label="Top-p (nucleus sampling)",
|
58 |
-
),
|
59 |
-
],
|
60 |
)
|
61 |
|
62 |
-
|
63 |
-
if __name__ == "__main__":
|
64 |
-
demo.launch()
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
+
from sklearn.model_selection import train_test_split
|
5 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
6 |
+
import requests
|
7 |
import gradio as gr
|
8 |
+
import os
|
9 |
|
10 |
+
# --- Data loading ------------------------------------------------------
# Both CSV files are read from the current working directory at import time.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# OMDb API key, read from the environment (None when the variable is unset).
OMDB_API_KEY = os.environ.get("omdbapikey")

# movieId -> title, plus the inverse mapping keyed by lowercase title.
movie_lookup = movies.set_index("movieId")["title"].to_dict()
reverse_movie_lookup = {
    title.lower(): movie_id for movie_id, title in movie_lookup.items()
}

# --- Collaborative filtering -------------------------------------------
# Similarities are computed on an 80% split of the ratings; test_df is the
# held-out remainder.
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)

# users x movies rating matrix; NaN marks "not rated". The zero-filled copy
# exists only as input for cosine_similarity.
train_matrix = train_df.pivot_table(
    values='rating',
    index='userId',
    columns='movieId',
)
train_matrix_filled = train_matrix.fillna(0)

# User-user similarity, labelled by userId on both axes.
user_similarity = cosine_similarity(train_matrix_filled)
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=train_matrix_filled.index,
    columns=train_matrix_filled.index,
)

# Item-item similarity, labelled by movieId on both axes.
item_rating_matrix = train_matrix_filled.transpose()
item_similarity = cosine_similarity(item_rating_matrix)
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=item_rating_matrix.index,
    columns=item_rating_matrix.index,
)

# --- Content-based (genre) features ------------------------------------
# TF-IDF is fitted on the genres column of the ratings/movies join (one row
# per rating); one row per distinct movieId is then selected from the result.
data = ratings.merge(movies, on='movieId')
data['genres'] = data['genres'].fillna('')
vectorizer = TfidfVectorizer(token_pattern=r'[a-zA-Z0-9\-]+')
tfidf_matrix = vectorizer.fit_transform(data['genres'].values)

movie_ids = data['movieId'].values
# np.unique returns the sorted distinct ids and, for each, the position of
# its first occurrence in movie_ids.
unique_movie_ids, indices = np.unique(movie_ids, return_index=True)
movie_id_to_index = dict(zip(unique_movie_ids, range(len(unique_movie_ids))))
movie_genre_matrix = tfidf_matrix[indices]
|
37 |
|
38 |
+
def get_movie_poster(title):
    """Look up a poster URL for *title* through the OMDb API.

    Args:
        title: Movie title to search for (sent as OMDb's ``t`` parameter).

    Returns:
        The poster URL as a string, or ``''`` when no API key is configured,
        the request fails or times out, the response is not a JSON object,
        or OMDb reports no poster for the title.
    """
    if not OMDB_API_KEY:
        return ''

    try:
        response = requests.get(
            "https://www.omdbapi.com/",
            # Passing params lets requests URL-encode the title; titles with
            # '&', '#', '+' or spaces would otherwise corrupt the query string.
            params={"t": title, "apikey": OMDB_API_KEY},
            # Without a timeout a stalled connection would block the chat
            # reply indefinitely.
            timeout=5,
        )
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Poster lookup is best-effort: network errors and non-JSON bodies
        # simply mean "no poster".
        return ''

    if not isinstance(payload, dict):
        return ''

    poster = payload.get('Poster', '')
    # OMDb uses the literal string "N/A" for fields it has no value for;
    # treating it as a URL would render a broken image.
    if not poster or poster == 'N/A':
        return ''
    return poster
|
47 |
|
48 |
+
def user_cf_recommend(user_id):
    """Recommend up to five unseen movies for a user from similar users' ratings.

    The ten users with the highest similarity to ``user_id`` (per
    ``user_similarity_df``) are consulted. For every movie they rated and the
    target user has not, the predicted rating is the similarity-weighted
    average of their ratings.

    Args:
        user_id: User identifier; anything ``int()`` accepts.

    Returns:
        A list of ``(title, predicted_rating, poster_url)`` tuples ordered by
        predicted rating (highest first), or the string ``"Invalid input."``
        when ``user_id`` is not an integer, or ``"User ID not found."`` when
        the user is not present in the similarity matrix.
    """
    # Only the conversion is guarded, so genuine bugs further down are not
    # misreported as bad input.
    try:
        user_id = int(user_id)
    except (TypeError, ValueError, OverflowError):
        return "Invalid input."

    if user_id not in user_similarity_df.index:
        return "User ID not found."

    neighbours = (
        user_similarity_df[user_id]
        .drop(user_id)
        .sort_values(ascending=False)
        .head(10)
    )

    # Loop-invariant: fetch the target user's row once and keep the ids of
    # the movies they have already rated (non-NaN entries).
    already_rated = set(train_matrix.loc[user_id].dropna().index)

    scores = {}
    sim_sums = {}
    for other_user, similarity in neighbours.items():
        other_ratings = train_matrix.loc[other_user].dropna()
        for movie_id, rating in other_ratings.items():
            if movie_id in already_rated:
                continue
            scores[movie_id] = scores.get(movie_id, 0) + similarity * rating
            sim_sums[movie_id] = sim_sums.get(movie_id, 0) + abs(similarity)

    # Movies whose accumulated similarity is zero are skipped to avoid a
    # division by zero.
    ranked_movies = sorted(
        (
            (movie_id, total / sim_sums[movie_id])
            for movie_id, total in scores.items()
            if sim_sums[movie_id] > 0
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )[:5]

    result = []
    for movie_id, score in ranked_movies:
        title = movie_lookup.get(movie_id, 'Unknown')
        result.append((title, round(score, 2), get_movie_poster(title)))
    return result
|
79 |
+
|
80 |
+
def item_cf_recommend(movie_title):
    """Return the five movies most similar to *movie_title* by item-item similarity.

    Args:
        movie_title: Title to look up; matched exactly against the keys of
            ``reverse_movie_lookup`` after lowercasing and stripping.

    Returns:
        A list of ``(title, poster_url)`` tuples ordered from most to least
        similar, or ``"Movie not found."`` when the title is unknown, or
        ``"No similarity data available."`` when the movie has no column in
        ``item_similarity_df``.
    """
    query = movie_title.lower().strip()

    try:
        target_movie_id = reverse_movie_lookup[query]
    except KeyError:
        return "Movie not found."

    if target_movie_id not in item_similarity_df.columns:
        return "No similarity data available."

    # Drop the movie's similarity with itself, then keep the five best.
    neighbours = (
        item_similarity_df[target_movie_id]
        .drop(target_movie_id)
        .sort_values(ascending=False)
        .head(5)
    )

    recommendations = []
    for neighbour_id in neighbours.index:
        neighbour_title = movie_lookup.get(neighbour_id, 'Unknown')
        recommendations.append((neighbour_title, get_movie_poster(neighbour_title)))
    return recommendations
|
100 |
+
|
101 |
+
def cb_recommend(movie_title):
    """Recommend up to five movies whose genre vector is closest to *movie_title*.

    Args:
        movie_title: Title to look up; matched exactly (case-insensitively,
            after stripping) against the ``title`` column of ``movies``.

    Returns:
        A list of ``(title, poster_url)`` tuples in descending order of cosine
        similarity between genre TF-IDF vectors, or ``"Movie not found."``
        when no title matches, or ``"No genre data available."`` when the
        movie has no row in the genre matrix.
    """
    movie_title = movie_title.strip().lower()

    # Compare against a local lowercase Series so the shared `movies` frame
    # is not modified as a side effect of answering a query.
    titles_lower = movies['title'].str.lower()
    matching_ids = movies.loc[titles_lower == movie_title, 'movieId']
    if matching_ids.empty:
        return "Movie not found."
    movie_id = matching_ids.iloc[0]

    if movie_id not in movie_id_to_index:
        return "No genre data available."

    input_vec = movie_genre_matrix[movie_id_to_index[movie_id]]
    sims = cosine_similarity(input_vec, movie_genre_matrix).flatten()

    seen = set()
    result = []
    # Walk candidates from most to least similar.
    for i in sims.argsort()[::-1]:
        # Dictionary lookup instead of a boolean scan of `movies` per candidate.
        title = movie_lookup[unique_movie_ids[i]]
        # Skip the query movie itself and any title already emitted.
        if title.lower() == movie_title or title in seen:
            continue
        seen.add(title)
        result.append((title, get_movie_poster(title)))
        if len(result) == 5:
            break

    return result
|
131 |
+
|
132 |
+
def _recommendation_card(title, poster, detail=""):
    """Render one recommendation as an HTML snippet.

    ``title`` and ``poster`` are HTML-escaped; ``detail`` is inserted verbatim
    after the bold title and must already be safe markup.
    """
    from html import escape

    # quote=False keeps apostrophes in titles readable; '&', '<' and '>' are
    # the only characters that matter inside a text node.
    heading = f"<b>{escape(str(title), quote=False)}</b>{detail}"
    if not poster:
        return f"<div>{heading}</div>"

    # The URL sits inside a single-quoted attribute, so quotes must be
    # escaped as well or a stray "'" would terminate the attribute.
    safe_poster = escape(str(poster), quote=True)
    return (
        "<div style='display:flex;margin-bottom:10px;'>"
        f"<img src='{safe_poster}' "
        "style='width:80px;height:120px;object-fit:cover;margin-right:10px;'>"
        f"<div>{heading}</div></div>"
    )


def format_recommendations(recommendations):
    """Turn a recommender result into the text/HTML shown in the chat.

    Args:
        recommendations: Either a message string (returned unchanged), or an
            iterable of ``(title, score, poster)`` / ``(title, poster)`` tuples.

    Returns:
        The input string itself, or the per-movie HTML snippets joined with
        ``<br>``. Three-element items additionally show
        ``Predicted rating: <score>``. An empty iterable yields ``""``.
    """
    if isinstance(recommendations, str):
        return recommendations

    cards = []
    for item in recommendations:
        if len(item) == 3:
            title, score, poster = item
            cards.append(
                _recommendation_card(title, poster, f"<br>Predicted rating: {score}")
            )
        else:
            title, poster = item
            cards.append(_recommendation_card(title, poster))

    return "<br>".join(cards)
|
152 |
+
|
153 |
+
def respond(message, history):
    """Route a chat message to the matching recommender and format its reply.

    Recognised commands (case-insensitive, matched by prefix):
      * ``recommend for user <ID>`` -> ``user_cf_recommend``
      * ``similar to <title>``      -> ``item_cf_recommend``
      * ``recommend like <title>``  -> ``cb_recommend``

    Args:
        message: Raw text typed by the user.
        history: Chat history supplied by the chat interface; not used.

    Returns:
        The formatted recommendations, an error message from the recommender,
        a prompt for a valid user ID, or the list of available commands when
        no command prefix matches.
    """
    message = message.lower().strip()

    user_prefix = "recommend for user"
    item_prefix = "similar to"
    content_prefix = "recommend like"

    if message.startswith(user_prefix):
        # Only the ID parse is guarded; failures inside the recommender are
        # not a "bad user ID" and must not be reported as one.
        try:
            user_id = int(message.split()[-1])
        except ValueError:
            return "Please provide a valid user ID after 'recommend for user'"
        return format_recommendations(user_cf_recommend(user_id))

    if message.startswith(item_prefix):
        # Slice by the prefix length rather than a hand-counted offset.
        movie_title = message[len(item_prefix):].strip()
        return format_recommendations(item_cf_recommend(movie_title))

    if message.startswith(content_prefix):
        movie_title = message[len(content_prefix):].strip()
        return format_recommendations(cb_recommend(movie_title))

    return "Available commands:\n1. 'recommend for user [ID]'\n2. 'similar to [Movie Title]'\n3. 'recommend like [Movie Title]'"
|
176 |
|
|
|
|
|
|
|
177 |
# Chat UI: every user message is passed to respond(); the examples are the
# three supported command forms.
demo = gr.ChatInterface(
    respond,
    title="Movie Recommendation Chatbot",
    description="Ask for recommendations using these commands:\n1. 'recommend for user [ID]'\n2. 'similar to [Movie Title]'\n3. 'recommend like [Movie Title]'",
    examples=[
        ["recommend for user 42"],
        ["similar to Toy Story"],
        ["recommend like The Dark Knight"],
    ],
)

# Start the server only when executed as a script, so importing this module
# does not launch a web server as a side effect.
if __name__ == "__main__":
    demo.launch()
|
|
|
|