mojad121 committed on
Commit
e885901
·
verified ·
1 Parent(s): e67b2bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -49
app.py CHANGED
@@ -1,64 +1,188 @@
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message``.

    Builds a chat transcript from ``system_message``, the prior
    ``history`` pairs (user text, assistant text) and the new ``message``,
    sends it to ``client.chat_completion`` in streaming mode, and yields
    the reply accumulated so far after every streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately not called `message`: the original
    # rebound the parameter on every chunk.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # A streamed delta may carry no text (content is None); appending
        # it unguarded raised TypeError.
        if token:
            response += token
        yield response
 
 
 
 
 
 
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
# Chat UI around `respond`, with extra controls for the system prompt and
# the sampling parameters.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ import requests
7
  import gradio as gr
8
+ import os
9
 
10
# --- Data loading -----------------------------------------------------------
# Both CSV files are read from paths relative to the working directory.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# Optional OMDb key used for poster lookups; None when the variable is unset.
OMDB_API_KEY = os.environ.get("omdbapikey")

# movieId -> title, and lower-cased title -> movieId for case-insensitive
# lookups of user-typed titles.
movie_lookup = movies.set_index("movieId")["title"].to_dict()
reverse_movie_lookup = {
    title.lower(): movie_id for movie_id, title in movie_lookup.items()
}

# --- Collaborative filtering ------------------------------------------------
# Similarities are computed on the 80% training split only.
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)
train_matrix = train_df.pivot_table(index='userId', columns='movieId', values='rating')
# Missing ratings become 0 so that cosine similarity can be computed.
train_matrix_filled = train_matrix.fillna(0)

# User-user similarity (rows and columns are userIds).
user_similarity = cosine_similarity(train_matrix_filled)
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=train_matrix_filled.index,
    columns=train_matrix_filled.index,
)

# Item-item similarity (rows and columns are movieIds).
item_rating_matrix = train_matrix_filled.T
item_similarity = cosine_similarity(item_rating_matrix)
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=item_rating_matrix.index,
    columns=item_rating_matrix.index,
)

# --- Content-based features -------------------------------------------------
# TF-IDF is fitted on the genre string of every rating row; one row per
# distinct movieId is then kept as that movie's genre vector.
data = pd.merge(ratings, movies, on='movieId')
data['genres'] = data['genres'].fillna('')
vectorizer = TfidfVectorizer(token_pattern=r'[a-zA-Z0-9\-]+')
tfidf_matrix = vectorizer.fit_transform(data['genres'].values)
movie_ids = data['movieId'].values
# `indices` holds the first row position of each unique movieId in `data`.
unique_movie_ids, indices = np.unique(movie_ids, return_index=True)
movie_id_to_index = {
    movie_id: position for position, movie_id in enumerate(unique_movie_ids)
}
movie_genre_matrix = tfidf_matrix[indices]
37
 
38
def get_movie_poster(title):
    """Return the OMDb poster URL for ``title``, or ``''`` when unavailable.

    The lookup is best-effort: a missing API key, a network or HTTP-level
    failure, a non-JSON response, or a response without a usable poster
    all yield an empty string rather than raising.
    """
    if not OMDB_API_KEY:
        return ''
    try:
        # `params` URL-encodes the title (spaces, '&', '#', ...), which the
        # original f-string URL did not. The timeout keeps a stalled OMDb
        # call from hanging the chat handler.
        response = requests.get(
            "https://www.omdbapi.com/",
            params={"t": title, "apikey": OMDB_API_KEY},
            timeout=5,
        )
        payload = response.json()
    except (requests.RequestException, ValueError):
        return ''
    if not isinstance(payload, dict):
        return ''
    poster = payload.get('Poster') or ''
    # OMDb reports a missing field as the literal string "N/A", which is
    # not a usable image URL.
    if poster == 'N/A':
        return ''
    # NOTE(review): titles are passed through unchanged; if the dataset's
    # titles carry a "(year)" suffix the OMDb title search may not match -
    # confirm against movies.csv.
    return poster
47
 
48
def user_cf_recommend(user_id):
    """Recommend up to five unseen movies for ``user_id`` (user-based CF).

    The ten users most similar to ``user_id`` (by ``user_similarity_df``)
    vote on every movie the target user has no training rating for; each
    movie's predicted rating is the similarity-weighted mean of the
    neighbours' ratings.

    Returns a list of ``(title, predicted_rating, poster_url)`` tuples
    sorted by predicted rating (descending), or one of the strings
    ``"Invalid input."`` / ``"User ID not found."``.
    """
    # Only the integer conversion is treated as an input error; the original
    # bare `except` also reported unrelated internal failures as
    # "Invalid input.".
    try:
        user_id = int(user_id)
    except (TypeError, ValueError):
        return "Invalid input."

    if user_id not in user_similarity_df.index:
        return "User ID not found."

    similar_users = user_similarity_df[user_id].drop(user_id)
    top_similar_users = similar_users.sort_values(ascending=False).head(10)

    # Hoisted out of the loops: movies the target user already rated.
    already_rated = set(train_matrix.loc[user_id].dropna().index)

    scores = {}
    sim_sums = {}

    for other_user, similarity in top_similar_users.items():
        other_ratings = train_matrix.loc[other_user].dropna()
        for movie_id, rating in other_ratings.items():
            if movie_id in already_rated:
                continue
            scores[movie_id] = scores.get(movie_id, 0) + similarity * rating
            sim_sums[movie_id] = sim_sums.get(movie_id, 0) + abs(similarity)

    # Movies whose neighbours all have zero similarity are skipped to avoid
    # dividing by zero.
    ranked_movies = sorted(
        (
            (movie_id, score / sim_sums[movie_id])
            for movie_id, score in scores.items()
            if sim_sums[movie_id] > 0
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )[:5]

    result = []
    for movie_id, score in ranked_movies:
        title = movie_lookup.get(movie_id, 'Unknown')
        poster = get_movie_poster(title)
        result.append((title, round(score, 2), poster))

    return result
79
+
80
def item_cf_recommend(movie_title):
    """Return the five movies most similar to ``movie_title`` (item-based CF).

    The title is matched case-insensitively, after trimming whitespace,
    against ``reverse_movie_lookup``. The result is a list of
    ``(title, poster_url)`` tuples ordered by decreasing similarity, or
    the string ``"Movie not found."`` / ``"No similarity data available."``.
    """
    normalized = movie_title.lower().strip()
    try:
        target_movie_id = reverse_movie_lookup[normalized]
    except KeyError:
        return "Movie not found."

    # A movie absent from the similarity frame's columns has no similarity
    # scores to rank by.
    if target_movie_id not in item_similarity_df.columns:
        return "No similarity data available."

    neighbour_scores = item_similarity_df[target_movie_id].drop(target_movie_id)
    ranked = neighbour_scores.sort_values(ascending=False)
    top_similar_ids = ranked.head(5).index

    titles = (movie_lookup.get(mid, 'Unknown') for mid in top_similar_ids)
    return [(title, get_movie_poster(title)) for title in titles]
100
+
101
def cb_recommend(movie_title):
    """Return five movies whose genres are most similar to ``movie_title``.

    The title is matched case-insensitively against ``movies['title']``.
    Candidates are ranked by cosine similarity of their TF-IDF genre
    vectors. The input movie itself and repeated titles are skipped.

    Returns a list of ``(title, poster_url)`` tuples, or the string
    ``"Movie not found."`` / ``"No genre data available."``.
    """
    movie_title = movie_title.strip().lower()

    # Lower-cased titles are kept local: the original wrote a `title_lower`
    # column into the shared `movies` frame on every call.
    lowered_titles = movies['title'].str.lower()
    matching_rows = movies.index[lowered_titles == movie_title]
    if len(matching_rows) == 0:
        return "Movie not found."

    movie_id = movies.loc[matching_rows[0], 'movieId']

    if movie_id not in movie_id_to_index:
        return "No genre data available."

    input_vec = movie_genre_matrix[movie_id_to_index[movie_id]]
    sims = cosine_similarity(input_vec, movie_genre_matrix).flatten()
    sim_indices = sims.argsort()[::-1]

    seen = set()
    result = []
    for i in sim_indices:
        rec_movie_id = unique_movie_ids[i]
        # Dictionary lookup instead of scanning the whole frame per
        # candidate; same fallback as the other recommenders.
        title = movie_lookup.get(rec_movie_id, 'Unknown')
        if title.lower() == movie_title or title in seen:
            continue
        result.append((title, get_movie_poster(title)))
        seen.add(title)
        if len(result) == 5:
            break

    return result
131
+
132
def format_recommendations(recommendations):
    """Render recommender output as an HTML snippet for the chat window.

    ``recommendations`` is either a message string, which is returned
    unchanged, or a sequence of ``(title, poster)`` / ``(title, score,
    poster)`` tuples. Each tuple becomes one ``<div>``; entries with a
    non-empty poster get a thumbnail next to the text. Entries are joined
    with ``<br>``. An empty sequence yields a short "nothing found"
    message instead of an empty chat bubble.
    """
    import html  # local import keeps this block self-contained

    if isinstance(recommendations, str):
        return recommendations
    if not recommendations:
        return "No recommendations found."

    formatted = []
    for item in recommendations:
        if len(item) == 3:
            title, score, poster = item
            details = (
                f"<b>{html.escape(str(title))}</b>"
                f"<br>Predicted rating: {html.escape(str(score))}"
            )
        else:
            title, poster = item
            details = f"<b>{html.escape(str(title))}</b>"

        if poster:
            # Titles come from the dataset and poster URLs from an external
            # API, so both are escaped before being placed in markup; the
            # URL sits inside a single-quoted attribute.
            safe_poster = html.escape(str(poster), quote=True)
            formatted.append(
                "<div style='display:flex;margin-bottom:10px;'>"
                f"<img src='{safe_poster}' "
                "style='width:80px;height:120px;object-fit:cover;margin-right:10px;'>"
                f"<div>{details}</div></div>"
            )
        else:
            formatted.append(f"<div>{details}</div>")

    return "<br>".join(formatted)
152
+
153
def respond(message, history):
    """Route a chat message to the matching recommender and format the reply.

    Recognised commands (case-insensitive):

    * ``recommend for user [ID]`` - user-based collaborative filtering
    * ``similar to [Movie Title]`` - item-based collaborative filtering
    * ``recommend like [Movie Title]`` - genre-based recommendations

    Anything else returns the list of available commands. ``history`` is
    accepted for the chat-interface callback signature and is not used.
    """
    user_prefix = "recommend for user"
    item_prefix = "similar to"
    content_prefix = "recommend like"

    message = message.lower().strip()

    if message.startswith(user_prefix):
        # Only the ID parse is guarded: the original bare `except` also
        # turned failures inside the recommender or the formatter into this
        # "invalid user ID" reply.
        try:
            user_id = int(message.split()[-1])
        except ValueError:
            return "Please provide a valid user ID after 'recommend for user'"
        return format_recommendations(user_cf_recommend(user_id))

    if message.startswith(item_prefix):
        movie_title = message[len(item_prefix):].strip()
        return format_recommendations(item_cf_recommend(movie_title))

    if message.startswith(content_prefix):
        movie_title = message[len(content_prefix):].strip()
        return format_recommendations(cb_recommend(movie_title))

    return "Available commands:\n1. 'recommend for user [ID]'\n2. 'similar to [Movie Title]'\n3. 'recommend like [Movie Title]'"
176
 
 
 
 
# Chat front-end: every message is handled by respond().
demo = gr.ChatInterface(
    respond,
    title="Movie Recommendation Chatbot",
    description="Ask for recommendations using these commands:\n1. 'recommend for user [ID]'\n2. 'similar to [Movie Title]'\n3. 'recommend like [Movie Title]'",
    examples=[
        ["recommend for user 42"],
        ["similar to Toy Story"],
        ["recommend like The Dark Knight"],
    ],
)

# Start the server only when executed as a script, so importing this module
# (tests, reload tooling) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()