jchen8000 committed on
Commit
eceea93
·
verified ·
1 Parent(s): 1581906

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -48
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import pandas as pd
2
- from sklearn.feature_extraction.text import TfidfVectorizer
3
- from sklearn.metrics.pairwise import linear_kernel
 
4
  import gradio as gr
5
  import zipfile
6
  import random
@@ -12,60 +13,63 @@ result_count = 21
12
  with zipfile.ZipFile('ml-latest-small.zip') as z:
13
  with z.open('ml-latest-small/movies.csv') as f:
14
  movies = pd.read_csv(f)
 
 
15
 
16
- # Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
17
- tfidf = TfidfVectorizer(stop_words='english')
18
 
19
- # Replace NaN with an empty string
20
- movies['genres'] = movies['genres'].fillna('')
21
 
22
- # Construct the required TF-IDF matrix by fitting and transforming the data
23
- tfidf_matrix = tfidf.fit_transform(movies['genres'])
 
24
 
25
- # Compute the cosine similarity matrix
26
- cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
 
 
27
 
28
- # Construct a reverse map of indices and movie titles
29
- indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()
30
-
31
- # Function that takes in movie title as input and outputs most similar movies
32
- def get_recommendations(title, cosine_sim=cosine_sim):
33
 
34
- # Get the index of the movie that matches the title
35
- idx = indices[title]
36
-
37
- # Get the pairwise similarity scores of all movies with that movie
38
- sim_scores = list(enumerate(cosine_sim[idx]))
39
-
40
- # Sort the movies based on the similarity scores
41
- sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
42
-
43
- # Get the scores of the 20 most similar movies
44
- sim_scores = sim_scores[1:result_count]
45
-
46
- # Get the movie indices
47
- movie_indices = [i[0] for i in sim_scores]
48
-
49
- # Return the top 20 most similar movies with their scores
50
- recommendations = [(movies['title'].iloc[i], sim_scores[idx][1]) for idx, i in enumerate(movie_indices)]
51
- return recommendations
52
-
53
- # Gradio interface
54
- def recommend_movies(movie):
55
- if not movie:
56
- return "No movie selected. Please select one from the dropdown."
57
 
58
- recommendations = get_recommendations(movie)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  format_string = "{:>5.2f} {:<20}"
60
  return "Score Title\n" + "\n".join([format_string.format(score, title) for title, score in recommendations])
61
 
62
- # Create the Gradio interface
63
- movie_list = random.sample(movies['title'].tolist(), input_count)
64
- total_movies = len(movies)
65
-
66
  with gr.Blocks() as iface:
67
  with gr.Tab("Content-Based Filtering"):
68
- # gr.Markdown("## Recommendation - Content-Based Filtering")
69
  gr.Interface(fn=recommend_movies,
70
  inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
71
  outputs=[gr.Textbox(label="Recommended Movies:")],
@@ -73,8 +77,11 @@ with gr.Blocks() as iface:
73
  description="Select a movie to get recommendations based on content filtering.")
74
 
75
  with gr.Tab("Collaborative Filtering"):
76
- gr.Markdown("## Recommendation - Collaborative Filtering")
77
- gr.Markdown("### In construction")
78
-
 
 
 
79
  # Launch the app
80
- iface.launch()
 
1
  import pandas as pd
2
+ import numpy as np
3
+ from scipy.sparse import csr_matrix
4
+ from sklearn.neighbors import NearestNeighbors
5
  import gradio as gr
6
  import zipfile
7
  import random
 
13
  with zipfile.ZipFile('ml-latest-small.zip') as z:
14
  with z.open('ml-latest-small/movies.csv') as f:
15
  movies = pd.read_csv(f)
16
+ with z.open('ml-latest-small/ratings.csv') as f:
17
+ ratings = pd.read_csv(f)
18
 
19
+ # Create a user-item matrix
20
+ user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
21
 
22
+ # Create a sparse matrix
23
+ user_item_matrix_sparse = csr_matrix(user_item_matrix.values)
24
 
25
+ # Fit the NearestNeighbors model
26
+ model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
27
+ model_knn.fit(user_item_matrix_sparse)
28
 
29
+ # Function to get movie recommendations using collaborative filtering
30
def get_cf_recommendations(user_id, user_item_matrix=user_item_matrix, model_knn=model_knn, movies=movies):
    """Recommend movies the user has not rated, taken from the most similar users.

    The user's rating row is used to query ``model_knn`` for its nearest
    neighbours. Every movie that a neighbour rated (rating > 0) and the
    target user did not is a candidate, scored with ``1 - distance`` of the
    neighbour that contributed it.

    Args:
        user_id: Label of the user's row in ``user_item_matrix``.
        user_item_matrix: DataFrame indexed by user id with one column per
            movie id; 0 marks "not rated".
        model_knn: Fitted nearest-neighbours model exposing ``kneighbors``
            and ``n_samples_fit_``.
        movies: DataFrame with ``movieId`` and ``title`` columns.

    Returns:
        A list of at most ``result_count`` ``(title, score)`` tuples sorted by
        descending score, each movie appearing once. Empty when ``user_id``
        is not in ``user_item_matrix``.
    """
    if user_id not in user_item_matrix.index:
        return []

    target_row = user_item_matrix.loc[user_id].to_numpy()
    seen_mask = target_row > 0

    # Asking for more neighbours than the model was fitted on raises inside
    # scikit-learn, so cap the request for small datasets.
    n_neighbors = min(result_count, model_knn.n_samples_fit_)
    distances, indices = model_knn.kneighbors(target_row.reshape(1, -1), n_neighbors=n_neighbors)
    neighbour_ids = user_item_matrix.index[indices.flatten()]

    # Keep one entry per movie (the best score seen) so a movie rated by
    # several neighbours is not listed several times.
    best_score = {}
    for neighbour_id, distance in zip(neighbour_ids, distances.flatten()):
        if neighbour_id == user_id:
            # The query user is part of the fitted data and is returned as
            # their own neighbour; they contribute nothing new.
            continue

        score = 1 - distance  # Convert distance to similarity score
        rated_mask = user_item_matrix.loc[neighbour_id].to_numpy() > 0
        for movie_id in user_item_matrix.columns[rated_mask & ~seen_mask]:
            if movie_id not in best_score or score > best_score[movie_id]:
                best_score[movie_id] = score

    # Build the id -> title mapping once instead of scanning ``movies`` for
    # every candidate; movies without a title entry are skipped rather than
    # raising IndexError.
    title_by_id = dict(zip(movies['movieId'], movies['title']))
    recommendations = [
        (title_by_id[movie_id], score)
        for movie_id, score in best_score.items()
        if movie_id in title_by_id
    ]

    recommendations.sort(key=lambda item: item[1], reverse=True)
    return recommendations[:result_count]
55
+
56
+ # Gradio interface for collaborative filtering
57
def recommend_movies_cf(user_id):
    """Format collaborative-filtering recommendations for the Gradio textbox.

    Args:
        user_id: Raw value from the UI input. It may be ``None`` (empty
            field), a float, an int, or a string.

    Returns:
        A text table with a "Score Title" header followed by one line per
        recommendation, or an error message when ``user_id`` is not an
        integer value or is not present in ``user_item_matrix``.
    """
    invalid_message = "Please enter a valid user ID (integer)."

    try:
        parsed_id = int(user_id)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None from an empty field; ValueError: non-numeric text
        # or NaN; OverflowError: infinity.
        return invalid_message

    # int() silently truncates 1.5 to 1; reject fractional input instead of
    # answering for a user that was not asked for.
    if isinstance(user_id, float) and user_id != parsed_id:
        return invalid_message

    user_id = parsed_id
    if user_id not in user_item_matrix.index:
        return f"User ID {user_id} not found in the dataset."

    recommendations = get_cf_recommendations(user_id)
    format_string = "{:>5.2f} {:<20}"
    return "Score Title\n" + "\n".join([format_string.format(score, title) for title, score in recommendations])
69
 
70
+ # Update the existing Gradio interface
 
 
 
71
  with gr.Blocks() as iface:
72
  with gr.Tab("Content-Based Filtering"):
 
73
  gr.Interface(fn=recommend_movies,
74
  inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
75
  outputs=[gr.Textbox(label="Recommended Movies:")],
 
77
  description="Select a movie to get recommendations based on content filtering.")
78
 
79
  with gr.Tab("Collaborative Filtering"):
80
+ gr.Interface(fn=recommend_movies_cf,
81
+ inputs=gr.Number(label="Enter User ID"),
82
+ outputs=[gr.Textbox(label="Recommended Movies:")],
83
+ title="Movie Recommender - Collaborative Filtering",
84
+ description="Enter a user ID to get movie recommendations based on collaborative filtering.")
85
+
86
  # Launch the app
87
+ iface.launch()