jchen8000 committed on
Commit
d3012ab
·
verified ·
1 Parent(s): 0312909

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -44
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import pandas as pd
2
  import numpy as np
3
  from scipy.sparse import csr_matrix
4
- from sklearn.neighbors import NearestNeighbors
5
  import gradio as gr
6
  import zipfile
7
  import random
@@ -16,58 +16,56 @@ with zipfile.ZipFile('ml-latest-small.zip') as z:
16
  with z.open('ml-latest-small/ratings.csv') as f:
17
  ratings = pd.read_csv(f)
18
 
19
- # Create a user-item matrix
20
- user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
21
 
22
- # Create a sparse matrix
23
- user_item_matrix_sparse = csr_matrix(user_item_matrix.values)
24
 
25
- # Fit the NearestNeighbors model
26
- model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
27
- model_knn.fit(user_item_matrix_sparse)
28
 
29
- # Function to get movie recommendations using collaborative filtering
30
- def get_cf_recommendations(user_id, user_item_matrix=user_item_matrix, model_knn=model_knn, movies=movies):
31
- if user_id not in user_item_matrix.index:
 
 
 
 
32
  return []
33
-
34
- user_vector = user_item_matrix.loc[user_id].values.reshape(1, -1)
35
- distances, indices = model_knn.kneighbors(user_vector, n_neighbors=result_count)
36
 
37
- similar_users = user_item_matrix.index[indices.flatten()]
38
- similar_users_df = pd.DataFrame({'userId': similar_users, 'distance': distances.flatten()})
 
 
 
39
 
40
- user_seen_movies = set(user_item_matrix.columns[user_item_matrix.loc[user_id] > 0])
 
41
 
 
42
  recommendations = []
43
- for _, row in similar_users_df.iterrows():
44
- similar_user_id = row['userId']
45
- similar_user_movies = set(user_item_matrix.columns[user_item_matrix.loc[similar_user_id] > 0])
46
- new_movies = similar_user_movies - user_seen_movies
47
-
48
- for movie_id in new_movies:
49
- movie_title = movies.loc[movies['movieId'] == movie_id, 'title'].values[0]
50
- score = 1 - row['distance'] # Convert distance to similarity score
51
- recommendations.append((movie_title, score))
52
 
53
- recommendations.sort(key=lambda x: x[1], reverse=True)
54
- return recommendations[:result_count]
55
 
56
- # Gradio interface for collaborative filtering
57
- def recommend_movies_cf(user_id):
58
- try:
59
- user_id = int(user_id)
60
- except ValueError:
61
- return "Please enter a valid user ID (integer)."
62
 
63
- if user_id not in user_item_matrix.index:
64
- return f"User ID {user_id} not found in the dataset."
65
-
66
- recommendations = get_cf_recommendations(user_id)
67
  format_string = "{:>5.2f} {:<20}"
68
  return "Score Title\n" + "\n".join([format_string.format(score, title) for title, score in recommendations])
69
 
70
- # Update the existing Gradio interface
 
 
 
 
71
  with gr.Blocks() as iface:
72
  with gr.Tab("Content-Based Filtering"):
73
  # gr.Interface(fn=recommend_movies,
@@ -75,15 +73,13 @@ with gr.Blocks() as iface:
75
  # outputs=[gr.Textbox(label="Recommended Movies:")],
76
  # title="Movie Recommender - Content-Based Filtering",
77
  # description="Select a movie to get recommendations based on content filtering.")
78
- gr.Markdown("## Recommendation - Content-Based Filtering")
79
- gr.Markdown("### In construction")
80
 
81
  with gr.Tab("Collaborative Filtering"):
82
  gr.Interface(fn=recommend_movies_cf,
83
- inputs=gr.Number(label="Enter User ID"),
84
  outputs=[gr.Textbox(label="Recommended Movies:")],
85
- title="Movie Recommender - Collaborative Filtering",
86
- description="Enter a user ID to get movie recommendations based on collaborative filtering.")
87
 
88
  # Launch the app
89
  iface.launch()
 
1
  import pandas as pd
2
  import numpy as np
3
  from scipy.sparse import csr_matrix
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
  import gradio as gr
6
  import zipfile
7
  import random
 
16
  with z.open('ml-latest-small/ratings.csv') as f:
17
  ratings = pd.read_csv(f)
18
 
19
+ # Create a movie-user matrix
20
+ movie_user_matrix = ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
21
 
22
+ # Compute the cosine similarity between movies
23
+ movie_similarity = cosine_similarity(movie_user_matrix)
24
 
25
+ # Create a DataFrame with movie similarities
26
+ movie_similarity_df = pd.DataFrame(movie_similarity, index=movie_user_matrix.index, columns=movie_user_matrix.index)
 
27
 
28
+ # Function to get movie recommendations using item-based collaborative filtering
29
def get_cf_recommendations(movie_title, movie_similarity_df=movie_similarity_df, movies=movies, n=result_count):
    """Return up to ``n`` movies most similar to ``movie_title``.

    Item-based collaborative filtering: similarity scores are read from the
    row of ``movie_similarity_df`` that belongs to the requested movie.

    Args:
        movie_title: Title to look up in ``movies['title']``.
        movie_similarity_df: DataFrame of movie-to-movie similarity scores
            whose index and columns are movieIds.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, score)`` tuples ordered by descending score.
        The list is empty when the title is unknown, when the movie has no
        row in the similarity matrix, or when ``n`` is not positive.
    """
    # Resolve the title to a movieId. An unknown title produces an empty
    # result instead of an IndexError from ``.values[0]``.
    matching_ids = movies.loc[movies['title'] == movie_title, 'movieId'].values
    if len(matching_ids) == 0 or n <= 0:
        return []
    movie_id = matching_ids[0]

    # Check if the movie is in our similarity matrix
    if movie_id not in movie_similarity_df.index:
        return []

    # Remove the query movie by label, not by position: another movie can
    # tie with it at the top score, so the query movie is not guaranteed to
    # be the first entry after sorting.
    similar_scores = (
        movie_similarity_df.loc[movie_id]
        .drop(labels=movie_id, errors='ignore')
        .sort_values(ascending=False)
        .head(n)
    )

    # Build the movieId -> title lookup once instead of filtering ``movies``
    # for every recommended movie; keep the first title per movieId.
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title']

    # Skip ids that have a similarity score but no entry in ``movies``
    # rather than failing the whole request.
    return [
        (title_by_id[idx], score)
        for idx, score in similar_scores.items()
        if idx in title_by_id.index
    ]
 
54
 
55
+ # Function for Gradio interface
56
def recommend_movies_cf(movie_title):
    """Format collaborative-filtering recommendations for the Gradio textbox.

    Args:
        movie_title: Title selected in the interface.

    Returns:
        A "not found" message when the title is not in ``movies['title']``;
        otherwise a header line followed by one "score title" line per
        recommendation returned by ``get_cf_recommendations``.
    """
    known_titles = movies['title'].values
    if movie_title not in known_titles:
        return f"Movie '{movie_title}' not found in the dataset."

    row_template = "{:>5.2f} {:<20}"
    rows = []
    for title, score in get_cf_recommendations(movie_title):
        # Score is printed first, then the title, matching the header order.
        rows.append(row_template.format(score, title))

    return "Score Title\n" + "\n".join(rows)
63
 
64
+ # Create a list of movie titles for the dropdown
65
+ movie_list = random.sample(movies['title'].tolist(), input_count)
66
+ total_movies = len(movies)
67
+
68
+ # Update the Gradio interface
69
  with gr.Blocks() as iface:
70
  with gr.Tab("Content-Based Filtering"):
71
  # gr.Interface(fn=recommend_movies,
 
73
  # outputs=[gr.Textbox(label="Recommended Movies:")],
74
  # title="Movie Recommender - Content-Based Filtering",
75
  # description="Select a movie to get recommendations based on content filtering.")
 
 
76
 
77
  with gr.Tab("Collaborative Filtering"):
78
  gr.Interface(fn=recommend_movies_cf,
79
+ inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
80
  outputs=[gr.Textbox(label="Recommended Movies:")],
81
+ title="Movie Recommender - Item-Based Collaborative Filtering",
82
+ description="Select a movie to get recommendations based on collaborative filtering.")
83
 
84
  # Launch the app
85
  iface.launch()