Spaces:

jchen8000
/

Recommendation_Demo

Sleeping

App Files Community

jchen8000 committed on Sep 23, 2024

Commit

eceea93

verified ·

1 Parent(s): 1581906

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -48

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import pandas as pd
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import linear_kernel
 import gradio as gr
 import zipfile
 import random
@@ -12,60 +13,63 @@ result_count = 21
 with zipfile.ZipFile('ml-latest-small.zip') as z:
     with z.open('ml-latest-small/movies.csv') as f:
         movies = pd.read_csv(f)
-# Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
-tfidf = TfidfVectorizer(stop_words='english')
-# Replace NaN with an empty string
-movies['genres'] = movies['genres'].fillna('')
-# Construct the required TF-IDF matrix by fitting and transforming the data
-tfidf_matrix = tfidf.fit_transform(movies['genres'])
-# Compute the cosine similarity matrix
-cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
-# Construct a reverse map of indices and movie titles
-indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()
-# Function that takes in movie title as input and outputs most similar movies
-def get_recommendations(title, cosine_sim=cosine_sim):
-    # Get the index of the movie that matches the title
-    idx = indices[title]
-    # Get the pairwise similarity scores of all movies with that movie
-    sim_scores = list(enumerate(cosine_sim[idx]))
-    # Sort the movies based on the similarity scores
-    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
-    # Get the scores of the 20 most similar movies
-    sim_scores = sim_scores[1:result_count]
-    # Get the movie indices
-    movie_indices = [i[0] for i in sim_scores]
-    # Return the top 20 most similar movies with their scores
-    recommendations = [(movies['title'].iloc[i], sim_scores[idx][1]) for idx, i in enumerate(movie_indices)]
-    return recommendations
-# Gradio interface
-def recommend_movies(movie):
-    if not movie:
-        return "No movie selected. Please select one from the dropdown."
-    recommendations = get_recommendations(movie)
     format_string = "{:>5.2f}       {:<20}"
     return "Score     Title\n" + "\n".join([format_string.format(score, title) for title, score in recommendations])
-# Create the Gradio interface
-movie_list = random.sample(movies['title'].tolist(), input_count)
-total_movies = len(movies)
 with gr.Blocks() as iface:
     with gr.Tab("Content-Based Filtering"):
-        # gr.Markdown("## Recommendation - Content-Based Filtering")
         gr.Interface(fn=recommend_movies,
                      inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
                      outputs=[gr.Textbox(label="Recommended Movies:")],
@@ -73,8 +77,11 @@ with gr.Blocks() as iface:
                      description="Select a movie to get recommendations based on content filtering.")
     with gr.Tab("Collaborative Filtering"):
-        gr.Markdown("## Recommendation - Collaborative Filtering")
-        gr.Markdown("### In construction")
 # Launch the app
-iface.launch()

 import pandas as pd
+import numpy as np
+from scipy.sparse import csr_matrix
+from sklearn.neighbors import NearestNeighbors
 import gradio as gr
 import zipfile
 import random
 with zipfile.ZipFile('ml-latest-small.zip') as z:
     with z.open('ml-latest-small/movies.csv') as f:
         movies = pd.read_csv(f)
+    with z.open('ml-latest-small/ratings.csv') as f:
+        ratings = pd.read_csv(f)
+# Create a user-item matrix
+user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
+# Create a sparse matrix
+user_item_matrix_sparse = csr_matrix(user_item_matrix.values)
+# Fit the NearestNeighbors model
+model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
+model_knn.fit(user_item_matrix_sparse)
+# Function to get movie recommendations using collaborative filtering
+def get_cf_recommendations(user_id, user_item_matrix=user_item_matrix, model_knn=model_knn, movies=movies):
+    if user_id not in user_item_matrix.index:
+        return []
+    user_vector = user_item_matrix.loc[user_id].values.reshape(1, -1)
+    distances, indices = model_knn.kneighbors(user_vector, n_neighbors=result_count)
+    similar_users = user_item_matrix.index[indices.flatten()]
+    similar_users_df = pd.DataFrame({'userId': similar_users, 'distance': distances.flatten()})
+    user_seen_movies = set(user_item_matrix.columns[user_item_matrix.loc[user_id] > 0])
+    recommendations = []
+    for _, row in similar_users_df.iterrows():
+        similar_user_id = row['userId']
+        similar_user_movies = set(user_item_matrix.columns[user_item_matrix.loc[similar_user_id] > 0])
+        new_movies = similar_user_movies - user_seen_movies
+        for movie_id in new_movies:
+            movie_title = movies.loc[movies['movieId'] == movie_id, 'title'].values[0]
+            score = 1 - row['distance']  # Convert distance to similarity score
+            recommendations.append((movie_title, score))
+    recommendations.sort(key=lambda x: x[1], reverse=True)
+    return recommendations[:result_count]
+# Gradio interface for collaborative filtering
+def recommend_movies_cf(user_id):
+    try:
+        user_id = int(user_id)
+    except ValueError:
+        return "Please enter a valid user ID (integer)."
+    if user_id not in user_item_matrix.index:
+        return f"User ID {user_id} not found in the dataset."
+    recommendations = get_cf_recommendations(user_id)
     format_string = "{:>5.2f}       {:<20}"
     return "Score     Title\n" + "\n".join([format_string.format(score, title) for title, score in recommendations])
+# Update the existing Gradio interface
 with gr.Blocks() as iface:
     with gr.Tab("Content-Based Filtering"):
         gr.Interface(fn=recommend_movies,
                      inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
                      outputs=[gr.Textbox(label="Recommended Movies:")],
                      description="Select a movie to get recommendations based on content filtering.")
     with gr.Tab("Collaborative Filtering"):
+        gr.Interface(fn=recommend_movies_cf,
+                     inputs=gr.Number(label="Enter User ID"),
+                     outputs=[gr.Textbox(label="Recommended Movies:")],
+                     title="Movie Recommender - Collaborative Filtering",
+                     description="Enter a user ID to get movie recommendations based on collaborative filtering.")
 # Launch the app
+iface.launch()