# Spaces: Sleeping  (hosting-page status banner captured with the source; not part of the program)
import pandas as pd | |
import numpy as np | |
from scipy.sparse import csr_matrix | |
from sklearn.neighbors import NearestNeighbors | |
import gradio as gr | |
import zipfile | |
import random | |
# Demo sizing knobs: `input_count` is quoted in the movie dropdown label,
# `result_count` is both the neighbour count queried and the result cap.
input_count = 300
result_count = 21

# Read both MovieLens tables straight out of the zip archive.
with zipfile.ZipFile('ml-latest-small.zip') as z:
    with z.open('ml-latest-small/movies.csv') as f:
        movies = pd.read_csv(f)
    with z.open('ml-latest-small/ratings.csv') as f:
        ratings = pd.read_csv(f)

# One row per user, one column per movie; a missing rating is stored as 0.
user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')
user_item_matrix = user_item_matrix.fillna(0)

# Sparse copy of the same matrix, used only to fit the neighbour model.
user_item_matrix_sparse = csr_matrix(user_item_matrix.to_numpy())

# Brute-force cosine-distance neighbour search over the user rows.
model_knn = NearestNeighbors(
    n_neighbors=20,
    metric='cosine',
    algorithm='brute',
    n_jobs=-1,
).fit(user_item_matrix_sparse)
# Function to get movie recommendations using collaborative filtering
def get_cf_recommendations(user_id, user_item_matrix=user_item_matrix, model_knn=model_knn, movies=movies):
    """Recommend movies the user has not rated, taken from similar users.

    The user's rating row is sent to ``model_knn.kneighbors``; every movie
    rated (> 0) by a returned neighbour but not by ``user_id`` becomes a
    candidate, scored with ``1 - distance`` of that neighbour.

    Args:
        user_id: Label of the user's row in ``user_item_matrix``.
        user_item_matrix: DataFrame indexed by user, one column per movie id,
            with 0 meaning "not rated".
        model_knn: Fitted neighbour model exposing ``kneighbors``. Its row
            positions are mapped back through ``user_item_matrix.index``, so
            it is assumed to have been fitted on this matrix's rows.
        movies: DataFrame with ``movieId`` and ``title`` columns.

    Returns:
        A list of at most ``result_count`` ``(title, score)`` tuples sorted
        by descending score (ties broken by title). Each movie appears once,
        carrying the score of its most similar neighbour. An empty list is
        returned when ``user_id`` is not in the matrix.
    """
    if user_id not in user_item_matrix.index:
        return []

    user_ratings = user_item_matrix.loc[user_id]
    user_seen_movies = set(user_item_matrix.columns[user_ratings > 0])

    # kneighbors raises ValueError when asked for more neighbours than there
    # are fitted rows, so clamp the request to the number of users.
    n_neighbors = min(result_count, len(user_item_matrix.index))
    distances, indices = model_knn.kneighbors(
        user_ratings.values.reshape(1, -1), n_neighbors=n_neighbors
    )
    similar_users = user_item_matrix.index[indices.flatten()]

    # One id -> title mapping instead of a full scan of `movies` per candidate.
    # drop_duplicates keeps the first row per id, matching the old `.values[0]`.
    title_by_id = (
        movies.drop_duplicates('movieId').set_index('movieId')['title'].to_dict()
    )

    # Best similarity seen per movie: a movie rated by several neighbours must
    # show up once, not once per neighbour.
    best_score = {}
    # zip keeps the user ids as-is; iterrows() would up-cast them to float.
    for similar_user_id, distance in zip(similar_users, distances.flatten()):
        if similar_user_id == user_id:
            # The query user is normally their own nearest neighbour and has
            # no unseen movies to offer.
            continue
        score = 1 - distance  # Convert distance to similarity score
        neighbour_ratings = user_item_matrix.loc[similar_user_id]
        for movie_id in user_item_matrix.columns[neighbour_ratings > 0]:
            if movie_id in user_seen_movies:
                continue
            if movie_id not in best_score or score > best_score[movie_id]:
                best_score[movie_id] = score

    # Movies without a metadata row cannot be shown by title, so they are
    # skipped rather than raising IndexError.
    recommendations = [
        (title_by_id[movie_id], score)
        for movie_id, score in best_score.items()
        if movie_id in title_by_id
    ]
    # Secondary sort on title makes the order of equal scores deterministic.
    recommendations.sort(key=lambda item: (-item[1], item[0]))
    return recommendations[:result_count]
# Gradio interface for collaborative filtering
def recommend_movies_cf(user_id):
    """Format collaborative-filtering recommendations for the Gradio textbox.

    Args:
        user_id: Raw value from the UI; anything ``int()`` accepts that
            represents a whole number (e.g. ``5``, ``5.0``, ``"5"``).

    Returns:
        A ``"Score Title"`` header followed by one ``score title`` line per
        recommendation, or an explanatory message when the input is not a
        whole number or the user is not in ``user_item_matrix``.
    """
    invalid_id_message = "Please enter a valid user ID (integer)."

    # Reject fractional / inf / nan input instead of letting int() silently
    # truncate it to a different user (int(3.7) == 3).
    if isinstance(user_id, float) and not user_id.is_integer():
        return invalid_id_message

    try:
        user_id = int(user_id)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None, which an empty number field produces.
        return invalid_id_message

    if user_id not in user_item_matrix.index:
        return f"User ID {user_id} not found in the dataset."

    recommendations = get_cf_recommendations(user_id)
    format_string = "{:>5.2f} {:<20}"
    return "Score Title\n" + "\n".join(
        format_string.format(score, title) for title, score in recommendations
    )
# Two-tab Gradio front end: one tab per recommender, each wrapping its
# callback in its own gr.Interface.
with gr.Blocks() as iface:
    # Tab 1: pick a movie title, get content-based recommendations.
    with gr.Tab("Content-Based Filtering"):
        gr.Interface(
            title="Movie Recommender - Content-Based Filtering",
            description="Select a movie to get recommendations based on content filtering.",
            fn=recommend_movies,
            inputs=gr.Dropdown(
                movie_list,
                label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)",
            ),
            outputs=[gr.Textbox(label="Recommended Movies:")],
        )

    # Tab 2: enter a user id, get collaborative-filtering recommendations.
    with gr.Tab("Collaborative Filtering"):
        gr.Interface(
            title="Movie Recommender - Collaborative Filtering",
            description="Enter a user ID to get movie recommendations based on collaborative filtering.",
            fn=recommend_movies_cf,
            inputs=gr.Number(label="Enter User ID"),
            outputs=[gr.Textbox(label="Recommended Movies:")],
        )

# Launch the app
iface.launch()