Spaces:
Sleeping
Sleeping
import pandas as pd | |
import numpy as np | |
from scipy.sparse import csr_matrix | |
from sklearn.metrics.pairwise import cosine_similarity | |
import gradio as gr | |
import zipfile | |
import random | |
# Demo sizing: number of titles offered in the dropdown, and number of
# recommendations returned per query.
input_count = 300
result_count = 21

# Read both MovieLens tables directly from the zip archive.
with zipfile.ZipFile('ml-latest-small.zip') as archive:
    with archive.open('ml-latest-small/movies.csv') as movies_csv:
        movies = pd.read_csv(movies_csv)
    with archive.open('ml-latest-small/ratings.csv') as ratings_csv:
        ratings = pd.read_csv(ratings_csv)

# One row per movieId, one column per userId; (movie, user) pairs without a
# rating are filled with 0.
movie_user_matrix = ratings.pivot(index='movieId', columns='userId', values='rating')
movie_user_matrix = movie_user_matrix.fillna(0)

# Pairwise cosine similarity between the movie rows.
movie_similarity = cosine_similarity(movie_user_matrix)

# Label both axes with movieId so similarities can be looked up by id.
movie_similarity_df = pd.DataFrame(
    data=movie_similarity,
    index=movie_user_matrix.index,
    columns=movie_user_matrix.index,
)
def get_cf_recommendations(movie_title, movie_similarity_df=movie_similarity_df, movies=movies, n=result_count):
    """Return the movies most similar to ``movie_title`` (item-based CF).

    Args:
        movie_title: Exact title to look up in ``movies['title']``.
        movie_similarity_df: Square DataFrame of movie-to-movie similarity
            scores whose index and columns are movieIds.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, score)`` tuples ordered by descending score.
        The list is empty when the title is unknown or when the movie has
        no row in ``movie_similarity_df``.
    """
    # Resolve the title to a movieId; if several rows share the title the
    # first one is used.
    matching_ids = movies.loc[movies['title'] == movie_title, 'movieId']
    if matching_ids.empty:
        return []
    movie_id = matching_ids.iloc[0]

    # Movies absent from the similarity matrix cannot be compared.
    if movie_id not in movie_similarity_df.index:
        return []

    # Remove the query movie by label instead of skipping the first sorted
    # entry: another movie can tie with the self-similarity score, and the
    # sort gives no guarantee about which of the tied entries comes first.
    top_scores = (
        movie_similarity_df.loc[movie_id]
        .drop(labels=movie_id, errors='ignore')
        .sort_values(ascending=False)
        .head(n)
    )

    # Pair every neighbour's title with its similarity score.
    return [
        (movies.loc[movies['movieId'] == neighbour_id, 'title'].values[0], score)
        for neighbour_id, score in top_scores.items()
    ]
def recommend_movies_cf(movie_title):
    """Format collaborative-filtering recommendations as text for the Gradio UI.

    Args:
        movie_title: Title selected by the user.

    Returns:
        A "Score Title" header followed by one line per recommendation, or
        an explanatory message when the title is not in ``movies`` or when
        no recommendations could be produced for it.
    """
    if movie_title not in movies['title'].values:
        return f"Movie '{movie_title}' not found in the dataset."

    recommendations = get_cf_recommendations(movie_title)
    # get_cf_recommendations returns an empty list for a movie that has no
    # row in the similarity matrix; say so instead of showing a bare header.
    if not recommendations:
        return f"No recommendations available for '{movie_title}'."

    # Score right-aligned under the 5-character "Score" header, then the title.
    row_format = "{:>5.2f} {:<20}"
    rows = [row_format.format(score, title) for title, score in recommendations]
    return "Score Title\n" + "\n".join(rows)
# Offer a random subset of titles in the dropdown (demo purposes, as the
# dropdown label states).
all_titles = movies['title'].tolist()
# random.sample raises ValueError when asked for more items than exist, so
# cap the sample at the number of available titles.
movie_list = random.sample(all_titles, min(input_count, len(all_titles)))
total_movies = len(movies)

# Build the Gradio interface: one tab per recommendation strategy.
with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        # TODO: the content-based interface is disabled; `recommend_movies`
        # is not defined in the visible part of this file. Original wiring
        # kept for reference:
        # gr.Interface(fn=recommend_movies,
        #              inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
        #              outputs=[gr.Textbox(label="Recommended Movies:")],
        #              title="Movie Recommender - Content-Based Filtering",
        #              description="Select a movie to get recommendations based on content filtering.")
        pass  # a `with` suite needs at least one statement; leaves the tab empty
    with gr.Tab("Collaborative Filtering"):
        gr.Interface(
            fn=recommend_movies_cf,
            inputs=gr.Dropdown(
                movie_list,
                label=f"Select a Movie (Total movies: {total_movies}, randomly list {len(movie_list)} for demo purpose.)",
            ),
            outputs=[gr.Textbox(label="Recommended Movies:")],
            title="Movie Recommender - Item-Based Collaborative Filtering",
            description="Select a movie to get recommendations based on collaborative filtering.",
        )

# Launch the app
iface.launch()