# File-viewer header (not part of the program): jchen8000 · "Update app.py" · commit d3012ab (verified) · 3.67 kB
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
import zipfile
import random
# Number of movie titles randomly sampled to populate the selection dropdown
input_count = 300
# Default number of recommendations returned for a query movie
result_count = 21
# Read the movies and ratings tables straight out of the zipped MovieLens archive
with zipfile.ZipFile('ml-latest-small.zip') as archive:
    with archive.open('ml-latest-small/movies.csv') as movies_csv:
        movies = pd.read_csv(movies_csv)
    with archive.open('ml-latest-small/ratings.csv') as ratings_csv:
        ratings = pd.read_csv(ratings_csv)

# Rows are movies, columns are users; a missing rating is stored as 0
movie_user_matrix = ratings.pivot(index='movieId', columns='userId', values='rating')
movie_user_matrix = movie_user_matrix.fillna(0)

# Pairwise cosine similarity between movie rows, labelled by movie id on both axes
movie_ids = movie_user_matrix.index
movie_similarity = cosine_similarity(movie_user_matrix)
movie_similarity_df = pd.DataFrame(movie_similarity, index=movie_ids, columns=movie_ids)
# Function to get movie recommendations using item-based collaborative filtering
def get_cf_recommendations(movie_title, movie_similarity_df=movie_similarity_df, movies=movies, n=result_count):
    """Return the movies most similar to ``movie_title`` as ``(title, score)`` pairs.

    Args:
        movie_title: Exact title to look up in ``movies['title']``.
        movie_similarity_df: DataFrame of movie-to-movie similarity scores,
            indexed by movie id on both axes.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, score)`` tuples ordered from most to least
        similar. The list is empty when the title is not in ``movies`` or the
        movie has no row in ``movie_similarity_df``.
    """
    # Resolve the title to a movie id. An unknown title yields no
    # recommendations instead of an IndexError from indexing an empty result.
    matching_ids = movies.loc[movies['title'] == movie_title, 'movieId']
    if matching_ids.empty:
        return []
    movie_id = matching_ids.iloc[0]

    # Check if the movie is in our similarity matrix
    if movie_id not in movie_similarity_df.index:
        return []

    # Drop the query movie by label rather than assuming it sorts first:
    # other movies can tie with it at the maximum similarity, in which case a
    # positional "skip the first entry" may return the query movie itself.
    similar_scores = movie_similarity_df.loc[movie_id].drop(labels=movie_id, errors='ignore')

    # A stable sort keeps tied scores in their original order, so the result
    # is deterministic across runs.
    top_scores = similar_scores.sort_values(ascending=False, kind='stable').iloc[:max(n, 0)]

    # Pair every recommended movie id with its title and similarity score
    recommendations = []
    for similar_id, score in zip(top_scores.index, top_scores.to_numpy()):
        title = movies.loc[movies['movieId'] == similar_id, 'title'].values[0]
        recommendations.append((title, score))
    return recommendations
# Function for Gradio interface
def recommend_movies_cf(movie_title):
    """Format collaborative-filtering recommendations for ``movie_title`` as text.

    Args:
        movie_title: Title selected in the UI; must match ``movies['title']``.

    Returns:
        A multi-line string with a ``Score Title`` header followed by one
        line per recommendation, or a one-line message when the title is
        unknown or no recommendations could be produced for it.
    """
    if movie_title not in movies['title'].values:
        return f"Movie '{movie_title}' not found in the dataset."

    recommendations = get_cf_recommendations(movie_title)
    # get_cf_recommendations returns an empty list when the movie has no row
    # in the similarity matrix; say so instead of showing a bare header.
    if not recommendations:
        return f"No recommendations available for '{movie_title}'."

    format_string = "{:>5.2f} {:<20}"
    lines = [format_string.format(score, title) for title, score in recommendations]
    return "Score Title\n" + "\n".join(lines)
# Create a list of movie titles for the dropdown.
# Titles are de-duplicated so the same choice never appears twice, and the
# sample size is clamped because random.sample raises ValueError when asked
# for more items than the population holds.
unique_titles = movies['title'].drop_duplicates().tolist()
movie_list = random.sample(unique_titles, min(input_count, len(unique_titles)))
total_movies = len(movies)
# Build the Gradio interface: one tab per recommendation approach
with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        # The content-based interface below is disabled; its handler,
        # recommend_movies, is not defined in the visible part of this file.
        # A `with` body needs at least one statement, so a short notice is
        # rendered instead of leaving the tab empty.
        # gr.Interface(fn=recommend_movies,
        #              inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
        #              outputs=[gr.Textbox(label="Recommended Movies:")],
        #              title="Movie Recommender - Content-Based Filtering",
        #              description="Select a movie to get recommendations based on content filtering.")
        gr.Markdown("Content-based filtering is not available in this demo. "
                    "Please use the \"Collaborative Filtering\" tab.")
    with gr.Tab("Collaborative Filtering"):
        gr.Interface(fn=recommend_movies_cf,
                     # len(movie_list) is the number of titles actually offered in the dropdown
                     inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {len(movie_list)} for demo purpose.)"),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     title="Movie Recommender - Item-Based Collaborative Filtering",
                     description="Select a movie to get recommendations based on collaborative filtering.")

# Launch the app
iface.launch()