Spaces:

jchen8000
/

Recommendation_Demo

Sleeping

File size: 5,669 Bytes

725c528
eceea93
 
d3012ab
725c528
30059ee
88a2d43
 
e28d195
35aecae
725c528
02f4def
 
30059ee
 
eceea93
 
725c528
bd9a4ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d3012ab
 
725c528
d3012ab
 
725c528
d3012ab
 
725c528
d3012ab
 
 
 
 
 
 
eceea93
dd07bd3
d3012ab
 
 
 
 
eceea93
d3012ab
 
eceea93
d3012ab
eceea93
d3012ab
 
 
 
eceea93
d3012ab
eceea93
d3012ab
 
 
 
eceea93
d3012ab
993d08d
c2e19f2
725c528
d3012ab
 
 
 
 
7dbbd88
 
bd9a4ce
 
 
 
 
7dbbd88
 
eceea93
d3012ab
eceea93
d3012ab
 
eceea93
725c528
eceea93

import random
import zipfile

import gradio as gr
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel

# Demo sizing knobs:
#   input_count  - how many titles are randomly sampled for the dropdowns
#   result_count - used by the recommenders to size their result lists
input_count = 300
result_count = 21

# Read the MovieLens tables straight out of the archive (no extraction to disk)
with zipfile.ZipFile('ml-latest-small.zip') as archive:
    with archive.open('ml-latest-small/movies.csv') as movies_csv:
        movies = pd.read_csv(movies_csv)
    with archive.open('ml-latest-small/ratings.csv') as ratings_csv:
        ratings = pd.read_csv(ratings_csv)

######################################
#
# Content-based Filtering
#
######################################

# Define a TF-IDF vectorizer over the genre strings. English stop words such
# as 'the' and 'a' are removed.
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string so every row can be vectorized
movies['genres'] = movies['genres'].fillna('')

# Construct the TF-IDF matrix (one row per movie) by fitting and transforming
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute the movie-by-movie similarity matrix. TfidfVectorizer L2-normalizes
# its rows by default, so the linear kernel (dot product) is the cosine
# similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Construct a reverse map from movie title to row index.
# The de-duplication has to be done on the *index* (titles): the values are
# row indices and therefore already unique, so Series.drop_duplicates() would
# remove nothing and a repeated title would make `indices[title]` return a
# Series instead of a single index. Keep the first occurrence of each title.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# Function that takes in movie title as input and outputs most similar movies
def get_cb_recommendations(title, cosine_sim=cosine_sim):
    """Return the movies most similar to *title* by genre similarity.

    Args:
        title: Movie title to look up in the module-level ``indices`` map.
        cosine_sim: Square similarity matrix indexed by movie row position.

    Returns:
        A list of ``(title, score)`` tuples, best match first, containing at
        most ``result_count - 1`` entries. The query movie itself is never
        included.

    Raises:
        KeyError: If *title* is not present in ``indices``.
    """
    idx = indices[title]

    # A title that occurs more than once in the dataset yields a Series of
    # row indices rather than a scalar; fall back to the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Rank every movie by its similarity to the query (stable sort, so ties
    # keep dataset order).
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Drop the query movie explicitly instead of assuming it is ranked first:
    # movies with identical genre vectors tie at the top score, so the query
    # is not guaranteed to be at position 0.
    limit = max(result_count - 1, 0)
    top_matches = [(i, score) for i, score in ranked if i != idx][:limit]

    return [(movies['title'].iloc[i], score) for i, score in top_matches]

# Gradio callback for the content-based tab
def recommend_movies_cb(movie):
    """Format content-based recommendations for *movie* as a text table.

    Returns a prompt message when *movie* is empty/None; otherwise a header
    line followed by one "score  title" line per recommendation.
    """
    if movie:
        row_template = "{:>5.2f}       {:<20}"
        rows = []
        for title, score in get_cb_recommendations(movie):
            rows.append(row_template.format(score, title))
        return "Score     Title\n" + "\n".join(rows)

    return "No movie selected. Please select one from the dropdown."


######################################
#
# Collaborative Filtering (Item-based)
#
######################################
# Movie-user rating matrix: one row per movieId, one column per userId.
# Missing ratings are filled with 0.
ratings_wide = ratings.pivot(index='movieId', columns='userId', values='rating')
movie_user_matrix = ratings_wide.fillna(0)

# Cosine similarity between every pair of movie rating vectors
movie_similarity = cosine_similarity(movie_user_matrix)

# Wrap the similarities in a DataFrame labelled by movieId on both axes
movie_ids = movie_user_matrix.index
movie_similarity_df = pd.DataFrame(
    movie_similarity,
    index=movie_ids,
    columns=movie_ids,
)

# Function to get movie recommendations using item-based collaborative filtering
def get_cf_recommendations(movie_title, movie_similarity_df=movie_similarity_df, movies=movies, n=result_count):
    """Return the movies whose rating patterns are most similar to *movie_title*.

    Args:
        movie_title: Title to look up in ``movies['title']``.
        movie_similarity_df: Square DataFrame of movie-to-movie similarities,
            labelled by movieId on both axes.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, score)`` tuples, best match first, excluding the
        query movie. The list is empty when the title is unknown or the movie
        has no row in ``movie_similarity_df``.
    """
    # Resolve the title to a movieId; an unknown title yields no results
    # rather than an IndexError.
    matching_ids = movies.loc[movies['title'] == movie_title, 'movieId']
    if matching_ids.empty:
        return []
    movie_id = matching_ids.iloc[0]

    # Movies absent from the similarity matrix cannot be compared
    if movie_id not in movie_similarity_df.index:
        return []

    # Remove the query movie by label before ranking. Relying on it being
    # sorted first is unsafe: other movies can tie with it at the top score
    # and the default sort does not preserve order among ties.
    similar_scores = (
        movie_similarity_df.loc[movie_id]
        .drop(labels=movie_id)
        .sort_values(ascending=False, kind='mergesort')
        .head(max(n, 0))
    )

    # Build the movieId -> title lookup once instead of scanning `movies`
    # for every recommendation (first title wins for a repeated movieId).
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title']

    return [(title_by_id[similar_id], score) for similar_id, score in similar_scores.items()]

# Function for Gradio interface
def recommend_movies_cf(movie_title):
    """Format collaborative-filtering recommendations for *movie_title*.

    Args:
        movie_title: Title chosen in the dropdown; may be empty/None when
            nothing has been selected.

    Returns:
        A text table (header plus one "score  title" line per recommendation),
        or an explanatory message when nothing is selected, the title is not
        in the dataset, or no recommendations could be produced.
    """
    # Same prompt as the content-based tab when nothing has been selected,
    # instead of reporting that movie 'None' was not found.
    if not movie_title:
        return "No movie selected. Please select one from the dropdown."

    if movie_title not in movies['title'].values:
        return f"Movie '{movie_title}' not found in the dataset."

    recommendations = get_cf_recommendations(movie_title)

    # An empty result would otherwise render as a bare header line
    if not recommendations:
        return f"No collaborative-filtering recommendations available for '{movie_title}'."

    format_string = "{:>5.2f}       {:<20}"
    return "Score     Title\n" + "\n".join(format_string.format(score, title) for title, score in recommendations)

# Offer only a random subset of the titles in the dropdowns
all_titles = movies['title'].tolist()
movie_list = random.sample(all_titles, input_count)
total_movies = len(movies)

# Both tabs show the same dropdown caption
dropdown_label = f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"

# One tab per recommender, each wrapping its callback in a gr.Interface
with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        gr.Interface(
            fn=recommend_movies_cb,
            inputs=gr.Dropdown(movie_list, label=dropdown_label),
            outputs=[gr.Textbox(label="Recommended Movies:")],
            title="Movie Recommender - Content-Based Filtering",
            description="Select a movie to get recommendations based on content filtering.",
        )

    with gr.Tab("Collaborative Filtering"):
        gr.Interface(
            fn=recommend_movies_cf,
            inputs=gr.Dropdown(movie_list, label=dropdown_label),
            outputs=[gr.Textbox(label="Recommended Movies:")],
            title="Movie Recommender - Item-Based Collaborative Filtering",
            description="Select a movie to get recommendations based on collaborative filtering.",
        )

# Start the Gradio server
iface.launch()