Spaces:
Sleeping
Sleeping
import pandas as pd | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import linear_kernel | |
import gradio as gr | |
import zipfile | |
import random | |
# Number of titles offered in the dropdown.
input_count = 300
# Slice bound for the ranked results; the ranking keeps result_count - 1 movies.
result_count = 21

# Extract the MovieLens dataset
with zipfile.ZipFile('ml-latest-small.zip') as z:
    with z.open('ml-latest-small/movies.csv') as f:
        movies = pd.read_csv(f)

# Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string so the vectorizer only ever sees str values
movies['genres'] = movies['genres'].fillna('')

# Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute the cosine similarity matrix. TfidfVectorizer L2-normalises each row
# by default, so the plain dot product from linear_kernel is the cosine
# similarity.
# NOTE: the result is a dense (n_movies x n_movies) array, so memory grows
# quadratically with the number of movies.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Construct a reverse map from movie title to row position.
# Series.drop_duplicates() would compare the *values* (the row positions, which
# are already unique) and remove nothing, so de-duplicate on the title index
# instead. Keeping the first occurrence guarantees that indices[title] is
# always a scalar.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]
# Function that takes in movie title as input and outputs most similar movies
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the movies whose similarity to ``title`` is highest.

    Args:
        title: Movie title, looked up as a label in the module-level
            ``indices`` series.
        cosine_sim: Square pairwise-similarity matrix whose rows and columns
            follow the row order of ``movies``. Defaults to the module-level
            matrix.

    Returns:
        A list of at most ``result_count - 1`` ``(title, score)`` tuples,
        ordered from most to least similar. The queried movie itself is never
        part of the result.

    Raises:
        KeyError: If ``title`` is not a label of ``indices``.
    """
    # Get the row position of the movie that matches the title
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions rather
    # than a scalar; fall back to the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every other movie with its similarity to the query. The query is
    # removed by position instead of assuming it sorts first: movies with an
    # identical feature vector tie with it at the top score, and the stable
    # sort keeps ties in row order, so the query is not necessarily at rank 0.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Sort the movies based on the similarity scores, best first
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the same number of results as before (result_count - 1); clamp at
    # zero so a non-positive result_count cannot turn into a negative slice.
    top_scores = sim_scores[:max(result_count - 1, 0)]

    # Return the most similar movies with their scores
    return [(movies['title'].iloc[position], score) for position, score in top_scores]
# Gradio callback
def recommend_movies(movie):
    """Build the text shown in the results box for the selected movie.

    Args:
        movie: Title chosen in the dropdown; any falsy value means nothing
            was selected.

    Returns:
        A prompt asking for a selection when ``movie`` is falsy; otherwise a
        header line followed by one "score title" line per recommendation.
    """
    if movie:
        # One line per recommendation: right-aligned score, then padded title
        rows = []
        for title, score in get_recommendations(movie):
            rows.append("{:>5.2f} {:<20}".format(score, title))
        return "Score Title\n" + "\n".join(rows)
    return "No movie selected. Please select one from the dropdown."
# Create the Gradio interface
all_titles = movies['title'].tolist()
# random.sample raises ValueError when asked for more items than the
# population holds, so never request more titles than the dataset has.
movie_list = random.sample(all_titles, min(input_count, len(all_titles)))
total_movies = len(movies)

with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        gr.Interface(
            fn=recommend_movies,
            # The label reports how many titles are actually listed, which
            # equals input_count whenever the dataset is large enough.
            inputs=gr.Dropdown(
                movie_list,
                label=f"Select a Movie (Total movies: {total_movies}, randomly list {len(movie_list)} for demo purpose.)",
            ),
            outputs=[gr.Textbox(label="Recommended Movies:")],
            title="Movie Recommender - Content-Based Filtering",
            description="Select a movie to get recommendations based on content filtering.",
        )
    with gr.Tab("Collaborative Filtering"):
        # Placeholder tab: no recommender is wired up here yet
        gr.Markdown("## Recommendation - Collaborative Filtering")
        gr.Markdown("### In construction")

# Launch the app
iface.launch()