Spaces:
Sleeping
Sleeping
File size: 3,669 Bytes
725c528 eceea93 d3012ab 725c528 30059ee 88a2d43 e28d195 35aecae 725c528 02f4def 30059ee eceea93 725c528 d3012ab 725c528 d3012ab 725c528 d3012ab 725c528 d3012ab eceea93 dd07bd3 d3012ab eceea93 d3012ab eceea93 d3012ab eceea93 d3012ab eceea93 d3012ab eceea93 d3012ab eceea93 d3012ab 993d08d c2e19f2 725c528 d3012ab 7dbbd88 0312909 7dbbd88 eceea93 d3012ab eceea93 d3012ab eceea93 725c528 eceea93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
import zipfile
import random
input_count = 300
result_count = 21

# Read the MovieLens tables directly out of the zip archive
with zipfile.ZipFile('ml-latest-small.zip') as archive:
    with archive.open('ml-latest-small/movies.csv') as movies_file:
        movies = pd.read_csv(movies_file)
    with archive.open('ml-latest-small/ratings.csv') as ratings_file:
        ratings = pd.read_csv(ratings_file)

# Movie-by-user rating matrix: one row per movieId, one column per userId;
# (movie, user) pairs without a rating are filled with 0
movie_user_matrix = ratings.pivot(index='movieId', columns='userId', values='rating')
movie_user_matrix = movie_user_matrix.fillna(0)

# Pairwise cosine similarity between the movie rows
movie_similarity = cosine_similarity(movie_user_matrix)

# Same similarities as a DataFrame labelled by movieId on both axes
movie_similarity_df = pd.DataFrame(
    movie_similarity,
    index=movie_user_matrix.index,
    columns=movie_user_matrix.index,
)
# Function to get movie recommendations using item-based collaborative filtering
def get_cf_recommendations(movie_title, movie_similarity_df=movie_similarity_df, movies=movies, n=result_count):
    """Return up to ``n`` movies most similar to ``movie_title``.

    Args:
        movie_title: Title to look up in the ``title`` column of ``movies``.
        movie_similarity_df: Square DataFrame of similarity scores whose index
            and columns are movie IDs.
        movies: DataFrame with ``movieId`` and ``title`` columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, score)`` tuples ordered by descending score.
        The list is empty when the title is unknown or none of its movie IDs
        has a row in ``movie_similarity_df``.
    """
    # If more than one row carries this title, use the first ID that actually
    # has similarity data instead of blindly taking the first match.
    matching_ids = movies.loc[movies['title'] == movie_title, 'movieId']
    movie_id = next(
        (candidate for candidate in matching_ids if candidate in movie_similarity_df.index),
        None,
    )
    if movie_id is None:
        return []

    # Drop the query movie by label rather than skipping position 0: another
    # movie can tie with it on score, so it is not guaranteed to sort first.
    similar_scores = (
        movie_similarity_df.loc[movie_id]
        .drop(movie_id)
        .sort_values(ascending=False)
        .head(max(n, 0))
    )

    # Build the id -> title lookup once instead of scanning `movies` per result
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title']
    return [(title_by_id.loc[idx], score) for idx, score in similar_scores.items()]
# Function for Gradio interface
def recommend_movies_cf(movie_title):
    """Format collaborative-filtering recommendations as display text.

    Args:
        movie_title: Title selected in the UI.

    Returns:
        A multi-line string with a ``Score Title`` header followed by one
        line per recommendation, or a one-line message when the title is not
        in ``movies`` or no recommendations could be produced for it.
    """
    if movie_title not in movies['title'].values:
        return f"Movie '{movie_title}' not found in the dataset."

    recommendations = get_cf_recommendations(movie_title)
    # get_cf_recommendations returns an empty list when the movie has no row
    # in the similarity matrix; explain that instead of showing a bare header.
    if not recommendations:
        return f"No recommendations available for '{movie_title}'."

    rows = [f"{score:>5.2f} {title:<20}" for title, score in recommendations]
    return "Score Title\n" + "\n".join(rows)
# Create a list of movie titles for the dropdown
total_movies = len(movies)
# random.sample raises ValueError when asked for more items than exist,
# so cap the sample size at the number of available titles.
movie_list = random.sample(movies['title'].tolist(), min(input_count, total_movies))
dropdown_label = (
    f"Select a Movie (Total movies: {total_movies}, "
    f"randomly list {len(movie_list)} for demo purpose.)"
)

# Build the Gradio interface
with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        # TODO: re-enable once a content-based `recommend_movies` function
        # exists; it is not defined in the visible part of this file.
        # gr.Interface(fn=recommend_movies,
        #              inputs=gr.Dropdown(movie_list, label=dropdown_label),
        #              outputs=[gr.Textbox(label="Recommended Movies:")],
        #              title="Movie Recommender - Content-Based Filtering",
        #              description="Select a movie to get recommendations based on content filtering.")
        # A `with` body needs at least one statement, so show a placeholder.
        gr.Markdown("Content-based filtering is not available in this demo yet.")
    with gr.Tab("Collaborative Filtering"):
        gr.Interface(
            fn=recommend_movies_cf,
            inputs=gr.Dropdown(movie_list, label=dropdown_label),
            outputs=[gr.Textbox(label="Recommended Movies:")],
            title="Movie Recommender - Item-Based Collaborative Filtering",
            description="Select a movie to get recommendations based on collaborative filtering.",
        )

# Launch the app
iface.launch()