Spaces:
Sleeping
Sleeping
File size: 3,114 Bytes
725c528 30059ee 88a2d43 e28d195 35aecae 725c528 30059ee 725c528 5254954 35aecae 725c528 5254954 9c81a28 725c528 693eb08 993d08d c2e19f2 725c528 88a2d43 993d08d 7dbbd88 924915f 7dbbd88 88a2d43 9dba8e7 c2e19f2 7dbbd88 725c528 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import gradio as gr
import zipfile
import requests
import io
import random
# Number of randomly chosen titles offered in the demo dropdown.
input_count: int = 300
# One more than the number of recommendations returned (21 -> 20 results).
result_count: int = 21
# Download and extract the MovieLens dataset
url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
# Without a timeout, requests waits forever on a stalled connection and the
# app would never finish starting.
response = requests.get(url, timeout=60)
# Surface HTTP failures (404, 5xx, ...) as a clear HTTPError here instead of
# letting an error page reach zipfile and fail as an obscure BadZipFile.
response.raise_for_status()
# The archive is read from memory; only movies.csv is needed.
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    with z.open('ml-latest-small/movies.csv') as f:
        movies = pd.read_csv(f)
# Missing genre strings become '' so the vectorizer only ever receives text.
movies['genres'] = movies['genres'].fillna('')

# TF-IDF model over the genre strings; English stop words such as 'the'
# and 'a' are ignored.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Pairwise dot products between every pair of TF-IDF rows. With the
# vectorizer's default L2 row normalisation these are cosine similarities.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
# Construct a reverse map from movie title to row index
indices = pd.Series(movies.index, index=movies['title'])
# Series.drop_duplicates() compares the *values* (row numbers, all unique),
# so it never removed anything. Deduplicate on the title index instead,
# keeping the first row per title, so indices[title] is always a scalar.
indices = indices[~indices.index.duplicated(keep='first')]
# Function that takes in movie title as input and outputs most similar movies
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the movies most similar to ``title``, best match first.

    Args:
        title: Movie title exactly as it appears in ``movies['title']``.
        cosine_sim: Square pairwise similarity matrix whose rows and columns
            follow the row order of ``movies``.

    Returns:
        A list of ``(title, score)`` tuples sorted by descending score,
        holding at most ``result_count - 1`` entries and never the query
        movie itself.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    # Get the index of the movie that matches the title
    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series;
    # use the first matching row so the code below always sees one index.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Rank every movie by its similarity to the query movie
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Exclude the query movie explicitly. Movies with identical genres tie on
    # score, so the query is not guaranteed to be first in the ranking;
    # blindly skipping position 0 could drop a real match and keep the query.
    limit = max(result_count - 1, 0)
    top = [(i, score) for i, score in ranked if i != idx][:limit]

    titles = movies['title']
    return [(titles.iloc[i], score) for i, score in top]
# Gradio interface
def recommend_movies(movie):
    """Format the recommendations for ``movie`` as a plain-text table.

    Args:
        movie: Title chosen in the dropdown; ``None`` or ``''`` when nothing
            has been selected.

    Returns:
        A string with a ``Score Title`` header followed by one line per
        recommendation, or a short prompt when no movie was selected.
    """
    # Submitting with an empty dropdown passes None, which would otherwise
    # raise KeyError in the title lookup and show a generic error in the UI.
    if not movie:
        return "Please select a movie to get recommendations."

    recommendations = get_recommendations(movie)
    format_string = "{:>5.2f} {:<20}"
    rows = [format_string.format(score, title) for title, score in recommendations]
    return "Score Title\n" + "\n".join(rows)
# Create the Gradio interface
# Offer each distinct title at most once, and never ask random.sample for
# more items than exist (it raises ValueError when k exceeds the population).
unique_titles = movies['title'].drop_duplicates().tolist()
movie_list = random.sample(unique_titles, min(input_count, len(unique_titles)))
total_movies = len(movies)
with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        gr.Interface(
            fn=recommend_movies,
            inputs=gr.Dropdown(
                movie_list,
                label=f"Select a Movie (Total movies: {total_movies}, randomly list {len(movie_list)} for demo purpose.)",
            ),
            outputs=[gr.Textbox(label="Recommended Movies:")],
            title="Movie Recommender - Content-Based Filtering",
            description="Select a movie to get recommendations based on content filtering.",
        )
    with gr.Tab("Collaborative Filtering"):
        gr.Markdown("## Recommendation - Collaborative Filtering")
        gr.Markdown("### In construction")
# Launch the app
iface.launch()
|