Spaces:

jchen8000
/

Recommendation_Demo

Sleeping

App Files Files Community

Recommendation_Demo / app.py

jchen8000

Update app.py

1581906 verified 9 months ago

raw

history blame

3.07 kB

	import pandas as pd
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import linear_kernel
	import gradio as gr
	import zipfile
	import random

	# Number of randomly sampled titles offered in the dropdown.
	input_count = 300
	# One more than the number of recommendations returned per query.
	result_count = 21

	# Read the movie metadata straight out of the MovieLens archive.
	with zipfile.ZipFile('ml-latest-small.zip') as z:
	    with z.open('ml-latest-small/movies.csv') as f:
	        movies = pd.read_csv(f)

	# Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
	tfidf = TfidfVectorizer(stop_words='english')

	# Replace NaN with an empty string so the vectorizer only ever sees text
	movies['genres'] = movies['genres'].fillna('')

	# Construct the required TF-IDF matrix by fitting and transforming the data
	tfidf_matrix = tfidf.fit_transform(movies['genres'])

	# Pairwise similarity of every movie with every other movie. TfidfVectorizer
	# L2-normalises each row by default, so the plain dot product computed by
	# linear_kernel is the cosine similarity.
	cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

	# Construct a reverse map from movie title to row position.
	# Series.drop_duplicates() compares the *values* (row positions, which are
	# already unique) and would leave repeated titles in the index, making
	# indices[title] return a Series instead of a single position. De-duplicate
	# on the index instead, keeping the first occurrence of each title.
	indices = pd.Series(movies.index, index=movies['title'])
	indices = indices[~indices.index.duplicated(keep='first')]

	# Function that takes in movie title as input and outputs most similar movies
	def get_recommendations(title, cosine_sim=cosine_sim):
	    """Return the movies most similar to ``title``.

	    Args:
	        title: Movie title, looked up in the module-level ``indices`` map.
	        cosine_sim: Similarity matrix whose row ``i`` holds the similarity
	            of movie ``i`` to every movie; defaults to the module-level
	            matrix.

	    Returns:
	        A list of ``(title, score)`` tuples ordered from most to least
	        similar, at most ``result_count - 1`` entries long. The queried
	        movie itself is never part of the result.

	    Raises:
	        KeyError: If ``title`` is not present in ``indices``.
	    """
	    # Get the row position of the movie that matches the title
	    idx = indices[title]
	    # A title that occurs more than once yields a Series of positions;
	    # fall back to the first one so a single similarity row is selected.
	    if isinstance(idx, pd.Series):
	        idx = idx.iloc[0]

	    # Pair every movie position with its similarity to the queried movie and
	    # order by score, best first (sorted() is stable, so ties keep row order).
	    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

	    # Exclude the queried movie by position rather than by assuming it sorts
	    # first: movies with identical genres all tie at the top score, so the
	    # first entry may be a different movie.
	    top = [(i, score) for i, score in ranked if i != idx][:result_count - 1]

	    # Return the most similar movies together with their scores
	    return [(movies['title'].iloc[i], score) for i, score in top]

	# Gradio callback
	def recommend_movies(movie):
	    """Render the recommendations for ``movie`` as a plain-text table.

	    Args:
	        movie: Title chosen in the dropdown; any falsy value means nothing
	            was selected.

	    Returns:
	        A string with a ``Score Title`` header followed by one line per
	        recommendation, or a prompt asking the user to pick a movie when
	        ``movie`` is falsy.
	    """
	    if movie:
	        # One "score title" row per recommendation, score right-aligned.
	        rows = [
	            "{:>5.2f} {:<20}".format(score, name)
	            for name, score in get_recommendations(movie)
	        ]
	        return "Score Title\n" + "\n".join(rows)

	    return "No movie selected. Please select one from the dropdown."

	# Build the Gradio UI
	total_movies = len(movies)
	# Only a random subset of titles is offered in the dropdown.
	movie_list = random.sample(movies['title'].tolist(), input_count)
	dropdown_label = f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"

	with gr.Blocks() as iface:
	    # First tab: the working content-based recommender.
	    with gr.Tab("Content-Based Filtering"):
	        # gr.Markdown("## Recommendation - Content-Based Filtering")
	        movie_dropdown = gr.Dropdown(movie_list, label=dropdown_label)
	        result_box = gr.Textbox(label="Recommended Movies:")
	        gr.Interface(
	            fn=recommend_movies,
	            inputs=movie_dropdown,
	            outputs=[result_box],
	            title="Movie Recommender - Content-Based Filtering",
	            description="Select a movie to get recommendations based on content filtering.",
	        )

	    # Second tab: placeholder text only.
	    with gr.Tab("Collaborative Filtering"):
	        gr.Markdown("## Recommendation - Collaborative Filtering")
	        gr.Markdown("### In construction")

	# Start serving the app
	iface.launch()