Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
3 |
+
from sklearn.metrics.pairwise import linear_kernel
|
4 |
+
import gradio as gr
|
5 |
+
|
6 |
+
# Load the MovieLens dataset
MOVIELENS_URL = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'


def _load_movies(url=MOVIELENS_URL, member_suffix='movies.csv'):
    """Download the MovieLens archive and return its movies table.

    ``pd.read_csv(..., compression='zip')`` only accepts archives holding a
    single data file, while this archive bundles several CSVs, so the
    wanted member is located and opened explicitly.

    Args:
        url: Location of the ZIP archive.
        member_suffix: File-name suffix identifying the member to read.

    Returns:
        DataFrame parsed from the matching archive member.

    Raises:
        FileNotFoundError: If no archive member ends with ``member_suffix``.
    """
    # Imported locally so this block does not rely on the file's import header.
    import io
    import urllib.request
    import zipfile

    with urllib.request.urlopen(url) as response:
        payload = response.read()

    with zipfile.ZipFile(io.BytesIO(payload)) as archive:
        matches = [name for name in archive.namelist() if name.endswith(member_suffix)]
        if not matches:
            raise FileNotFoundError(f"No member ending with {member_suffix!r} in {url}")
        with archive.open(matches[0]) as handle:
            return pd.read_csv(handle)


movies = _load_movies()

# Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string
movies['genres'] = movies['genres'].fillna('')

# Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Construct a reverse map of indices and movie titles.
# Duplicates must be removed on the index (the titles): the values are row
# labels and already unique, so Series.drop_duplicates() would remove nothing
# and a repeated title would map to several rows.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]
|
23 |
+
|
24 |
+
# Function that takes in movie title as input and outputs most similar movies
|
25 |
+
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the titles of the movies most similar to ``title``.

    Args:
        title: Movie title, used as a key into the module-level ``indices``.
        cosine_sim: Pairwise similarity matrix indexed by row position of
            ``movies``; defaults to the module-level matrix.

    Returns:
        A pandas Series with up to 10 titles taken from ``movies['title']``,
        ordered by decreasing similarity and never containing the query
        movie itself.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    # Get the index of the movie that matches the title
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions;
    # fall back to the first occurrence so the row lookup stays 1-D.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Pair every other movie with its similarity to the query. The query is
    # excluded by position rather than by assuming it sorts first: many
    # movies tie on the top score, and a stable sort would otherwise keep
    # the query in the results while dropping an unrelated movie.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Sort the movies based on the similarity scores (stable for ties)
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the positions of the 10 most similar movies
    movie_indices = [position for position, _ in sim_scores[:10]]

    # Return the top 10 most similar movies
    return movies['title'].iloc[movie_indices]
|
43 |
+
|
44 |
+
# Gradio interface
|
45 |
+
def recommend_movies(movie):
    """Gradio callback: return the recommended titles for ``movie`` as a list."""
    return get_recommendations(movie).tolist()
|
48 |
+
|
49 |
+
# Create the Gradio interface
movie_list = movies['title'].tolist()
iface = gr.Interface(
    fn=recommend_movies,
    # Use the top-level component: the gr.inputs namespace is deprecated and
    # no longer exists in Gradio 4.
    inputs=gr.Dropdown(choices=movie_list),
    outputs="text",
    title="Movie Recommender",
    description="Select a movie to get recommendations based on content filtering.",
)

# Launch the app
iface.launch()
|