jchen8000 committed on
Commit
725c528
·
verified ·
1 Parent(s): 3e87f9d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.feature_extraction.text import TfidfVectorizer
3
+ from sklearn.metrics.pairwise import linear_kernel
4
+ import gradio as gr
5
+
6
# Load the MovieLens dataset.
# The archive bundles several files, and pandas can only read a ZIP directly
# when it holds exactly one file, so the movies table is extracted explicitly.
MOVIELENS_URL = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
MOVIES_MEMBER = 'ml-latest-small/movies.csv'


def _load_movies(url=MOVIELENS_URL, member=MOVIES_MEMBER):
    """Download the MovieLens archive and return its movies table.

    Args:
        url: Location of the MovieLens ZIP archive.
        member: Path of the movies CSV inside the archive.

    Returns:
        A DataFrame parsed from ``member``.

    Raises:
        urllib.error.URLError: If the archive cannot be downloaded.
        KeyError: If ``member`` is not present in the archive.
    """
    # Imported locally so the helper brings its own stdlib dependencies.
    import io
    import urllib.request
    import zipfile

    with urllib.request.urlopen(url) as response:
        payload = response.read()
    with zipfile.ZipFile(io.BytesIO(payload)) as archive:
        with archive.open(member) as handle:
            return pd.read_csv(handle)


movies = _load_movies()

# Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string so every row can be vectorized
movies['genres'] = movies['genres'].fillna('')

# Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# TF-IDF rows are L2-normalised by default, so the plain dot product computed
# by linear_kernel equals the cosine similarity. The result is a dense
# (n_movies x n_movies) matrix.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Construct a reverse map from movie title to row position.
# Series.drop_duplicates() would compare the *values* (row numbers, already
# unique) and leave repeated titles in place; de-duplicate on the index so a
# title lookup always yields a single position (the first occurrence).
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]
23
+
24
+ # Function that takes in movie title as input and outputs most similar movies
25
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Args:
        title: Movie title to look up in ``indices``.
        cosine_sim: Square similarity matrix whose rows and columns follow
            the row order of ``movies``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A pandas Series of titles taken from ``movies['title']``, ordered
        from most to least similar. The queried movie itself is never
        included.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    # Get the index of the movie that matches the title
    idx = indices[title]
    # A title that occurs more than once yields several positions; use the
    # first one so the row lookup below stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every other movie with its similarity score. The queried movie is
    # removed by position instead of assuming it sorts first: many movies can
    # tie at the top score, and slicing off element 0 could then drop a
    # different movie while keeping the query in its own results.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Stable sort: ties keep their original row order
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Get the movie indices of the best matches
    movie_indices = [position for position, _ in candidates[:max(top_n, 0)]]

    # Return the most similar movies
    return movies['title'].iloc[movie_indices]
43
+
44
+ # Gradio interface
45
def recommend_movies(movie):
    """Gradio callback: list the movies recommended for ``movie``.

    Args:
        movie: Title chosen in the dropdown; ``None`` or an empty string
            when nothing has been selected.

    Returns:
        A list of recommended titles, or an empty list when no movie was
        selected.
    """
    # Submitting without a selection would otherwise fail in the title lookup
    if not movie:
        return []
    recommendations = get_recommendations(movie)
    return recommendations.tolist()
48
+
49
# Create the Gradio interface
# Offer each title once, even if the dataset lists it more than once
movie_list = movies['title'].drop_duplicates().tolist()

# gr.Dropdown is the top-level component; the legacy gr.inputs namespace is
# no longer available in current Gradio releases.
iface = gr.Interface(
    fn=recommend_movies,
    inputs=gr.Dropdown(choices=movie_list),
    outputs="text",
    title="Movie Recommender",
    description="Select a movie to get recommendations based on content filtering.",
)

# Launch the app
iface.launch()