jchen8000 committed on
Commit
bd9a4ce
·
verified ·
1 Parent(s): 553a8bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -7
app.py CHANGED
@@ -16,6 +16,64 @@ with zipfile.ZipFile('ml-latest-small.zip') as z:
16
  with z.open('ml-latest-small/ratings.csv') as f:
17
  ratings = pd.read_csv(f)
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # Create a movie-user matrix
20
  movie_user_matrix = ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
21
 
@@ -68,13 +126,11 @@ total_movies = len(movies)
68
  # Update the Gradio interface
69
  with gr.Blocks() as iface:
70
  with gr.Tab("Content-Based Filtering"):
71
- gr.Markdown("## Recommendation - Collaborative Filtering")
72
- gr.Markdown("### In construction")
73
- # gr.Interface(fn=recommend_movies,
74
- # inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
75
- # outputs=[gr.Textbox(label="Recommended Movies:")],
76
- # title="Movie Recommender - Content-Based Filtering",
77
- # description="Select a movie to get recommendations based on content filtering.")
78
 
79
  with gr.Tab("Collaborative Filtering"):
80
  gr.Interface(fn=recommend_movies_cf,
 
16
  with z.open('ml-latest-small/ratings.csv') as f:
17
  ratings = pd.read_csv(f)
18
 
19
+ ######################################
20
+ #
21
+ # Content-based Filtering
22
+ #
23
+ ######################################
24
+
25
# Build a TF-IDF vectorizer that ignores English stop words such as 'the', 'a'.
tfidf = TfidfVectorizer(stop_words='english')

# Missing genres become an empty string so the vectorizer can process every row.
movies['genres'] = movies['genres'].fillna('')

# Fit the vectorizer on the genre strings and transform them into a TF-IDF matrix.
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Pairwise similarity of every movie against every other movie.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse map: movie title -> row index in ``movies``.
# Series.drop_duplicates() compares *values* (the row indices, which are
# already unique), so it never removed repeated titles. De-duplicate on the
# title labels instead, keeping the first occurrence, so that a lookup by
# title always yields a single scalar index.
title_to_index = pd.Series(movies.index, index=movies['title'])
indices = title_to_index[~title_to_index.index.duplicated(keep='first')]
39
+
40
+ # Function that takes in movie title as input and outputs most similar movies
41
def get_cb_recommendations(title, cosine_sim=cosine_sim):
    """Return the movies whose genres are most similar to ``title``.

    Args:
        title: Movie title to look up in the module-level ``indices`` map.
        cosine_sim: Square similarity matrix whose rows and columns follow
            the row order of ``movies``.

    Returns:
        A list of ``(title, score)`` tuples, best match first, holding at
        most ``result_count - 1`` entries. The queried movie itself is never
        part of the result.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title shared by several rows makes the label lookup return a Series
    # instead of a scalar; fall back to the first matching row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Exclude the queried movie by position rather than by dropping the first
    # sorted entry: many movies share identical genres and tie at the top
    # score, so the queried movie is not guaranteed to sort first.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Stable sort keeps the original row order among equally scored movies.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Same result size as the previous ``[1:result_count]`` slice.
    top_matches = candidates[:max(result_count - 1, 0)]

    return [(movies['title'].iloc[position], score) for position, score in top_matches]
61
+
62
+ # Gradio interface
63
def recommend_movies_cb(movie):
    """Gradio callback for the content-based tab.

    Returns a prompt message when ``movie`` is empty; otherwise returns a
    text table with one "score title" line per recommendation produced by
    ``get_cb_recommendations``.
    """
    if movie:
        rows = []
        for title, score in get_cb_recommendations(movie):
            # Score right-aligned with two decimals, title left-aligned.
            rows.append(f"{score:>5.2f} {title:<20}")
        return "Score Title\n" + "\n".join(rows)

    return "No movie selected. Please select one from the dropdown."
70
+
71
+
72
+ ######################################
73
+ #
74
+ # Collaborative Filtering (Item-based)
75
+ #
76
+ ######################################
77
  # Create a movie-user matrix
78
  movie_user_matrix = ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
79
 
 
126
  # Update the Gradio interface
127
  with gr.Blocks() as iface:
128
  with gr.Tab("Content-Based Filtering"):
129
+ gr.Interface(fn=recommend_movies_cb,
130
+ inputs=gr.Dropdown(movie_list, label=f"Select a Movie (Total movies: {total_movies}, randomly list {input_count} for demo purpose.)"),
131
+ outputs=[gr.Textbox(label="Recommended Movies:")],
132
+ title="Movie Recommender - Content-Based Filtering",
133
+ description="Select a movie to get recommendations based on content filtering.")
 
 
134
 
135
  with gr.Tab("Collaborative Filtering"):
136
  gr.Interface(fn=recommend_movies_cf,