Michael Rey committed on
Commit
d3bd225
·
1 Parent(s): d74d55e

added latest changes

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +66 -0
  3. movies.csv +0 -0
  4. requirements.txt +4 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: City Clustering Using DBSCAN
3
  emoji: 🔥
4
  colorFrom: green
5
  colorTo: red
 
1
  ---
2
+ title: Movie Recommender Using K-Means Clustering
3
  emoji: 🔥
4
  colorFrom: green
5
  colorTo: red
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that recommends movies sharing a K-Means genre cluster.

The script loads ``movies.csv``, one-hot encodes each movie's genre list,
clusters the encoded rows with K-Means, projects them to 2D with PCA for
plotting, and renders three tabs: a dataset preview, a cluster scatter plot,
and a same-cluster movie recommender.
"""

import streamlit as st
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

MOVIES_CSV = "movies.csv"
NO_GENRES = "(no genres listed)"  # placeholder value used for untagged movies
N_CLUSTERS = 10
RANDOM_STATE = 42
MAX_RECOMMENDATIONS = 10


@st.cache_data
def load_clustered_movies(path=MOVIES_CSV, n_clusters=N_CLUSTERS):
    """Load the movie table and attach cluster labels and PCA coordinates.

    Streamlit re-executes the whole script on every widget interaction, so
    the CSV parse and both model fits are cached; they only rerun when the
    arguments (or this function's source) change. Edits to the CSV on disk
    are therefore not picked up until the cache is cleared.

    Args:
        path: CSV file with at least ``title`` and ``genres`` columns, where
            ``genres`` is a ``|``-separated string.
        n_clusters: Number of K-Means clusters to fit.

    Returns:
        A DataFrame of the movies that have genre information, with
        ``genres`` converted to a list and the extra columns ``cluster``,
        ``pca_x`` and ``pca_y``.
    """
    movies = pd.read_csv(path)

    # Rows with a missing genre would crash the split below, and the
    # placeholder value carries no genre signal, so drop both. ``.copy()``
    # makes the result an independent frame so the column assignments that
    # follow do not raise SettingWithCopyWarning.
    has_genres = movies["genres"].notna() & (movies["genres"] != NO_GENRES)
    movies = movies.loc[has_genres].copy()
    movies["genres"] = movies["genres"].str.split("|")

    # One-hot encode the genre lists: one column per distinct genre.
    genre_matrix = MultiLabelBinarizer().fit_transform(movies["genres"])

    # ``n_init`` is set explicitly because its default differs between
    # scikit-learn releases, which would change the clustering result.
    model = KMeans(n_clusters=n_clusters, n_init=10, random_state=RANDOM_STATE)
    movies["cluster"] = model.fit_predict(genre_matrix)

    # 2D projection used only for visualization. The seed keeps the plot
    # stable in case the solver PCA selects is a randomized one.
    pca = PCA(n_components=2, random_state=RANDOM_STATE)
    pca_result = pca.fit_transform(genre_matrix)
    movies["pca_x"] = pca_result[:, 0]
    movies["pca_y"] = pca_result[:, 1]

    return movies


# Title
st.title("Movie Recommendation")
st.markdown(
    "#### Recommend Movies in the Same Cluster of Genre using K-Means Clustering"
)

movies = load_clustered_movies()

# Streamlit Tabs
tab1, tab2, tab3 = st.tabs(
    ["📄 Dataset Overview", "📊 Clustering Visualization", "🎬 Movie Recommender"]
)

# Tab 1: Dataset Overview
with tab1:
    st.header("🎥 Movie Dataset Overview")
    st.write("Total Movies:", len(movies))
    st.dataframe(movies[["title", "genres", "cluster"]].head(10))

# Tab 2: Visualization
with tab2:
    st.header("🧠 Genre-Based Clustering (PCA Projection)")
    fig, ax = plt.subplots()
    ax.scatter(
        movies["pca_x"],
        movies["pca_y"],
        c=movies["cluster"],
        cmap="tab10",
        alpha=0.6,
    )
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    ax.set_title("Movie Genre Clusters")
    st.pyplot(fig)
    # Each rerun creates a new figure; close it so figures do not pile up
    # in pyplot's global registry for the lifetime of the server process.
    plt.close(fig)

# Tab 3: Movie Recommender
with tab3:
    st.header("🎬 Movie Recommender (Unsupervised KMeans)")

    movie_options = sorted(movies["title"].dropna().unique())
    selected_title = st.selectbox("Choose a movie:", movie_options)

    if selected_title is None:
        # The selectbox has nothing selected, e.g. because the option list
        # is empty.
        st.warning("No movies are available to choose from.")
    else:
        # Titles are not guaranteed unique; use the first matching row.
        selected_movie = movies[movies["title"] == selected_title].iloc[0]
        st.success(f"You selected: {selected_movie['title']}")

        cluster_id = selected_movie["cluster"]
        same_cluster = movies["cluster"] == cluster_id
        other_title = movies["title"] != selected_movie["title"]
        recs = movies[same_cluster & other_title]

        # NOTE(review): the leading emoji was truncated in the extracted
        # source; the pin glyph is a best-effort reconstruction - confirm.
        st.subheader("📍 Recommended Movies (Same Cluster):")
        if recs.empty:
            st.info("No other movies were found in this cluster.")
        for title in recs["title"].head(MAX_RECOMMENDATIONS):
            st.write(f"- {title}")
movies.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ scikit-learn
4
+ matplotlib