Michael Rey
added latest changes
d3bd225
raw
history blame
2.23 kB
import streamlit as st
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
# Page header: the app title followed by a one-line description of the method.
st.title("Movie Recommendation")
# The subtitle is plain Markdown (a level-4 heading) with no embedded HTML,
# so raw-HTML rendering is left at Streamlit's safe default instead of being
# switched on with unsafe_allow_html.
st.markdown("#### Recommend Movies in the Same Cluster of Genre using K-Means Clustering")
# Load the movie catalogue. The rest of the script reads its 'title' and
# 'genres' columns; 'genres' is a pipe-delimited string such as "Action|Comedy".
movies = pd.read_csv('movies.csv')

# Keep only rows with usable genre information: drop missing values (which
# would otherwise crash the split below) and the '(no genres listed)'
# placeholder. The explicit .copy() makes `movies` an independent frame, so the
# column assignments that follow do not write into a slice of the raw data
# (the pattern that triggers pandas' SettingWithCopyWarning).
has_genres = movies['genres'].notna() & (movies['genres'] != '(no genres listed)')
movies = movies[has_genres].copy()

# Turn "Action|Comedy" into ['Action', 'Comedy'] so each row holds a list of
# genre labels.
movies['genres'] = movies['genres'].apply(lambda genre_field: genre_field.split('|'))
# Encode each movie's genre list as a row of 0/1 indicators, with one column
# per distinct genre label learned from the data.
mlb = MultiLabelBinarizer().fit(movies['genres'])
genre_matrix = mlb.transform(movies['genres'])
# Group movies by genre profile with K-Means. The number of clusters and the
# random seed are both fixed here.
k = 10
model = KMeans(random_state=42, n_clusters=k)
model.fit(genre_matrix)
# Store each movie's assigned cluster index alongside its metadata.
movies['cluster'] = model.labels_
# Project the genre indicator matrix onto its first two principal components
# so the clusters can be drawn on a 2D scatter plot.
# random_state is pinned (same seed as the KMeans step) because PCA may select
# a randomized SVD solver for larger inputs; without a seed the coordinates
# could differ from one run to the next.
pca = PCA(n_components=2, random_state=42)
pca_result = pca.fit_transform(genre_matrix)
# Column 0 is the first component (x axis), column 1 the second (y axis).
movies['pca_x'] = pca_result[:, 0]
movies['pca_y'] = pca_result[:, 1]
# Top-level navigation: three tabs, filled in by the `with tabN:` sections.
# The emoji prefixes were stored as mis-decoded UTF-8 byte sequences; they are
# restored to the intended characters here.
tab1, tab2, tab3 = st.tabs([
    "📄 Dataset Overview",
    "📊 Clustering Visualization",
    "🎬 Movie Recommender",
])
# Tab 1: Dataset Overview
# Shows how many movies are in `movies` and a short preview table.
with tab1:
    # Header emoji restored from its mis-decoded UTF-8 form.
    st.header("🎥 Movie Dataset Overview")
    st.write("Total Movies:", len(movies))
    # Preview only the first ten rows, limited to the columns of interest.
    st.dataframe(movies[['title', 'genres', 'cluster']].head(10))
# Tab 2: Visualization
# Scatter plot of the 2D PCA coordinates, colored by K-Means cluster.
with tab2:
    # Header emoji restored from its mis-decoded UTF-8 form.
    st.header("🧠 Genre-Based Clustering (PCA Projection)")
    fig, ax = plt.subplots()
    scatter = ax.scatter(
        movies['pca_x'],
        movies['pca_y'],
        c=movies['cluster'],  # one color per cluster index
        cmap='tab10',
        alpha=0.6,  # translucency keeps overlapping points visible
    )
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    ax.set_title("Movie Genre Clusters")
    st.pyplot(fig)
    # plt.subplots() registers the figure with pyplot's global state. This
    # code runs every time the script executes, so close the figure once it has
    # been rendered; otherwise open figures accumulate in memory.
    plt.close(fig)
# Tab 3: Movie Recommender
# Lets the user pick a title, then lists up to ten other movies that K-Means
# placed in the same cluster.
with tab3:
    # Header emoji restored from its mis-decoded UTF-8 form.
    st.header("🎬 Movie Recommender (Unsupervised KMeans)")

    movie_options = sorted(movies['title'].unique())
    selected_title = st.selectbox("Choose a movie:", movie_options)

    # A title may appear on more than one row; the first match is used.
    selected_movie = movies[movies['title'] == selected_title].iloc[0]
    st.success(f"You selected: {selected_movie['title']}")

    # Candidates share the selected movie's cluster but not its title.
    cluster_id = selected_movie['cluster']
    in_same_cluster = movies['cluster'] == cluster_id
    is_other_title = movies['title'] != selected_movie['title']
    recs = movies[in_same_cluster & is_other_title]

    # NOTE(review): the prefix of this label looks like a mis-encoded emoji
    # whose final byte was lost, so the intended character cannot be
    # determined from the text; the literal is left untouched -- confirm
    # against the original file.
    st.subheader("πŸ“ Recommended Movies (Same Cluster):")
    if recs.empty:
        # Without this the section would silently render nothing.
        st.info("No other movies were found in this cluster.")
    for title in recs['title'].head(10):
        st.write(f"- {title}")