|
import streamlit as st |
|
import pandas as pd |
|
from sklearn.preprocessing import MultiLabelBinarizer |
|
from sklearn.cluster import KMeans |
|
from sklearn.decomposition import PCA |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
# Page heading: the app title followed by a one-line description of the approach.
APP_TITLE = "Movie Recommendation"
APP_TAGLINE = "#### Recommend Movies in the Same Cluster of Genre using K-Means Clustering"

st.title(APP_TITLE)
# The tagline is a level-4 Markdown heading.
st.markdown(APP_TAGLINE, unsafe_allow_html=True)
|
|
|
|
|
@st.cache_data
def _cluster_movies_by_genre(csv_path, n_clusters, random_state):
    """Load the movie table, one-hot encode genres, cluster them and project to 2-D.

    Streamlit re-executes the whole script on every widget interaction, so the
    pipeline is memoised with ``st.cache_data`` (Streamlit >= 1.18). The cache
    is keyed on the arguments, not on the file contents: after editing the CSV
    on disk, clear the cache to pick up the change.

    Args:
        csv_path: Path of a CSV file that has a ``genres`` column holding
            pipe-separated genre names.
        n_clusters: Number of K-Means clusters to fit.
        random_state: Seed forwarded to K-Means.

    Returns:
        A tuple ``(frame, binarizer, matrix, kmeans, projector, coords)``:
        the filtered DataFrame (with ``genres`` turned into lists and the
        added ``cluster``, ``pca_x`` and ``pca_y`` columns), the fitted
        ``MultiLabelBinarizer``, the binary genre matrix, the fitted
        ``KMeans`` model, the fitted ``PCA`` and the 2-D PCA coordinates.
    """
    raw = pd.read_csv(csv_path)

    # Drop rows without usable genres. Missing values must be excluded
    # explicitly: NaN compares unequal to the placeholder and would otherwise
    # slip through and break the split below.
    has_genres = raw["genres"].notna() & (raw["genres"] != "(no genres listed)")
    # .copy() gives an independent frame so the column assignments below do
    # not write into a slice of `raw` (SettingWithCopyWarning).
    frame = raw[has_genres].copy()
    frame["genres"] = frame["genres"].str.split("|")

    # One binary column per distinct genre.
    binarizer = MultiLabelBinarizer()
    matrix = binarizer.fit_transform(frame["genres"])

    # n_init is pinned so results do not depend on the scikit-learn version's
    # default (10 before 1.4, "auto" from 1.4 on).
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)
    frame["cluster"] = kmeans.fit_predict(matrix)

    # Two principal components, used only for plotting.
    projector = PCA(n_components=2)
    coords = projector.fit_transform(matrix)
    frame["pca_x"] = coords[:, 0]
    frame["pca_y"] = coords[:, 1]

    return frame, binarizer, matrix, kmeans, projector, coords


# Number of genre clusters.
k = 10

# Same module-level names as before, now produced by the cached pipeline.
movies, mlb, genre_matrix, model, pca, pca_result = _cluster_movies_by_genre(
    "movies.csv", k, 42
)
|
|
|
|
|
# Three top-level tabs; tab2 and tab3 are populated further down the script.
# NOTE(review): the icon bytes of the first two labels were corrupted beyond
# recovery; the chart icons used here are stand-ins - confirm the intended ones.
tab1, tab2, tab3 = st.tabs(
    ["📊 Dataset Overview", "📈 Clustering Visualization", "🎬 Movie Recommender"]
)

# Tab 1: size of the dataset and a short preview of the clustered table.
with tab1:
    st.header("🎥 Movie Dataset Overview")
    st.write("Total Movies:", len(movies))
    # Show only the first ten rows, restricted to the columns relevant here.
    st.dataframe(movies[["title", "genres", "cluster"]].head(10))
|
|
|
|
|
# Tab 2: scatter plot of the 2-D PCA projection, coloured by cluster id.
with tab2:
    st.header("🧠 Genre-Based Clustering (PCA Projection)")

    fig, ax = plt.subplots()
    ax.scatter(
        movies["pca_x"],
        movies["pca_y"],
        c=movies["cluster"],
        cmap="tab10",
        alpha=0.6,
    )
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    ax.set_title("Movie Genre Clusters")

    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # because the script reruns on each interaction, close it once rendered
    # so figures do not pile up in memory.
    plt.close(fig)
|
|
|
|
|
# Tab 3: pick a movie and list up to ten other titles from the same cluster.
with tab3:
    st.header("🎬 Movie Recommender (Unsupervised KMeans)")

    movie_options = sorted(movies["title"].unique())
    selected_title = st.selectbox("Choose a movie:", movie_options)

    if selected_title is None:
        # The selectbox yields None when there is nothing to choose from;
        # indexing the empty match below would raise IndexError.
        st.info("No movies are available to choose from.")
    else:
        # Several rows may share a title; the first match decides the cluster.
        selected_movie = movies[movies["title"] == selected_title].iloc[0]
        st.success(f"You selected: {selected_movie['title']}")

        cluster_id = selected_movie["cluster"]
        # Same cluster, excluding every row that carries the selected title.
        recs = movies[
            (movies["cluster"] == cluster_id)
            & (movies["title"] != selected_movie["title"])
        ]

        # NOTE(review): the original icon bytes of this subheader were
        # unrecoverable; the magnifier is a stand-in - confirm the intended one.
        st.subheader("🔍 Recommended Movies (Same Cluster):")
        if recs.empty:
            st.info("No other movies share this cluster.")
        for title in recs["title"].head(10):
            st.write(f"- {title}")
|
|