"""Streamlit app that recommends movies sharing a K-Means genre cluster.

The script reads ``movies.csv`` (needs ``title`` and ``genres`` columns, with
genres separated by ``|``), one-hot encodes the genres, clusters the movies
with K-Means, projects the encoding to 2D with PCA for plotting, and renders
three tabs: a dataset overview, a cluster scatter plot, and a recommender
that lists other movies from the selected movie's cluster.
"""

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import MultiLabelBinarizer

MOVIES_CSV = "movies.csv"
NO_GENRES_LABEL = "(no genres listed)"
N_CLUSTERS = 10
RANDOM_STATE = 42
MAX_RECOMMENDATIONS = 10

# Streamlit re-executes this script on every widget interaction. Caching keeps
# the CSV read and the KMeans/PCA fits from being repeated each time. Fall
# back to an identity decorator on Streamlit versions without `cache_data`.
_cache_data = getattr(st, "cache_data", None) or (lambda func: func)


@_cache_data
def load_clustered_movies(csv_path, n_clusters, random_state):
    """Load the movie table and attach cluster labels and PCA coordinates.

    Args:
        csv_path: Path of the CSV file holding ``title`` and ``genres``.
        n_clusters: Number of K-Means clusters to fit.
        random_state: Seed passed to ``KMeans``.

    Returns:
        A DataFrame whose ``genres`` column holds lists of genre names, with
        added ``cluster``, ``pca_x`` and ``pca_y`` columns.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        ValueError: If fewer than ``n_clusters`` usable rows remain.
    """
    frame = pd.read_csv(csv_path)

    # Rows without a title or genres cannot be split, encoded, or sorted.
    frame = frame.dropna(subset=["title", "genres"])
    # `.copy()` so the column assignments below write to an independent frame
    # instead of a slice of the original (avoids SettingWithCopyWarning).
    frame = frame[frame["genres"] != NO_GENRES_LABEL].copy()

    if len(frame) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} movies with genres to form "
            f"{n_clusters} clusters, but found {len(frame)}."
        )

    frame["genres"] = frame["genres"].str.split("|")

    # One-hot encode genres
    genre_matrix = MultiLabelBinarizer().fit_transform(frame["genres"])

    # Apply KMeans
    model = KMeans(n_clusters=n_clusters, random_state=random_state)
    frame["cluster"] = model.fit_predict(genre_matrix)

    # Add PCA for 2D visualization
    pca_result = PCA(n_components=2).fit_transform(genre_matrix)
    frame["pca_x"] = pca_result[:, 0]
    frame["pca_y"] = pca_result[:, 1]
    return frame


# Title
st.title("Movie Recommendation")
st.markdown(
    "#### Recommend Movies in the Same Cluster of Genre using K-Means Clustering",
    unsafe_allow_html=True,
)

# Load dataset (cached); show a readable message instead of a traceback.
try:
    movies = load_clustered_movies(MOVIES_CSV, N_CLUSTERS, RANDOM_STATE)
except FileNotFoundError:
    st.error(f"Could not find '{MOVIES_CSV}'. Place it next to this script.")
    st.stop()
except ValueError as exc:
    st.error(str(exc))
    st.stop()

# Streamlit Tabs
tab1, tab2, tab3 = st.tabs(
    ["📄 Dataset Overview", "📊 Clustering Visualization", "🎬 Movie Recommender"]
)

# Tab 1: Dataset Overview
with tab1:
    st.header("🎥 Movie Dataset Overview")
    st.write("Total Movies:", len(movies))
    st.dataframe(movies[["title", "genres", "cluster"]].head(10))

# Tab 2: Visualization
with tab2:
    st.header("🧠 Genre-Based Clustering (PCA Projection)")
    fig, ax = plt.subplots()
    ax.scatter(
        movies["pca_x"],
        movies["pca_y"],
        c=movies["cluster"],
        cmap="tab10",
        alpha=0.6,
    )
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    ax.set_title("Movie Genre Clusters")
    st.pyplot(fig)
    # pyplot keeps every figure registered until it is closed; without this
    # each rerun of the script would leave another figure in memory.
    plt.close(fig)

# Tab 3: Movie Recommender
with tab3:
    st.header("🎬 Movie Recommender (Unsupervised KMeans)")
    movie_options = sorted(movies["title"].unique())
    selected_title = st.selectbox("Choose a movie:", movie_options)

    # Titles may repeat in the data; the first matching row is used.
    selected_movie = movies[movies["title"] == selected_title].iloc[0]
    st.success(f"You selected: {selected_movie['title']}")

    cluster_id = selected_movie["cluster"]
    recs = movies[
        (movies["cluster"] == cluster_id)
        & (movies["title"] != selected_movie["title"])
    ]

    st.subheader("📍 Recommended Movies (Same Cluster):")
    if recs.empty:
        st.info("No other movies were found in this cluster.")
    for title in recs["title"].head(MAX_RECOMMENDATIONS):
        st.write(f"- {title}")