File size: 2,231 Bytes
d3bd225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import streamlit as st
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Title
st.title("Movie Recommendation")
# The subtitle is plain Markdown (a level-4 heading), so raw-HTML rendering
# is left at its safe default instead of being switched on needlessly.
st.markdown("#### Recommend Movies in the Same Cluster of Genre using K-Means Clustering")

# Load dataset, one-hot encode genres, cluster with KMeans and project with PCA.
# Streamlit re-executes this script on every widget interaction, so the whole
# pipeline lives in a cached function and is only recomputed when its
# arguments change.
k = 10  # number of K-Means clusters


@st.cache_data
def load_clustered_movies(csv_path, n_clusters, random_state=42):
    """Return the movie table with cluster labels and 2-D PCA coordinates.

    Args:
        csv_path: Path to a CSV file that has at least ``title`` and
            ``genres`` columns, with genres separated by ``|``.
        n_clusters: Number of K-Means clusters to fit.
        random_state: Seed passed to K-Means so cluster labels are
            reproducible between runs.

    Returns:
        A DataFrame in which ``genres`` is a list of genre names per movie
        and the columns ``cluster``, ``pca_x`` and ``pca_y`` have been added.
        Rows without usable genre information are dropped.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If the CSV has no ``genres`` column.
    """
    frame = pd.read_csv(csv_path)

    # Keep only rows that carry real genre information. Missing values are
    # excluded as well, because they cannot be split or binarized.
    has_genres = frame['genres'].notna() & (frame['genres'] != '(no genres listed)')
    # .copy() makes the filtered frame independent, so the column assignments
    # below never write into a view of the original frame.
    frame = frame.loc[has_genres].copy()
    frame['genres'] = frame['genres'].str.split('|')

    # One-hot encode genres: one binary column per distinct genre.
    genre_matrix = MultiLabelBinarizer().fit_transform(frame['genres'])

    # n_init is pinned explicitly because its default differs between
    # scikit-learn releases; this keeps results consistent across versions.
    model = KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)
    frame['cluster'] = model.fit_predict(genre_matrix)

    # Two principal components, used only for the 2-D scatter plot.
    pca_result = PCA(n_components=2).fit_transform(genre_matrix)
    frame['pca_x'] = pca_result[:, 0]
    frame['pca_y'] = pca_result[:, 1]
    return frame


movies = load_clustered_movies('movies.csv', k)

# Streamlit Tabs
# The tab labels previously contained mojibake (UTF-8 emoji bytes decoded with
# the wrong codec); the intended emoji are restored here.
tab1, tab2, tab3 = st.tabs(["📄 Dataset Overview", "📊 Clustering Visualization", "🎬 Movie Recommender"])

# Tab 1: Dataset Overview
with tab1:
    # Header emoji restored from mojibake (it was the UTF-8 bytes of U+1F3A5
    # decoded with the wrong codec).
    st.header("🎥 Movie Dataset Overview")
    st.write("Total Movies:", len(movies))
    # Preview the first ten movies with the columns relevant to clustering.
    st.dataframe(movies[['title', 'genres', 'cluster']].head(10))

# Tab 2: Visualization
with tab2:
    # Header emoji restored from mojibake (U+1F9E0).
    st.header("🧠 Genre-Based Clustering (PCA Projection)")
    fig, ax = plt.subplots()
    # One point per movie, positioned by its PCA coordinates and coloured by
    # its cluster label.
    ax.scatter(movies['pca_x'], movies['pca_y'], c=movies['cluster'], cmap='tab10', alpha=0.6)
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    ax.set_title("Movie Genre Clusters")
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed.
    # This script re-runs on each interaction, so without an explicit close
    # the figures would pile up in memory.
    plt.close(fig)

# Tab 3: Movie Recommender
with tab3:
    # Header emoji restored from mojibake (U+1F3AC).
    st.header("🎬 Movie Recommender (Unsupervised KMeans)")

    # Missing titles are dropped so the options can be sorted as plain strings.
    movie_options = sorted(movies['title'].dropna().unique())
    selected_title = st.selectbox("Choose a movie:", movie_options)

    if selected_title is None:
        # selectbox yields None when there is nothing to choose from; looking
        # up a row in that case would fail on an empty selection.
        st.info("No movies are available to choose from.")
    else:
        # Titles are not guaranteed to be unique; the first match is used.
        selected_movie = movies[movies['title'] == selected_title].iloc[0]
        st.success(f"You selected: {selected_movie['title']}")

        # Recommend other movies that share the selected movie's cluster.
        cluster_id = selected_movie['cluster']
        same_cluster = movies['cluster'] == cluster_id
        other_title = movies['title'] != selected_movie['title']
        recs = movies[same_cluster & other_title]

        # NOTE(review): the original emoji here lost its final byte in the
        # mis-decoding and cannot be recovered exactly; the memo emoji is a
        # stand-in - confirm the intended icon.
        st.subheader("📝 Recommended Movies (Same Cluster):")
        top_titles = recs['title'].head(10)
        if top_titles.empty:
            st.info("No other movies were found in this cluster.")
        else:
            # Render the titles as a single Markdown bullet list.
            st.markdown("\n".join(f"- {title}" for title in top_titles))