"""MusicMind: a Streamlit app that suggests songs to a selected user.

Two recommenders are combined:

* content-based: TF-IDF over ``title + artist_name + release`` with a
  cosine nearest-neighbour search;
* collaborative: cosine nearest-neighbour search over a user x song
  play-count matrix.
"""
from urllib.parse import quote_plus

import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

# Columns returned by every recommender, in display order.
RECOMMENDATION_COLUMNS = ['title', 'artist_name', 'release']

# Page config
st.set_page_config(
    page_title="MusicMind - Smart Music Recommendations",
    page_icon="🎵",
    layout="wide"
)

# Custom CSS
st.markdown(""" """, unsafe_allow_html=True)


# Load and prepare data
@st.cache_data
def load_data():
    """Read the song dataset from ``song_dataset.csv``."""
    return pd.read_csv("song_dataset.csv")


@st.cache_resource
def run_imps(df):
    """Validate the dataset and fit both nearest-neighbour models.

    Args:
        df: Raw dataset; must contain the columns ``user``, ``song``,
            ``play_count``, ``title``, ``artist_name`` and ``release``.

    Returns:
        A tuple ``(df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf)``
        where ``df`` is the de-duplicated frame with a fresh 0..n-1 index and
        an added ``combined_features`` column.

    Raises:
        ValueError: If any required column is missing.
    """
    required_columns = ['user', 'song', 'play_count', 'title', 'artist_name', 'release']
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Dataset must contain the following columns: {required_columns}")

    # NOTE(review): de-duplicating on the song columns keeps a single
    # (user, play_count) row per song, so the user x song matrix built below
    # only sees a fraction of the listening history. Building it from the
    # un-deduplicated frame would be more faithful, but changes the size of
    # the dense matrix -- confirm the dataset size before switching.
    #
    # The index is reset so that row labels equal row positions; the TF-IDF
    # matrix is addressed by position, and the two must agree.
    df = df.drop_duplicates(
        subset=['song', 'title', 'artist_name', 'release']
    ).reset_index(drop=True)

    # Fill missing values per column *before* concatenating; filling after
    # would blank the whole feature string when a single field is missing.
    text_parts = df[RECOMMENDATION_COLUMNS].fillna("").astype(str)
    df['combined_features'] = (
        text_parts['title'] + " " + text_parts['artist_name'] + " " + text_parts['release']
    )

    # Content-Based Filtering
    tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['combined_features'])
    nn = NearestNeighbors(n_neighbors=10, metric='cosine', algorithm='auto')
    nn.fit(tfidf_matrix)

    # Collaborative Filtering
    user_song_matrix = df.pivot_table(
        index='user', columns='song', values='play_count', fill_value=0
    )
    knn_cf = NearestNeighbors(n_neighbors=10, metric='cosine', algorithm='auto')
    knn_cf.fit(user_song_matrix)

    return df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf


df, tfidf, tfidf_matrix, nn, user_song_matrix, knn_cf = run_imps(load_data())


# Content-based recommendation function
def content_based_recommend(song_title, top_n=5):
    """Return up to ``top_n`` songs whose text features are closest to a song.

    Args:
        song_title: Title to look up in ``df['title']``; the first match is used.
        top_n: Maximum number of recommendations.

    Returns:
        DataFrame with ``title``, ``artist_name`` and ``release`` columns;
        empty when the title is not in the dataset.
    """
    matches = df.index[df['title'] == song_title]
    if len(matches) == 0:
        return pd.DataFrame(columns=RECOMMENDATION_COLUMNS)

    # Labels equal positions because run_imps resets the index.
    idx = int(matches[0])
    # kneighbors raises if asked for more neighbours than fitted rows.
    n_neighbors = min(top_n + 1, tfidf_matrix.shape[0])
    _, indices = nn.kneighbors(tfidf_matrix[idx], n_neighbors=n_neighbors)
    # Drop the query song explicitly: with tied distances it is not
    # guaranteed to be the first neighbour.
    song_indices = [i for i in indices.flatten() if i != idx][:top_n]
    return df.iloc[song_indices][RECOMMENDATION_COLUMNS].drop_duplicates()


# Collaborative recommendation function using KNN
def collaborative_recommend(user_id, top_n=5):
    """Recommend songs played by the users most similar to ``user_id``.

    Each candidate song is scored by the summed positive play counts of the
    nearest neighbours; songs the user already played are excluded.

    Args:
        user_id: A label of ``user_song_matrix.index``.
        top_n: Number of neighbours consulted and maximum number of songs kept.

    Returns:
        DataFrame with ``title``, ``artist_name`` and ``release`` columns;
        empty when the user is unknown or nothing can be recommended.
    """
    if user_id not in user_song_matrix.index:
        return pd.DataFrame(columns=RECOMMENDATION_COLUMNS)

    # Get the nearest neighbors for the user
    user_index = user_song_matrix.index.get_loc(user_id)
    user_row = user_song_matrix.iloc[user_index]
    # kneighbors raises if asked for more neighbours than fitted users.
    n_neighbors = min(top_n + 1, len(user_song_matrix))
    _, indices = knn_cf.kneighbors(
        user_song_matrix.iloc[[user_index]], n_neighbors=n_neighbors
    )
    # Drop the user explicitly: with tied distances they are not guaranteed
    # to be the first neighbour.
    neighbors = [i for i in indices.flatten() if i != user_index][:top_n]
    if not neighbors:
        return pd.DataFrame(columns=RECOMMENDATION_COLUMNS)

    # Aggregate positive play counts across neighbours in one vectorised pass.
    scores = user_song_matrix.iloc[neighbors].clip(lower=0).sum(axis=0)
    scores = scores[(user_row <= 0) & (scores > 0)]

    # Keep the highest-scoring songs
    recommended_song_ids = scores.nlargest(top_n).index
    return df[df['song'].isin(recommended_song_ids)][RECOMMENDATION_COLUMNS].drop_duplicates()


# Hybrid Recommendation
def hybrid_recommendv2(user_id, song_titles, top_n=5):
    """Merge collaborative and content-based results and pick ``top_n`` at random.

    Args:
        user_id: User passed to ``collaborative_recommend``.
        song_titles: Iterable of titles passed to ``content_based_recommend``.
        top_n: Maximum number of rows returned.

    Returns:
        De-duplicated, shuffled DataFrame with at most ``top_n`` rows.
    """
    frames = [collaborative_recommend(user_id, top_n)]
    frames.extend(content_based_recommend(title, top_n) for title in song_titles)
    # Concatenate once, and skip empty frames (pandas deprecates their use
    # in concat for dtype inference).
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pd.DataFrame(columns=RECOMMENDATION_COLUMNS)

    hybrid_recs = (
        pd.concat(frames, ignore_index=True)
        .drop_duplicates()
        .sample(frac=1)
        .reset_index(drop=True)
    )
    return hybrid_recs.head(top_n)


# Sidebar and Main UI
with st.sidebar:
    st.header("🎯 Customize Your Recommendations")
    user_id = st.selectbox(
        "Select User ID",
        options=df['user'].unique(),
        index=0
    )

    user_songs = df[df['user'] == user_id]['title'].unique()
    song_title = st.multiselect(
        "Select Songs You Like",
        options=user_songs,
        default=user_songs[:1] if len(user_songs) > 0 else None
    )

    top_n = st.slider("Number of Recommendations", min_value=1, max_value=10, value=5)
    get_recs = st.button("Get Recommendations! 🎶")

if get_recs:
    st.header("🎵 Your Recommendations")
    recommendations = hybrid_recommendv2(user_id, song_title, top_n)

    if recommendations.empty:
        st.error("No recommendations found. Try selecting different songs or users.")
    else:
        st.balloons()
        for _, row in recommendations.iterrows():
            # URL-encode the query so spaces, '&', '#', etc. in titles or
            # artist names cannot break the search link.
            query = quote_plus(f"{row['title']} {row['artist_name']}")
            youtube_link = f"https://www.youtube.com/results?search_query={query}"
            # Plain Markdown, one paragraph per field; no raw HTML is needed,
            # so dataset text is not rendered with unsafe_allow_html.
            st.markdown("\n\n".join([
                f"{row['title']}",
                f"Artist: {row['artist_name']}",
                f"Album: {row['release']}",
                f"[Watch on YouTube]({youtube_link})",
            ]))