import time

import gradio as gr
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD


class MatrixFactorization:
    """Song recommender based on a truncated SVD of a user x title matrix.

    ``fit`` builds a user-by-title play-count matrix from a listening-history
    DataFrame, factorises it with ``TruncatedSVD`` and caches the dropdown
    labels used by the UI. ``get_recommendations`` then scores every title
    against the mean latent vector of the titles the user selected.
    """

    # Separator between the title and the artist in a dropdown label.
    _ARTIST_SEPARATOR = " • by "

    def __init__(self, n_factors=50):
        """Create an unfitted model.

        Args:
            n_factors: Number of latent components passed to ``TruncatedSVD``.
        """
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.column_names = None
        # Declared up front so an unfitted model can be detected explicitly
        # instead of via AttributeError.
        self.user_vectors = None
        self.item_vectors = None
        self._cached_choices = None
        # Dropdown label -> title, filled by create_title_choices().
        self._choice_to_title = {}
        # Title -> column index in item_vectors, filled by fit().
        self._title_to_idx = None

    def fit(self, df, top_n=10000):
        """Train the SVD model on the ``top_n`` most played titles.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``play_count``, ``artist_name``, ``year`` and ``release``.
            top_n: How many titles (ranked by total play count) to keep.
                Defaults to 10000, the value that was previously hard-coded.
        """
        print("Training model...")
        start_time = time.time()

        # Restrict the data to the most played titles.
        top_songs = df.groupby('title')['play_count'].sum().nlargest(top_n).index
        df_filtered = df[df['title'].isin(top_songs)]
        print(f"Filtered to {top_n} most played songs")

        # Users as rows, titles as columns. No aggfunc is passed, so pandas'
        # default aggregation is applied to duplicate (user, title) rows.
        pivot = pd.pivot_table(
            df_filtered,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0,
        )
        self.column_names = pivot.columns
        # Built once here rather than on every recommendation request.
        self._title_to_idx = {
            title: idx for idx, title in enumerate(self.column_names)
        }

        # Convert to sparse matrix
        self.user_title_matrix = csr_matrix(pivot.values)

        # Per-title metadata used for display and for the result rows.
        self.titles_df = df_filtered.groupby('title').agg({
            'artist_name': 'first',
            'year': 'first',
            'play_count': 'sum',
            'release': 'first',
        })

        print("Training SVD model...")
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        # One column per title; indexed as item_vectors[:, idx] below.
        self.item_vectors = self.model.components_

        # Cache choices
        self._cached_choices = self.create_title_choices()

        print(f"Training completed in {time.time() - start_time:.2f} seconds")

    def _get_title_index(self):
        """Return the title -> column-index map, building it if it is missing."""
        index = getattr(self, "_title_to_idx", None)
        if index is None:
            index = {title: idx for idx, title in enumerate(self.column_names)}
            self._title_to_idx = index
        return index

    def _resolve_title(self, choice, title_to_idx):
        """Map a dropdown label (or a raw title) back to the song title."""
        mapped = getattr(self, "_choice_to_title", {}).get(choice)
        if mapped is not None:
            return mapped
        if choice in title_to_idx:
            return choice
        # Fallback for labels that were not produced by create_title_choices().
        return choice.split(self._ARTIST_SEPARATOR)[0]

    def get_recommendations(self, selected_titles, n_recommendations=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Dropdown labels as produced by
                ``create_title_choices`` (raw titles are accepted too).
            n_recommendations: Maximum number of rows to return. Defaults to
                5, the value that was previously hard-coded.

        Returns:
            A list of ``[title, artist, year_or_None, "NN.NN%"]`` rows, best
            match first. An empty list is returned when nothing is selected,
            the model is not fitted, none of the selections is a known title,
            or an unexpected error occurs (the error is printed).
        """
        if not selected_titles or n_recommendations <= 0:
            return []
        if (getattr(self, "item_vectors", None) is None
                or self.column_names is None):
            print("Error in recommendations: model has not been fitted")
            return []

        # Broad catch kept on purpose: this is the UI callback boundary and
        # the interface expects a (possibly empty) table, not an exception.
        try:
            title_to_idx = self._get_title_index()
            actual_titles = [
                self._resolve_title(choice, title_to_idx)
                for choice in selected_titles
            ]
            # Skip unknown titles instead of failing the whole request.
            known_titles = [t for t in actual_titles if t in title_to_idx]
            if not known_titles:
                return []
            excluded = set(known_titles)
            selected_indices = [title_to_idx[t] for t in known_titles]

            # Mean latent vector of the selected titles.
            user_vector = np.mean(
                [self.item_vectors[:, idx] for idx in selected_indices],
                axis=0,
            )
            scores = np.dot(user_vector, self.item_vectors)

            candidates = [
                (title, score)
                for title, score in zip(self.column_names, scores)
                if title not in excluded
            ]
            recommendations = sorted(
                candidates, key=lambda pair: pair[1], reverse=True
            )[:n_recommendations]

            results = []
            for title, score in recommendations:
                row = self.titles_df.loc[title]
                # Raw dot-product score mapped linearly, then clamped to the
                # 30-100 range shown in the UI.
                confidence = 30 + (score * 70)
                results.append([
                    title,
                    row['artist_name'],
                    int(row['year']) if pd.notna(row['year']) else None,
                    f"{min(max(confidence, 30), 100):.2f}%",
                ])
            return results
        except Exception as e:
            print(f"Error in recommendations: {str(e)}")
            return []

    def create_title_choices(self):
        """Build the sorted dropdown labels for every title in ``titles_df``.

        Labels have the form ``"Title • by Artist [Release, Year]"``; the
        bracketed part only lists the values that are present. As a side
        effect the label -> title map used by ``get_recommendations`` is
        refreshed.

        Returns:
            The labels, sorted.
        """
        title_choices = []
        choice_to_title = {}
        for title, row in self.titles_df.iterrows():
            display_text = f"{title}{self._ARTIST_SEPARATOR}{row['artist_name']}"
            extra_info = []
            if pd.notna(row['release']):
                # str() so a non-string release value cannot break the join.
                extra_info.append(str(row['release']))
            if pd.notna(row['year']):
                extra_info.append(str(int(row['year'])))
            if extra_info:
                display_text += f" [{', '.join(extra_info)}]"
            title_choices.append(display_text)
            choice_to_title[display_text] = title
        self._choice_to_title = choice_to_title
        return sorted(title_choices)


def create_gradio_interface(mf_model):
    """Build the Gradio UI around a fitted ``MatrixFactorization`` model.

    Args:
        mf_model: Model whose ``_cached_choices`` populate the dropdown and
            whose ``get_recommendations`` handles the button click.

    Returns:
        The ``gr.Blocks`` app, or ``None`` if building it failed (the error
        is printed).
    """
    # One Markdown element per line so the heading and the numbered list
    # render as such.
    instructions = "\n".join([
        "# 🎵 Music Recommendation System 🎶",
        "### Instructions:",
        "1. ⏳ Model loads songs (~1 min)",
        "2. 🔍 Search by title, artist, album, or year",
        "3. 🎧 Select up to 5 songs",
        "4. 👉 Click for recommendations",
        "5. 📊 View confidence scores (30-100%)",
        "",
    ])
    try:
        with gr.Blocks() as demo:
            gr.Markdown(instructions)
            with gr.Row():
                input_songs = gr.Dropdown(
                    choices=mf_model._cached_choices,
                    label="Search and select songs (up to 5)",
                    info="Format: Title • by Artist [Album, Year]",
                    multiselect=True,
                    max_choices=5,
                    filterable=True,
                )
            # NOTE(review): the nesting of this Column relative to the Row
            # above is reconstructed (placed below it) - confirm the layout.
            with gr.Column():
                recommend_btn = gr.Button("Get Recommendations", size="lg")
                output_table = gr.DataFrame(
                    headers=["Song", "Artist", "Year", "Confidence"],
                    label="Recommended Songs",
                )
            recommend_btn.click(
                fn=mf_model.get_recommendations,
                inputs=input_songs,
                outputs=output_table,
            )
        return demo
    except Exception as e:
        print(f"Error creating interface: {str(e)}")
        return None