"""Truncated-SVD music recommender with a small Gradio front end."""

import time

import gradio as gr
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD

# Separator between the title and the artist in a dropdown label.
_DISPLAY_SEPARATOR = " • by "


def _format_display(title, artist, year):
    """Build the dropdown label for one (title, artist, year) triple."""
    label = f"{title}{_DISPLAY_SEPARATOR}{artist}"
    if pd.notna(year):
        label += f" [{int(year)}]"
    return label


class MatrixFactorization:
    """Item recommender built on a truncated SVD of the user x title matrix.

    ``fit`` expects a DataFrame with the columns ``user``, ``title``,
    ``artist_name``, ``year`` and ``play_count``.
    """

    def __init__(self, n_factors: int = 50) -> None:
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.title_choices = None
        self.columns = None
        # Populated by fit(); declared here so an unfitted model can be
        # detected explicitly instead of failing with AttributeError.
        self.user_vectors = None
        self.item_vectors = None
        self._display_to_title = {}
        self._title_info = {}

    def fit(self, df: pd.DataFrame) -> None:
        """Train the SVD model and pre-compute the lookup tables.

        Args:
            df: Play-count records with the columns ``user``, ``title``,
                ``artist_name``, ``year`` and ``play_count``.
        """
        print("Training model...")
        start_time = time.perf_counter()

        # One dropdown entry per (title, artist) pair.
        choices = (
            df.groupby(['title', 'artist_name'])['year'].first().reset_index()
        )
        choices['display'] = [
            _format_display(title, artist, year)
            for title, artist, year in zip(
                choices['title'], choices['artist_name'], choices['year']
            )
        ]
        self.title_choices = choices

        # Exact label -> title map, so a title that itself contains the
        # separator text is not truncated when a selection is parsed back.
        self._display_to_title = dict(zip(choices['display'], choices['title']))

        # The matrix is keyed by title only, so keep the first
        # (artist, year) seen for each title for display purposes.
        first_per_title = choices.drop_duplicates(subset='title')
        self._title_info = {
            title: (artist, year)
            for title, artist, year in zip(
                first_per_title['title'],
                first_per_title['artist_name'],
                first_per_title['year'],
            )
        }

        # User x title play-count table; the column order defines item ids.
        pivot = pd.pivot_table(
            df,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0,
        )
        self.columns = pivot.columns
        self.user_title_matrix = csr_matrix(pivot.values)

        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        elapsed = time.perf_counter() - start_time
        print(f"Training completed in {elapsed:.2f} seconds")

    def _resolve_title(self, display):
        """Map a dropdown label back to its title."""
        title = self._display_to_title.get(display)
        if title is not None:
            return title
        # Fallback for labels not produced by fit(): parse the text.
        if isinstance(display, str):
            return display.split(_DISPLAY_SEPARATOR)[0]
        return display

    def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Dropdown labels (or a single label) as
                produced by ``fit``. Labels whose title is not part of the
                trained matrix are ignored.
            n_recommendations: Maximum number of rows to return.

        Returns:
            A list of ``[title, artist, year, score]`` rows, best first.
            ``year`` is ``None`` when unknown and ``score`` is a string
            such as ``"12.34%"``. The list is empty when nothing is
            selected, no selection is known to the model, the model has
            not been fitted, or ``n_recommendations`` is not positive.
        """
        if not selected_titles or n_recommendations <= 0:
            return []
        if self.item_vectors is None or self.columns is None:
            print("Error in recommendations: model has not been fitted")
            return []
        if isinstance(selected_titles, str):
            selected_titles = [selected_titles]

        titles = [self._resolve_title(label) for label in selected_titles]

        # get_indexer returns -1 for titles missing from the matrix.
        positions = pd.Index(self.columns).get_indexer(titles)
        known = positions[positions >= 0]
        if known.size == 0:
            print("Error in recommendations: none of the selected titles are known")
            return []

        # Pseudo-user profile: mean latent vector of the selected items.
        user_vector = self.item_vectors[:, known].mean(axis=1)
        scores = user_vector @ self.item_vectors

        excluded = set(titles)
        recommendations = []
        for idx in np.argsort(scores)[::-1]:
            title = self.columns[idx]
            if title in excluded:
                continue
            artist, year = self._title_info.get(title, (None, None))
            recommendations.append([
                title,
                artist,
                int(year) if pd.notna(year) else None,
                f"{scores[idx] * 100:.2f}%",
            ])
            if len(recommendations) >= n_recommendations:
                break
        return recommendations


def create_gradio_interface(mf_model):
    """Build the Gradio UI around a fitted ``MatrixFactorization`` model.

    Args:
        mf_model: A model whose ``fit`` method has already been called.

    Returns:
        The ``gr.Blocks`` application (not yet launched).

    Raises:
        ValueError: If ``mf_model`` has not been fitted.
    """
    if mf_model.title_choices is None:
        raise ValueError("mf_model must be fitted before building the interface")

    song_labels = sorted(mf_model.title_choices['display'].tolist())

    with gr.Blocks() as demo:
        gr.Markdown("# Music Recommendation System")

        with gr.Row():
            input_songs = gr.Dropdown(
                choices=song_labels,
                label="Select songs (up to 5)",
                multiselect=True,
                max_choices=5,
                filterable=True,
            )

        with gr.Row():
            recommend_btn = gr.Button("Get Recommendations")

        output_table = gr.DataFrame(
            headers=["Song", "Artist", "Year", "Confidence"],
            label="Recommendations",
        )

        recommend_btn.click(
            fn=mf_model.get_recommendations_from_titles,
            inputs=input_songs,
            outputs=output_table,
        )

    return demo