# Spaces: GouthamVarma / Spotify (Sleeping)
# File size: 4,626 Bytes

import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
import time
import gradio as gr
from scipy.sparse import csr_matrix

class MatrixFactorization:
    """Title-based song recommender built on a truncated SVD of play counts.

    ``fit`` builds a user x title play-count matrix from the most played
    songs and factorizes it with ``TruncatedSVD``.
    ``get_recommendations_from_titles`` scores every title against the mean
    latent vector of the titles the caller selected.
    """

    def __init__(self, n_factors=50):
        """Create an unfitted model.

        Args:
            n_factors: Number of components passed to ``TruncatedSVD``.
        """
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.title_choices = None
        self.columns = None
        # Populated by fit(); declared here so an unfitted instance still
        # carries every attribute the other methods read.
        self.user_vectors = None
        self.item_vectors = None

    def fit(self, df: pd.DataFrame, top_n: int = 10000) -> None:
        """Train the factorization on the most played songs in ``df``.

        Args:
            df: Listening data. The columns read here are ``user``,
                ``title``, ``artist_name``, ``play_count`` and ``year``.
            top_n: How many (title, artist) pairs, ranked by summed play
                count, are kept for training and for the dropdown choices.

        Side effects:
            Sets ``title_choices``, ``columns``, ``user_title_matrix``,
            ``user_vectors`` and ``item_vectors`` and prints progress
            messages to stdout.
        """
        print("Training model...")
        start_time = time.time()

        song_key = ['title', 'artist_name']

        # Rank songs by total play count and keep the top_n of them.
        top_songs = df.groupby(song_key)['play_count'].sum().reset_index()
        top_songs = top_songs.nlargest(top_n, 'play_count')

        # Keep only rows belonging to a top (title, artist) pair. Matching on
        # the title alone would also let through unrelated songs that merely
        # share a title with a top song.
        df_filtered = df.merge(top_songs[song_key], on=song_key, how='inner')

        # Pre-compute title choices for dropdown
        self.title_choices = df_filtered.groupby(song_key)['year'].first().reset_index()
        self.title_choices['display'] = self.title_choices.apply(
            lambda x: f"{x['title']} • by {x['artist_name']}" + (f" [{int(x['year'])}]" if pd.notna(x['year']) else ""),
            axis=1
        )

        # Create pivot table and cache columns. The matrix is keyed by title
        # only, so rows that share a (user, title) cell are combined by
        # pivot_table's default aggregation (mean).
        pivot = pd.pivot_table(
            df_filtered,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0
        )
        self.columns = pivot.columns

        # Convert to sparse matrix
        self.user_title_matrix = csr_matrix(pivot.values)

        # Train model: user_vectors has one row per user, item_vectors
        # (components_) has one column per title in self.columns.
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.time() - start_time:.2f} seconds")
        print(f"Number of songs in dropdown: {len(self.title_choices)}")

    def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
        """Recommend titles similar to the ones the user picked.

        Args:
            selected_titles: Dropdown display strings as produced by ``fit``
                (``"<title> • by <artist>[ [year]]"``). ``None`` or an empty
                list yields no recommendations.
            n_recommendations: Maximum number of rows to return. Zero or a
                negative value yields an empty list.

        Returns:
            A list of ``[title, artist_name, year_or_None, "<score>%"]`` rows
            ordered by descending score, excluding the selected titles. The
            percentage is the raw dot-product score times 100, so it is not
            bounded to 0-100. Any failure (unfitted model, unknown title, ...)
            is printed and results in an empty list.
        """
        if not selected_titles:
            return []

        # Broad catch is deliberate: this method is wired directly to a UI
        # callback and must degrade to "no results" instead of raising.
        try:
            if self.columns is None or self.item_vectors is None or self.title_choices is None:
                raise RuntimeError("model has not been fitted")

            # Map display strings back to titles through the table that
            # generated them; splitting on the separator alone would truncate
            # a title that itself contains " • by ". The split is kept only
            # as a fallback for strings that are not in the table.
            display_to_title = dict(
                zip(self.title_choices['display'], self.title_choices['title'])
            )
            titles = [
                display_to_title.get(choice, choice.split(" • by ")[0])
                for choice in selected_titles
            ]
            selected = set(titles)

            # Column position of every selected title (KeyError if unknown).
            indices = [self.columns.get_loc(title) for title in titles]

            # Pseudo user vector: mean of the selected titles' latent vectors.
            user_vector = np.mean([self.item_vectors[:, idx] for idx in indices], axis=0)

            # Score every title against that vector.
            scores = np.dot(user_vector, self.item_vectors)

            recommendations = []
            for idx in np.argsort(scores)[::-1]:
                # Checked before appending so n_recommendations <= 0 returns
                # nothing instead of one stray row.
                if len(recommendations) >= n_recommendations:
                    break

                title = self.columns[idx]
                if title in selected:
                    continue

                matches = self.title_choices[self.title_choices['title'] == title]
                if matches.empty:
                    # No artist/year metadata for this column; skip it rather
                    # than failing the whole request.
                    continue

                display = matches.iloc[0]
                recommendations.append([
                    title,
                    display['artist_name'],
                    int(display['year']) if pd.notna(display['year']) else None,
                    f"{scores[idx] * 100:.2f}%"
                ])

            return recommendations

        except Exception as e:
            print(f"Error in recommendations: {str(e)}")
            return []

def create_gradio_interface(mf_model):
    """Build the Gradio Blocks UI around a fitted recommender.

    Args:
        mf_model: Object exposing a ``title_choices`` frame with a
            ``display`` column and a ``get_recommendations_from_titles``
            method, which is used as the button callback.

    Returns:
        The assembled ``gr.Blocks`` application (not launched).
    """
    table_headers = ["Song", "Artist", "Year", "Confidence"]

    with gr.Blocks() as app:
        gr.Markdown("# Music Recommendation System")

        # First row: the multi-select song picker.
        with gr.Row():
            display_names = mf_model.title_choices['display'].tolist()
            song_picker = gr.Dropdown(
                choices=sorted(display_names),
                label="Select songs (up to 5)",
                multiselect=True,
                max_choices=5,
                filterable=True,
            )

        # Second row: trigger button next to the results table.
        with gr.Row():
            run_button = gr.Button("Get Recommendations")
            results_table = gr.DataFrame(
                headers=table_headers,
                label="Recommendations",
            )

        # The picker's value is the callback's only argument.
        run_button.click(
            fn=mf_model.get_recommendations_from_titles,
            inputs=song_picker,
            outputs=results_table,
        )

    return app