File size: 4,626 Bytes
81b661c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d49e583
 
 
 
 
 
 
81b661c
d49e583
81b661c
 
 
 
 
 
 
d49e583
81b661c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d49e583
81b661c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
import time
import gradio as gr
from scipy.sparse import csr_matrix

class MatrixFactorization:
    """Title-level music recommender built on a truncated SVD of play counts.

    ``fit`` factorises a user x title play-count matrix, and
    ``get_recommendations_from_titles`` ranks every title against the mean
    latent vector of the titles the user selected.
    """

    # Text placed between title and artist in the dropdown labels.
    _DISPLAY_SEPARATOR = " • by "

    def __init__(self, n_factors=50, n_top_songs=10000):
        """Configure the model.

        Args:
            n_factors: Number of SVD components to keep.
            n_top_songs: How many of the most played (title, artist) pairs
                are kept for training and offered in the dropdown.
        """
        self.n_factors = n_factors
        self.n_top_songs = n_top_songs
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.title_choices = None
        self.columns = None
        self.user_vectors = None
        self.item_vectors = None

    @classmethod
    def _format_display(cls, title, artist, year):
        """Build the dropdown label for one song; the year is optional."""
        label = f"{title}{cls._DISPLAY_SEPARATOR}{artist}"
        if pd.notna(year):
            label += f" [{int(year)}]"
        return label

    def _titles_from_display(self, selected_titles):
        """Map dropdown labels back to the titles used as matrix columns."""
        # Exact lookup first: cutting the label at the separator would
        # truncate any title that itself contains the separator text.
        label_to_title = dict(
            zip(self.title_choices['display'], self.title_choices['title'])
        )
        titles = []
        for label in selected_titles:
            if label in label_to_title:
                titles.append(label_to_title[label])
            else:
                # Not a known label (e.g. a bare title was passed in).
                titles.append(label.split(self._DISPLAY_SEPARATOR)[0])
        return titles

    def fit(self, df):
        """Train the SVD model on listening data.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``artist_name``, ``play_count`` and ``year``.

        Populates ``title_choices``, ``columns``, ``user_title_matrix``,
        ``user_vectors`` and ``item_vectors``.
        """
        print("Training model...")
        start_time = time.time()

        song_keys = ['title', 'artist_name']

        # Rank songs by total play count and keep the most played ones.
        top_songs = df.groupby(song_keys)['play_count'].sum().reset_index()
        top_songs = top_songs.nlargest(self.n_top_songs, 'play_count')

        # A song is identified by (title, artist). Matching on the title
        # alone would also keep unrelated songs that merely share a name
        # with a top song, so filter on both keys.
        df_filtered = df.merge(top_songs[song_keys], on=song_keys, how='inner')

        # Pre-compute title choices for the dropdown.
        self.title_choices = (
            df_filtered.groupby(song_keys)['year'].first().reset_index()
        )
        self.title_choices['display'] = [
            self._format_display(title, artist, year)
            for title, artist, year in zip(
                self.title_choices['title'],
                self.title_choices['artist_name'],
                self.title_choices['year'],
            )
        ]

        # NOTE(review): pivot_table combines duplicate (user, title) rows
        # with its default aggregation (mean) - confirm that a mean rather
        # than a sum of play counts is intended.
        pivot = pd.pivot_table(
            df_filtered,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0
        )
        self.columns = pivot.columns

        # The pivot is a dense table; it is converted to CSR for the SVD.
        self.user_title_matrix = csr_matrix(pivot.values)

        # Rows of user_vectors are users; columns of item_vectors are titles.
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.time() - start_time:.2f} seconds")
        print(f"Number of songs in dropdown: {len(self.title_choices)}")

    def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Dropdown labels (or bare titles) chosen by the
                user.
            n_recommendations: Maximum number of rows to return; zero or a
                negative value yields an empty list.

        Returns:
            A list of ``[title, artist, year_or_None, "score%"]`` rows in
            descending score order, excluding the selected titles. Returns
            an empty list when nothing is selected, when none of the
            selections is known to the model, or when an error occurs.
        """
        if not selected_titles:
            return []

        # This method is a UI callback, so failures are reported and turned
        # into an empty result instead of propagating.
        try:
            if n_recommendations <= 0:
                return []

            titles = self._titles_from_display(selected_titles)

            # Unknown selections are skipped rather than failing the request.
            column_of = {title: pos for pos, title in enumerate(self.columns)}
            indices = [column_of[title] for title in titles if title in column_of]
            if not indices:
                return []

            # Pseudo-user vector: mean latent vector of the selected titles.
            user_vector = np.mean(
                [self.item_vectors[:, idx] for idx in indices], axis=0
            )
            scores = np.dot(user_vector, self.item_vectors)

            chosen = set(titles)
            recommendations = []
            for idx in np.argsort(scores)[::-1]:
                title = self.columns[idx]
                if title in chosen:
                    continue
                matches = self.title_choices[self.title_choices['title'] == title]
                if matches.empty:
                    # No artist/year metadata for this column; skip it.
                    continue
                info = matches.iloc[0]
                recommendations.append([
                    title,
                    info['artist_name'],
                    int(info['year']) if pd.notna(info['year']) else None,
                    f"{scores[idx] * 100:.2f}%"
                ])
                if len(recommendations) >= n_recommendations:
                    break

            return recommendations

        except Exception as e:
            print(f"Error in recommendations: {str(e)}")
            return []

def create_gradio_interface(mf_model):
    """Assemble the Gradio Blocks UI for the recommender.

    The page has a multiselect dropdown filled with the sorted ``display``
    labels from ``mf_model.title_choices``, a button, and a results table.
    Clicking the button passes the selected labels to
    ``mf_model.get_recommendations_from_titles`` and shows its return value
    in the table.

    Args:
        mf_model: Object exposing ``title_choices`` (with a ``display``
            column) and ``get_recommendations_from_titles``.

    Returns:
        The ``gr.Blocks`` application; launching it is left to the caller.
    """
    # Alphabetical labels make the dropdown easier to scan.
    dropdown_labels = mf_model.title_choices['display'].tolist()
    dropdown_labels.sort()

    with gr.Blocks() as app:
        gr.Markdown("# Music Recommendation System")

        with gr.Row():
            song_picker = gr.Dropdown(
                label="Select songs (up to 5)",
                choices=dropdown_labels,
                multiselect=True,
                max_choices=5,
                filterable=True,
            )

        with gr.Row():
            trigger = gr.Button("Get Recommendations")
            results_table = gr.DataFrame(
                label="Recommendations",
                headers=["Song", "Artist", "Year", "Confidence"],
            )

        # Wire the button: selected labels in, recommendation rows out.
        trigger.click(
            fn=mf_model.get_recommendations_from_titles,
            inputs=song_picker,
            outputs=results_table,
        )

    return app