File size: 4,189 Bytes
81b661c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
import time
import gradio as gr
from scipy.sparse import csr_matrix

class MatrixFactorization:
    """Song recommender based on a truncated SVD of a user x title matrix.

    ``fit`` pivots a play-count table into a user-by-title matrix, factorises
    it with ``TruncatedSVD`` and keeps the resulting latent item vectors.
    ``get_recommendations_from_titles`` then ranks every title by the dot
    product between its latent vector and the mean latent vector of the
    titles the caller selected.
    """

    # Text placed between the title and the artist in dropdown display strings.
    _DISPLAY_SEPARATOR = " • by "

    def __init__(self, n_factors: int = 50) -> None:
        """Create an unfitted model.

        Args:
            n_factors: Number of latent components requested from the SVD.
        """
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.title_choices = None
        self.columns = None
        # Populated by fit(); declared here so an unfitted model can be
        # detected instead of failing with AttributeError.
        self.user_vectors = None
        self.item_vectors = None

    def fit(self, df: pd.DataFrame) -> None:
        """Build the dropdown choices and train the SVD model.

        Args:
            df: Table with the columns read below: ``user``, ``title``,
                ``artist_name``, ``year`` and ``play_count``.
        """
        print("Training model...")
        start_time = time.perf_counter()  # monotonic clock for elapsed time

        # Pre-compute title choices for dropdown: one row per (title, artist)
        # pair together with the first year recorded for that pair.
        self.title_choices = df.groupby(['title', 'artist_name'])['year'].first().reset_index()
        separator = self._DISPLAY_SEPARATOR
        self.title_choices['display'] = self.title_choices.apply(
            lambda row: f"{row['title']}{separator}{row['artist_name']}"
            + (f" [{int(row['year'])}]" if pd.notna(row['year']) else ""),
            axis=1
        )

        # Create pivot table and cache columns. pivot_table aggregates
        # duplicate (user, title) rows with its default aggfunc (mean) and
        # missing combinations are filled with 0.
        pivot = pd.pivot_table(
            df,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0
        )
        self.columns = pivot.columns

        # Convert to sparse matrix
        self.user_title_matrix = csr_matrix(pivot.values)

        # Train model: fit_transform yields one latent row per user, while
        # components_ holds one latent column per title.
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.perf_counter() - start_time:.2f} seconds")

    def _title_from_display(self, display):
        """Map a dropdown display string back to its bare title.

        An exact match against the stored display strings is tried first so
        that titles which themselves contain the separator are resolved
        correctly; otherwise the text before the first separator is used.
        """
        choices = self.title_choices
        if choices is not None and 'display' in choices.columns:
            matches = choices.loc[choices['display'] == display, 'title']
            if not matches.empty:
                return matches.iloc[0]
        return display.split(self._DISPLAY_SEPARATOR)[0]

    def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Display strings as produced by ``fit`` (bare
                titles are accepted too).
            n_recommendations: Maximum number of rows to return.

        Returns:
            A list of ``[title, artist_name, year_or_None, score_text]`` rows,
            best score first, excluding the selected titles. An empty list is
            returned when nothing is selected, when ``n_recommendations`` is
            not positive, when the model is unfitted, or when any error
            occurs (the error is printed rather than raised).
        """
        if not selected_titles:
            return []

        try:
            if n_recommendations <= 0:
                return []
            if self.columns is None or self.item_vectors is None:
                print("Error in recommendations: model has not been fitted")
                return []

            # Extract titles from display format
            titles = [self._title_from_display(entry) for entry in selected_titles]
            chosen = set(titles)

            # Get indices of selected titles; an unknown title raises
            # KeyError, which the handler below reports.
            indices = [self.columns.get_loc(title) for title in titles]

            # Calculate user vector as the mean latent vector of the selection
            user_vector = self.item_vectors[:, indices].mean(axis=1)

            # Get predictions
            scores = np.dot(user_vector, self.item_vectors)

            # Get top recommendations, highest score first
            top_indices = np.argsort(scores)[::-1]

            catalog_titles = self.title_choices['title']
            recommendations = []
            for idx in top_indices:
                # Check the limit before appending so it is never exceeded.
                if len(recommendations) >= n_recommendations:
                    break
                title = self.columns[idx]
                # Filter out selected titles
                if title in chosen:
                    continue
                # First catalogue row carrying this title supplies artist/year.
                display = self.title_choices[catalog_titles == title].iloc[0]
                recommendations.append([
                    title,
                    display['artist_name'],
                    int(display['year']) if pd.notna(display['year']) else None,
                    f"{scores[idx] * 100:.2f}%"
                ])

            return recommendations

        except Exception as e:
            # UI callback boundary: report the problem and return no rows
            # instead of propagating the exception to the caller.
            print(f"Error in recommendations: {str(e)}")
            return []

def create_gradio_interface(mf_model):
    """Assemble the Gradio Blocks UI for a fitted recommender.

    The page holds a multi-select dropdown listing the model's display
    strings in sorted order, a button, and a results table. Clicking the
    button calls ``mf_model.get_recommendations_from_titles`` with the
    dropdown selection and writes the returned rows into the table.

    Args:
        mf_model: Object exposing ``title_choices`` (with a ``display``
            column) and ``get_recommendations_from_titles``.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks() as app:
        gr.Markdown("# Music Recommendation System")

        # Row 1: song picker.
        with gr.Row():
            dropdown_options = sorted(mf_model.title_choices['display'].tolist())
            song_picker = gr.Dropdown(
                choices=dropdown_options,
                label="Select songs (up to 5)",
                multiselect=True,
                max_choices=5,
                filterable=True,
            )

        # Row 2: trigger button next to the results table.
        with gr.Row():
            run_button = gr.Button("Get Recommendations")
            results_table = gr.DataFrame(
                headers=["Song", "Artist", "Year", "Confidence"],
                label="Recommendations",
            )

        # Wire the button to the model's recommendation method.
        run_button.click(
            fn=mf_model.get_recommendations_from_titles,
            inputs=song_picker,
            outputs=results_table,
        )

    return app