File size: 5,197 Bytes
eefe640
 
 
 
 
 
 
 
ac960a0
eefe640
 
 
 
 
 
 
 
 
 
 
ac960a0
 
eefe640
ac960a0
eefe640
ac960a0
eefe640
 
 
 
 
 
 
 
ac960a0
 
eefe640
 
ac960a0
eefe640
 
 
 
 
 
 
 
 
 
 
ac960a0
 
eefe640
 
 
ac960a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eefe640
 
 
 
 
 
 
 
 
ac960a0
 
eefe640
 
 
 
 
 
 
3403e78
eefe640
 
 
 
 
 
 
 
ac960a0
eefe640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac960a0
eefe640
 
 
 
 
 
 
f80207b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
import time
import gradio as gr
from scipy.sparse import csr_matrix

class MatrixFactorization:
    """Song recommender built on a truncated SVD of a user x title matrix.

    ``fit`` keeps the most played titles, pivots their play counts into a
    user x title matrix and factorizes it with ``TruncatedSVD``.
    ``get_recommendations`` ranks titles by the dot product between their
    latent vectors and the mean latent vector of the selected titles.

    Args:
        n_factors: Number of SVD components passed to ``TruncatedSVD``.
        n_top_songs: Number of titles (ranked by summed ``play_count``)
            kept for training.
    """

    # Text placed between title and artist in a dropdown label. It is shared
    # by ``create_title_choices`` (which writes labels) and
    # ``get_recommendations`` (which parses them), so both always agree.
    # NOTE(review): this looks like a mis-encoded bullet character -- confirm
    # the file's encoding before changing it.
    _LABEL_SEPARATOR = " β€’ by "

    def __init__(self, n_factors=50, n_top_songs=10000):
        self.n_factors = n_factors
        self.n_top_songs = n_top_songs
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.column_names = None
        # Set by fit(); declared here so an unfitted model is detectable.
        self.user_vectors = None
        self.item_vectors = None
        self._title_to_idx = None
        self._cached_choices = None

    def fit(self, df):
        """Train the SVD model and cache the dropdown labels.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``play_count``, ``artist_name``, ``year`` and ``release``.
        """
        print("Training model...")
        start_time = time.time()

        # Keep only the most played titles.
        top_songs = (
            df.groupby('title')['play_count'].sum().nlargest(self.n_top_songs).index
        )
        df_filtered = df[df['title'].isin(top_songs)]
        print(f"Filtered to {self.n_top_songs} most played songs")

        # User x title table. Duplicate (user, title) rows are combined with
        # pivot_table's default aggregation; absent pairs become 0.
        pivot = pd.pivot_table(
            df_filtered,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0
        )
        self.column_names = pivot.columns
        # Built once here so each recommendation request is a plain lookup.
        self._title_to_idx = {
            title: idx for idx, title in enumerate(self.column_names)
        }

        # Convert to sparse matrix
        self.user_title_matrix = csr_matrix(pivot.values)

        # One metadata row per title, used for labels and result rows.
        self.titles_df = df_filtered.groupby('title').agg({
            'artist_name': 'first',
            'year': 'first',
            'play_count': 'sum',
            'release': 'first'
        })

        print("Training SVD model...")
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        # Cache choices
        self._cached_choices = self.create_title_choices()

        print(f"Training completed in {time.time() - start_time:.2f} seconds")

    def _title_index(self):
        """Return the title -> column index mapping, building it if missing."""
        if self._title_to_idx is None:
            self._title_to_idx = {
                title: idx for idx, title in enumerate(self.column_names)
            }
        return self._title_to_idx

    def get_recommendations(self, selected_titles, top_n=5):
        """Recommend titles similar to the selected dropdown labels.

        Args:
            selected_titles: Labels as produced by ``create_title_choices``;
                the text before the label separator is taken as the title.
            top_n: Maximum number of rows to return.

        Returns:
            A list of ``[title, artist, year or None, "NN.NN%"]`` rows, best
            match first. The list is empty when nothing is selected, when the
            model is not fitted, when no selected title is known, or when an
            unexpected error occurs (the error is printed).
        """
        if not selected_titles:
            return []
        if (self.item_vectors is None or self.column_names is None
                or self.titles_df is None):
            print("Error in recommendations: model has not been fitted")
            return []

        try:
            title_to_idx = self._title_index()
            separator = self._LABEL_SEPARATOR
            actual_titles = [
                label.partition(separator)[0] for label in selected_titles
            ]

            # Skip labels that do not map to a trained title instead of
            # failing the whole request.
            selected_indices = []
            for title in actual_titles:
                idx = title_to_idx.get(title)
                if idx is None:
                    print(f"Skipping unknown title: {title}")
                else:
                    selected_indices.append(idx)
            if not selected_indices:
                return []

            # Mean latent vector of the selection, scored against every title.
            user_vector = self.item_vectors[:, selected_indices].mean(axis=1)
            scores = np.dot(user_vector, self.item_vectors)

            excluded = set(actual_titles)
            results = []
            # A stable sort on the negated scores keeps ties in column order.
            for idx in np.argsort(-scores, kind="stable"):
                if len(results) >= top_n:
                    break
                title = self.column_names[idx]
                if title in excluded:
                    continue
                row = self.titles_df.loc[title]
                confidence = 30 + (scores[idx] * 70)
                results.append([
                    title,
                    row['artist_name'],
                    int(row['year']) if pd.notna(row['year']) else None,
                    # Displayed confidence is clamped to the 30-100 range.
                    f"{min(max(confidence, 30), 100):.2f}%"
                ])
            return results

        except Exception as e:
            # UI callback boundary: report the failure and show no results.
            print(f"Error in recommendations: {str(e)}")
            return []

    def create_title_choices(self):
        """Build the sorted dropdown labels from ``titles_df``.

        Each label is the title, the label separator and the artist, followed
        by ``[release, year]`` containing whichever of the two is not missing.

        Returns:
            The labels as a sorted list of strings.
        """
        frame = self.titles_df
        title_choices = []
        rows = zip(frame.index, frame['artist_name'], frame['release'], frame['year'])
        for title, artist, release, year in rows:
            display_text = f"{title}{self._LABEL_SEPARATOR}{artist}"
            extra_info = []
            if pd.notna(release):
                # str() keeps the join below safe for non-string releases.
                extra_info.append(str(release))
            if pd.notna(year):
                extra_info.append(str(int(year)))
            if extra_info:
                display_text += f" [{', '.join(extra_info)}]"
            title_choices.append(display_text)
        return sorted(title_choices)

def create_gradio_interface(mf_model):
    """Assemble the Gradio Blocks UI for a trained recommender.

    The page holds an instructions panel, a multi-select song dropdown filled
    from ``mf_model._cached_choices``, a button, and a results table that the
    button fills by calling ``mf_model.get_recommendations``.

    Args:
        mf_model: Object exposing ``_cached_choices`` and
            ``get_recommendations``.

    Returns:
        The ``gr.Blocks`` app, or ``None`` if building it raised (the error
        is printed).
    """
    instructions = """# 🎡 Music Recommendation System 🎢
            
            ### Instructions:
            1. ⏳ Model loads songs (~1 min)
            2. πŸ” Search by title, artist, album, or year
            3. 🎧 Select up to 5 songs
            4. πŸ‘‰ Click for recommendations
            5. πŸ“Š View confidence scores (30-100%)
            """
    table_headers = ["Song", "Artist", "Year", "Confidence"]

    try:
        with gr.Blocks() as app:
            gr.Markdown(instructions)

            # Song picker, capped at five selections.
            with gr.Row():
                song_picker = gr.Dropdown(
                    choices=mf_model._cached_choices,
                    multiselect=True,
                    max_choices=5,
                    filterable=True,
                    label="Search and select songs (up to 5)",
                    info="Format: Title β€’ by Artist [Album, Year]",
                )

            # Trigger button with the results table below it.
            with gr.Column():
                trigger = gr.Button("Get Recommendations", size="lg")
                results_table = gr.DataFrame(
                    label="Recommended Songs",
                    headers=table_headers,
                )

            trigger.click(
                fn=mf_model.get_recommendations,
                inputs=song_picker,
                outputs=results_table,
            )
    except Exception as e:
        print(f"Error creating interface: {e}")
        return None

    return app