File size: 5,106 Bytes
f7c6ca0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f80207b
f7c6ca0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f80207b
 
f7c6ca0
 
 
 
 
 
 
 
 
 
 
f80207b
f7c6ca0
 
f80207b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7c6ca0
 
f80207b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
import time
import gradio as gr
from scipy.sparse import csr_matrix

class MatrixFactorization:
    """Song recommender based on a truncated SVD of a user x title matrix.

    After ``fit`` the instance holds:

    * ``column_names``      - titles, in the column order of the matrix
    * ``user_title_matrix`` - sparse user x title play-count matrix
    * ``titles_df``         - per-title metadata, indexed by title
    * ``user_vectors``      - output of ``TruncatedSVD.fit_transform``
    * ``item_vectors``      - ``TruncatedSVD.components_`` (factors x titles)
    """

    # Text placed between title and artist in dropdown labels. The same
    # separator is used to recover the title from a selected label.
    TITLE_SEPARATOR = " • by "

    def __init__(self, n_factors=100):
        """Create an unfitted model with ``n_factors`` latent components."""
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.column_names = None
        # Declared up front so "not fitted yet" can be detected explicitly
        # instead of surfacing as an AttributeError.
        self.user_vectors = None
        self.item_vectors = None

    def fit(self, df):
        """Train the model on a play-count table.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``play_count``, ``artist_name``, ``year`` and ``release``.

        Prints the training time, matrix shape and total explained variance.
        """
        print("Training model...")
        start_time = time.time()

        # No aggfunc is given, so pandas' default aggregation applies when a
        # (user, title) pair occurs more than once; missing pairs become 0.
        pivot = pd.pivot_table(
            df,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0
        )
        self.column_names = pivot.columns

        self.user_title_matrix = csr_matrix(pivot.values)

        # One metadata row per title; play counts are summed over all users.
        self.titles_df = df.groupby('title').agg({
            'artist_name': 'first',
            'year': 'first',
            'play_count': 'sum',
            'release': 'first'
        })

        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.time() - start_time:.2f} seconds")
        print(f"Matrix shape: {self.user_title_matrix.shape}")
        print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")

    @classmethod
    def _resolve_title(cls, label, known_titles):
        """Return the known title that ``label`` refers to, or ``None``.

        ``label`` is either a bare title or a dropdown label of the form
        ``"<title><separator><artist>..."``. The whole string is tried
        first, then every prefix ending at a separator (longest first), so
        a title that itself contains the separator is still recognised.
        """
        end = len(label)
        while end != -1:
            candidate = label[:end]
            if candidate in known_titles:
                return candidate
            end = label.rfind(cls.TITLE_SEPARATOR, 0, end)
        return None

    def get_recommendations_from_titles(self, selected_titles, top_n=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Dropdown labels (as produced by
                ``create_title_choices``) or bare titles.
            top_n: Maximum number of recommendations to return.

        Returns:
            A list of ``[title, artist, year or None, "NN.NN%"]`` rows,
            best match first. Selections that cannot be matched to a known
            title are skipped. An empty list is returned when nothing is
            selected, nothing can be matched, the model is not fitted, or
            an unexpected error occurs (the error is printed, not raised).
        """
        if not selected_titles or top_n <= 0:
            return []

        try:
            if self.item_vectors is None or self.column_names is None:
                print("Error in recommendations: model has not been fitted")
                return []

            titles = list(self.column_names)
            title_to_idx = {title: idx for idx, title in enumerate(titles)}

            selected_indices = []
            for label in selected_titles:
                title = self._resolve_title(label, title_to_idx)
                if title is None:
                    # A stale or unknown choice must not void the request.
                    print(f"Skipping unknown selection: {label}")
                    continue
                selected_indices.append(title_to_idx[title])
            if not selected_indices:
                return []

            # Profile = mean latent vector of the selected titles; every
            # title is scored by its dot product with that profile.
            user_vector = self.item_vectors[:, selected_indices].mean(axis=1)
            scores = np.asarray(np.dot(user_vector, self.item_vectors), dtype=float)

            # Never recommend what the user already picked.
            candidate_mask = np.ones(len(titles), dtype=bool)
            candidate_mask[selected_indices] = False
            candidates = np.flatnonzero(candidate_mask)
            # Stable sort keeps column order among equal scores.
            best = np.argsort(-scores[candidates], kind="stable")[:top_n]

            results = []
            for idx in candidates[best]:
                title = titles[idx]
                row = self.titles_df.loc[title]
                # Map the raw score onto the displayed 30-100% range.
                confidence = 30 + (scores[idx] * 70)
                results.append([
                    title,
                    row['artist_name'],
                    int(row['year']) if pd.notna(row['year']) else None,
                    f"{min(max(confidence, 30), 100):.2f}%"
                ])
            return results

        except Exception as e:
            # UI callback boundary: report the problem and show no results.
            print(f"Error in recommendations: {e}")
            return []

    def create_title_choices(self):
        """Build the dropdown labels, one per title in ``titles_df``.

        Each label is ``"<title> • by <artist>"``, followed by
        ``" [<release>, <year>]"`` containing whichever of the two values
        are present.
        """
        title_choices = []
        for title, row in self.titles_df.iterrows():
            display_text = f"{title}{self.TITLE_SEPARATOR}{row['artist_name']}"
            extra_info = []
            if pd.notna(row['release']):
                extra_info.append(row['release'])
            if pd.notna(row['year']):
                extra_info.append(str(int(row['year'])))
            if extra_info:
                display_text += f" [{', '.join(extra_info)}]"
            title_choices.append(display_text)
        return title_choices

def create_gradio_interface(mf_model):
    """Build the Gradio UI for a fitted recommender.

    Args:
        mf_model: Object providing ``create_title_choices()`` (dropdown
            labels) and ``get_recommendations_from_titles(labels)`` (rows
            for the results table).

    Returns:
        The ``gr.Blocks`` app, or ``None`` if building it failed (the error
        is printed rather than raised).
    """
    try:
        with gr.Blocks() as demo:
            gr.Markdown("""# 🎵 Music Recommendation System 🎶

            

            ### Instructions:

            1. ⏳ Given our large corpus, it will take ~1 min to load the model

            2. 🔍 Search songs using title, artist, album, or year

            3. 🎧 Select up to 5 songs from the dropdown

            4. 👉 Click 'Get Recommendations' for similar songs

            5. 📊 Results show song details with confidence scores (30-100%)

            """)

            with gr.Row():
                input_songs = gr.Dropdown(
                    choices=sorted(mf_model.create_title_choices()),
                    label="Search and select songs (up to 5)",
                    info="Format: Title • by Artist [Album, Year]",
                    multiselect=True,
                    max_choices=5,
                    filterable=True
                )

            with gr.Column():
                recommend_btn = gr.Button("Get Recommendations", size="lg")
                output_table = gr.DataFrame(
                    headers=["Song", "Artist", "Year", "Confidence"],
                    label="Recommended Songs"
                )

            # The selected labels are passed to the model as-is; the model
            # is responsible for turning them back into titles.
            recommend_btn.click(
                fn=mf_model.get_recommendations_from_titles,
                inputs=input_songs,
                outputs=output_table
            )

        return demo
    except Exception as e:
        # Callers receive None on failure and must check before launching.
        print(f"Error creating interface: {e}")
        return None