Spaces:

GouthamVarma
/

Spotify

Sleeping

App Files Community

GouthamVarma committed on Dec 13, 2024

Commit

e95324b

verified ·

1 Parent(s): f96ec1f

Update model.py

Browse files

Files changed (1) hide show

model.py +55 -74

model.py CHANGED Viewed

@@ -1,97 +1,77 @@
-import pandas as pd
-import numpy as np
-from sklearn.decomposition import TruncatedSVD
-import time
-import gradio as gr
-from scipy.sparse import csr_matrix
 class MatrixFactorization:
-    def __init__(self, n_factors=50):
         self.n_factors = n_factors
         self.model = TruncatedSVD(n_components=n_factors, random_state=42)
         self.user_title_matrix = None
         self.titles_df = None
-        self.title_choices = None
-        self.columns = None
     def fit(self, df):
         print("Training model...")
         start_time = time.time()
-        # Get top 10000 songs by play count for better performance
-        top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
-        top_songs = top_songs.nlargest(10000, 'play_count')
-        # Filter original dataframe
-        df_filtered = df[df['title'].isin(top_songs['title'])]
-        # Pre-compute formatted title choices for dropdown
-        self.title_choices = df_filtered.groupby(['title', 'artist_name', 'release'])['year'].first().reset_index()
-        self.title_choices['display'] = self.title_choices.apply(
-            lambda x: f"{x['title']} • by {x['artist_name']}" +
-                     (f" [{x['release']}, {int(x['year'])}]" if pd.notna(x['year']) and pd.notna(x['release'])
-                      else f" [{int(x['year'])}]" if pd.notna(x['year'])
-                      else f" [{x['release']}]" if pd.notna(x['release'])
-                      else ""),
-            axis=1
-        )
-        # Create pivot table
         pivot = pd.pivot_table(
-            df_filtered,
             values='play_count',
             index='user',
             columns='title',
             fill_value=0
         )
-        self.columns = pivot.columns
-        # Use sparse matrix for efficiency
         self.user_title_matrix = csr_matrix(pivot.values)
-        # Train model
         self.user_vectors = self.model.fit_transform(self.user_title_matrix)
         self.item_vectors = self.model.components_
         print(f"Training completed in {time.time() - start_time:.2f} seconds")
-        print(f"Number of songs available: {len(self.title_choices)}")
     def get_recommendations_from_titles(self, selected_display_titles, n_recommendations=5):
-            try:
-                actual_titles = [display.split(" • by ")[0] for display in selected_display_titles]
-                title_to_idx = {title: idx for idx, title in enumerate(self.user_title_matrix.columns)}
-                selected_indices = [title_to_idx[title] for title in actual_titles]
-                user_vector = np.zeros((1, self.n_factors))
-                for idx in selected_indices:
-                    user_vector += self.item_vectors[:, idx].reshape(1, -1)
-                user_vector = user_vector / len(selected_indices)
-                predicted_ratings = np.dot(user_vector, self.item_vectors)
-                predicted_ratings = predicted_ratings.flatten()
-                titles = self.user_title_matrix.columns
-                title_scores = [(title, score) for title, score in zip(titles, predicted_ratings)
-                              if title not in actual_titles]
-                recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:n_recommendations]
-                results = []
-                for title, score in recommendations:
-                    row = self.titles_df.loc[title]
-                    confidence = 30 + (score * 70)
-                    results.append([
-                        title,
-                        row['artist_name'],
-                        int(row['year']) if pd.notna(row['year']) else None,
-                        f"{min(max(confidence, 30), 100):.2f}%"
-                    ])
-                return results
-            except Exception as e:
-                print(f"Error in recommendations: {str(e)}")
-                return []
 def create_gradio_interface(mf_model):
     with gr.Blocks() as demo:
@@ -99,10 +79,11 @@ def create_gradio_interface(mf_model):
         # 🎵 Music Recommendation System 🎶
         ### Instructions:
-        1. 🔍 Search songs using title, artist, album, or year
-        2. 🎧 Select up to 5 songs from the dropdown
-        3. 👉 Click 'Get Recommendations' for similar songs
-        4. 📊 Results show song details with confidence scores
         """)
         with gr.Row():

 class MatrixFactorization:
+    def __init__(self, n_factors=100):
         self.n_factors = n_factors
         self.model = TruncatedSVD(n_components=n_factors, random_state=42)
         self.user_title_matrix = None
         self.titles_df = None
+        self.column_names = None
     def fit(self, df):
         print("Training model...")
         start_time = time.time()
+        # Create pivot table and store columns
         pivot = pd.pivot_table(
+            df,
             values='play_count',
             index='user',
             columns='title',
             fill_value=0
         )
+        self.column_names = pivot.columns
+        # Convert to sparse matrix
         self.user_title_matrix = csr_matrix(pivot.values)
+        self.titles_df = df.groupby('title').agg({
+            'artist_name': 'first',
+            'year': 'first',
+            'play_count': 'sum',
+            'release': 'first'
+        })
         self.user_vectors = self.model.fit_transform(self.user_title_matrix)
         self.item_vectors = self.model.components_
         print(f"Training completed in {time.time() - start_time:.2f} seconds")
+        print(f"Matrix shape: {self.user_title_matrix.shape}")
+        print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")
     def get_recommendations_from_titles(self, selected_display_titles, n_recommendations=5):
+        try:
+            actual_titles = [display.split(" • by ")[0] for display in selected_display_titles]
+            title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
+            selected_indices = [title_to_idx[title] for title in actual_titles]
+            user_vector = np.zeros((1, self.n_factors))
+            for idx in selected_indices:
+                user_vector += self.item_vectors[:, idx].reshape(1, -1)
+            user_vector = user_vector / len(selected_indices)
+            scores = np.dot(user_vector, self.item_vectors).flatten()
+            # Create recommendations using stored column names
+            title_scores = [(title, score) for title, score in zip(self.column_names, scores)
+                          if title not in actual_titles]
+            recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:n_recommendations]
+            results = []
+            for title, score in recommendations:
+                row = self.titles_df.loc[title]
+                confidence = 30 + (score * 70)  # Scale to 30-100 range
+                results.append([
+                    title,
+                    row['artist_name'],
+                    int(row['year']) if pd.notna(row['year']) else None,
+                    f"{min(max(confidence, 30), 100):.2f}%"
+                ])
+            return results
+        except Exception as e:
+            print(f"Error in recommendations: {str(e)}")
+            return []
 def create_gradio_interface(mf_model):
     with gr.Blocks() as demo:
         # 🎵 Music Recommendation System 🎶
         ### Instructions:
+        1. ⏳ Given our large corpus of songs, it will take ~1 min to load
+        2. 🔍 Search songs using Song Title, Artist, Album, or Year
+        3. 🎧 Select up to 5 songs from the dropdown
+        4. 👉 Click 'Get Recommendations' for similar songs
+        5. 📊 Results show song details with confidence scores
         """)
         with gr.Row():