Spaces:

GouthamVarma
/

Spotify

Sleeping

App Files Community

GouthamVarma committed on Dec 13, 2024

Commit

98ae331

verified ·

1 Parent(s): d49e583

update model

Browse files

Files changed (1) hide show

model.py +29 -21

model.py CHANGED Viewed

@@ -18,21 +18,25 @@ class MatrixFactorization:
         print("Training model...")
         start_time = time.time()
-        # Get top 10000 songs by play count
         top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
         top_songs = top_songs.nlargest(10000, 'play_count')
-        # Filter original dataframe to only include top songs
         df_filtered = df[df['title'].isin(top_songs['title'])]
-        # Pre-compute title choices for dropdown
-        self.title_choices = df_filtered.groupby(['title', 'artist_name'])['year'].first().reset_index()
         self.title_choices['display'] = self.title_choices.apply(
-            lambda x: f"{x['title']} • by {x['artist_name']}" + (f" [{int(x['year'])}]" if pd.notna(x['year']) else ""),
             axis=1
         )
-        # Create pivot table and cache columns
         pivot = pd.pivot_table(
             df_filtered,
             values='play_count',
@@ -42,7 +46,7 @@ class MatrixFactorization:
         )
         self.columns = pivot.columns
-        # Convert to sparse matrix
         self.user_title_matrix = csr_matrix(pivot.values)
         # Train model
@@ -50,40 +54,35 @@ class MatrixFactorization:
         self.item_vectors = self.model.components_
         print(f"Training completed in {time.time() - start_time:.2f} seconds")
-        print(f"Number of songs in dropdown: {len(self.title_choices)}")
     def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
         if not selected_titles:
             return []
         try:
-            # Extract titles from display format
             titles = [title.split(" • by ")[0] for title in selected_titles]
-            # Get indices of selected titles
             indices = [np.where(self.columns == title)[0][0] for title in titles]
-            # Calculate user vector
             user_vector = np.mean([self.item_vectors[:, idx] for idx in indices], axis=0)
-            # Get predictions
             scores = np.dot(user_vector, self.item_vectors)
-            # Get top recommendations
             top_indices = np.argsort(scores)[::-1]
-            # Filter out selected titles
             recommendations = []
             count = 0
             for idx in top_indices:
                 title = self.columns[idx]
                 if title not in titles:
                     display = self.title_choices[self.title_choices['title'] == title].iloc[0]
                     recommendations.append([
                         title,
                         display['artist_name'],
                         int(display['year']) if pd.notna(display['year']) else None,
-                        f"{scores[idx] * 100:.2f}%"
                     ])
                     count += 1
                 if count >= n_recommendations:
@@ -97,7 +96,16 @@ class MatrixFactorization:
 def create_gradio_interface(mf_model):
     with gr.Blocks() as demo:
-        gr.Markdown("# Music Recommendation System")
         with gr.Row():
             input_songs = gr.Dropdown(
                 choices=sorted(mf_model.title_choices['display'].tolist()),
@@ -106,8 +114,8 @@ def create_gradio_interface(mf_model):
                 max_choices=5,
                 filterable=True
             )
-        with gr.Row():
-            recommend_btn = gr.Button("Get Recommendations")
             output_table = gr.DataFrame(
                 headers=["Song", "Artist", "Year", "Confidence"],
                 label="Recommendations"

         print("Training model...")
         start_time = time.time()
+        # Get top 10000 songs by play count for better performance
         top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
         top_songs = top_songs.nlargest(10000, 'play_count')
+        # Filter original dataframe
         df_filtered = df[df['title'].isin(top_songs['title'])]
+        # Pre-compute formatted title choices for dropdown
+        self.title_choices = df_filtered.groupby(['title', 'artist_name', 'release'])['year'].first().reset_index()
         self.title_choices['display'] = self.title_choices.apply(
+            lambda x: f"{x['title']} • by {x['artist_name']}" +
+                     (f" [{x['release']}, {int(x['year'])}]" if pd.notna(x['year']) and pd.notna(x['release'])
+                      else f" [{int(x['year'])}]" if pd.notna(x['year'])
+                      else f" [{x['release']}]" if pd.notna(x['release'])
+                      else ""),
             axis=1
         )
+        # Create pivot table
         pivot = pd.pivot_table(
             df_filtered,
             values='play_count',
         )
         self.columns = pivot.columns
+        # Use sparse matrix for efficiency
         self.user_title_matrix = csr_matrix(pivot.values)
         # Train model
         self.item_vectors = self.model.components_
         print(f"Training completed in {time.time() - start_time:.2f} seconds")
+        print(f"Number of songs available: {len(self.title_choices)}")
     def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
         if not selected_titles:
             return []
         try:
             titles = [title.split(" • by ")[0] for title in selected_titles]
             indices = [np.where(self.columns == title)[0][0] for title in titles]
+            # Calculate average user vector from selected songs
             user_vector = np.mean([self.item_vectors[:, idx] for idx in indices], axis=0)
             scores = np.dot(user_vector, self.item_vectors)
+            # Get recommendations
             top_indices = np.argsort(scores)[::-1]
             recommendations = []
             count = 0
             for idx in top_indices:
                 title = self.columns[idx]
                 if title not in titles:
                     display = self.title_choices[self.title_choices['title'] == title].iloc[0]
+                    conf_score = max(min(scores[idx] * 100, 100), 30)
                     recommendations.append([
                         title,
                         display['artist_name'],
                         int(display['year']) if pd.notna(display['year']) else None,
+                        f"{conf_score:.2f}%"
                     ])
                     count += 1
                 if count >= n_recommendations:
 def create_gradio_interface(mf_model):
     with gr.Blocks() as demo:
+        gr.Markdown("""
+        # 🎵 Music Recommendation System 🎶
+        ### Instructions:
+        1. 🔍 Search songs using title, artist, album, or year
+        2. 🎧 Select up to 5 songs from the dropdown
+        3. 👉 Click 'Get Recommendations' for similar songs
+        4. 📊 Results show song details with confidence scores
+        """)
         with gr.Row():
             input_songs = gr.Dropdown(
                 choices=sorted(mf_model.title_choices['display'].tolist()),
                 max_choices=5,
                 filterable=True
             )
+        with gr.Column():
+            recommend_btn = gr.Button("Get Recommendations", size="lg")
             output_table = gr.DataFrame(
                 headers=["Song", "Artist", "Year", "Confidence"],
                 label="Recommendations"