Spaces:

GouthamVarma
/

Spotify

Sleeping

App Files Community

GouthamVarma committed on Dec 13, 2024

Commit

ac960a0

verified ·

1 Parent(s): eefe640

Update model.py

Browse files

Files changed (1) hide show

model.py +49 -21

model.py CHANGED Viewed

@@ -6,7 +6,7 @@ import gradio as gr
 from scipy.sparse import csr_matrix
 class MatrixFactorization:
-    def __init__(self, n_factors=50):  # Reduced factors
         self.n_factors = n_factors
         self.model = TruncatedSVD(n_components=n_factors, random_state=42)
         self.user_title_matrix = None
@@ -18,16 +18,12 @@ class MatrixFactorization:
         print("Training model...")
         start_time = time.time()
-        # Get top songs by play count
-        top_songs = (df.groupby('title')['play_count']
-                    .sum()
-                    .sort_values(ascending=False)
-                    .head(10000)
-                    .index)
         df_filtered = df[df['title'].isin(top_songs)]
-        print(f"Filtered to {len(top_songs)} most played songs")
         pivot = pd.pivot_table(
             df_filtered,
             values='play_count',
@@ -36,8 +32,11 @@ class MatrixFactorization:
             fill_value=0
         )
         self.column_names = pivot.columns
         self.user_title_matrix = csr_matrix(pivot.values)
         self.titles_df = df_filtered.groupby('title').agg({
             'artist_name': 'first',
             'year': 'first',
@@ -49,13 +48,45 @@ class MatrixFactorization:
         self.user_vectors = self.model.fit_transform(self.user_title_matrix)
         self.item_vectors = self.model.components_
-        # Pre-cache choices
-        self._cached_choices = self._generate_choices()
         print(f"Training completed in {time.time() - start_time:.2f} seconds")
-    def _generate_choices(self):
-        choices = []
         for title, row in self.titles_df.iterrows():
             display_text = f"{title} • by {row['artist_name']}"
             extra_info = []
@@ -65,11 +96,8 @@ class MatrixFactorization:
                 extra_info.append(str(int(row['year'])))
             if extra_info:
                 display_text += f" [{', '.join(extra_info)}]"
-            choices.append(display_text)
-        return sorted(choices)
-    def create_title_choices(self):
-        return self._cached_choices if self._cached_choices else self._generate_choices()
 def create_gradio_interface(mf_model):
     try:
@@ -77,7 +105,7 @@ def create_gradio_interface(mf_model):
             gr.Markdown("""# 🎵 Music Recommendation System 🎶
             ### Instructions:
-            1. ⏳ Loading ~10,000 most popular songs
             2. 🔍 Search by title, artist, album, or year
             3. 🎧 Select up to 5 songs
             4. 👉 Click for recommendations
@@ -86,7 +114,7 @@ def create_gradio_interface(mf_model):
             with gr.Row():
                 input_songs = gr.Dropdown(
-                    choices=mf_model.create_title_choices(),
                     label="Search and select songs (up to 5)",
                     info="Format: Title • by Artist [Album, Year]",
                     multiselect=True,
@@ -102,7 +130,7 @@ def create_gradio_interface(mf_model):
                 )
             recommend_btn.click(
-                fn=mf_model.get_recommendations_from_titles,
                 inputs=input_songs,
                 outputs=output_table
             )

 from scipy.sparse import csr_matrix
 class MatrixFactorization:
+    def __init__(self, n_factors=50):
         self.n_factors = n_factors
         self.model = TruncatedSVD(n_components=n_factors, random_state=42)
         self.user_title_matrix = None
         print("Training model...")
         start_time = time.time()
+        # Get top 10000 songs
+        top_songs = df.groupby('title')['play_count'].sum().nlargest(10000).index
         df_filtered = df[df['title'].isin(top_songs)]
+        print("Filtered to 10000 most played songs")
+        # Create pivot table
         pivot = pd.pivot_table(
             df_filtered,
             values='play_count',
             fill_value=0
         )
         self.column_names = pivot.columns
+        # Convert to sparse matrix
         self.user_title_matrix = csr_matrix(pivot.values)
+        # Create titles dataframe
         self.titles_df = df_filtered.groupby('title').agg({
             'artist_name': 'first',
             'year': 'first',
         self.user_vectors = self.model.fit_transform(self.user_title_matrix)
         self.item_vectors = self.model.components_
+        # Cache choices
+        self._cached_choices = self.create_title_choices()
         print(f"Training completed in {time.time() - start_time:.2f} seconds")
+    def get_recommendations(self, selected_titles):
+        if not selected_titles:
+            return []
+        try:
+            actual_titles = [title.split(" • by ")[0] for title in selected_titles]
+            title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
+            selected_indices = [title_to_idx[title] for title in actual_titles]
+            user_vector = np.mean([self.item_vectors[:, idx] for idx in selected_indices], axis=0)
+            scores = np.dot(user_vector, self.item_vectors)
+            title_scores = [(title, score) for title, score in zip(self.column_names, scores)
+                           if title not in actual_titles]
+            recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:5]
+            results = []
+            for title, score in recommendations:
+                row = self.titles_df.loc[title]
+                confidence = 30 + (score * 70)
+                results.append([
+                    title,
+                    row['artist_name'],
+                    int(row['year']) if pd.notna(row['year']) else None,
+                    f"{min(max(confidence, 30), 100):.2f}%"
+                ])
+            return results
+        except Exception as e:
+            print(f"Error in recommendations: {str(e)}")
+            return []
+    def create_title_choices(self):
+        title_choices = []
         for title, row in self.titles_df.iterrows():
             display_text = f"{title} • by {row['artist_name']}"
             extra_info = []
                 extra_info.append(str(int(row['year'])))
             if extra_info:
                 display_text += f" [{', '.join(extra_info)}]"
+            title_choices.append(display_text)
+        return sorted(title_choices)
 def create_gradio_interface(mf_model):
     try:
             gr.Markdown("""# 🎵 Music Recommendation System 🎶
             ### Instructions:
+            1. ⏳ Model loads top 10000 songs (~1 min)
             2. 🔍 Search by title, artist, album, or year
             3. 🎧 Select up to 5 songs
             4. 👉 Click for recommendations
             with gr.Row():
                 input_songs = gr.Dropdown(
+                    choices=mf_model._cached_choices,
                     label="Search and select songs (up to 5)",
                     info="Format: Title • by Artist [Album, Year]",
                     multiselect=True,
                 )
             recommend_btn.click(
+                fn=mf_model.get_recommendations,
                 inputs=input_songs,
                 outputs=output_table
             )