Spaces:

GouthamVarma
/

Spotify

Sleeping

App Files Files Community

GouthamVarma committed on Dec 13, 2024

Commit

f96ec1f

verified ·

1 Parent(s): 98ae331

Update model.py

Browse files

Files changed (1) hide show

model.py +128 -129

model.py CHANGED Viewed

@@ -1,130 +1,129 @@
-import pandas as pd
-import numpy as np
-from sklearn.decomposition import TruncatedSVD
-import time
-import gradio as gr
-from scipy.sparse import csr_matrix
-class MatrixFactorization:
-    """Song recommender built on a truncated SVD of a user-by-title play-count matrix."""
-    def __init__(self, n_factors=50):
-        """Create an unfitted model whose SVD keeps ``n_factors`` components."""
-        self.n_factors = n_factors
-        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
-        self.user_title_matrix = None
-        # Initialised here; nothing else in this class assigns it.
-        self.titles_df = None
-        self.title_choices = None
-        self.columns = None
-    def fit(self, df):
-        """Fit the SVD on ``df`` and precompute the dropdown display strings.
-
-        Reads the columns ``title``, ``artist_name``, ``release``, ``year``,
-        ``play_count`` and ``user`` from ``df``. Sets ``title_choices``,
-        ``columns``, ``user_title_matrix``, ``user_vectors`` and
-        ``item_vectors`` on the instance and prints timing information.
-        """
-        print("Training model...")
-        start_time = time.time()
-        # Get top 10000 songs by play count for better performance
-        top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
-        top_songs = top_songs.nlargest(10000, 'play_count')
-        # Filter original dataframe
-        # NOTE: the filter matches on title only, so any row sharing a title
-        # with a top song is kept regardless of artist.
-        df_filtered = df[df['title'].isin(top_songs['title'])]
-        # Pre-compute formatted title choices for dropdown
-        self.title_choices = df_filtered.groupby(['title', 'artist_name', 'release'])['year'].first().reset_index()
-        # Display format: "<title> • by <artist>" followed by an optional
-        # " [<release>, <year>]" suffix built from whichever parts are present.
-        self.title_choices['display'] = self.title_choices.apply(
-            lambda x: f"{x['title']} • by {x['artist_name']}" +
-                     (f" [{x['release']}, {int(x['year'])}]" if pd.notna(x['year']) and pd.notna(x['release'])
-                      else f" [{int(x['year'])}]" if pd.notna(x['year'])
-                      else f" [{x['release']}]" if pd.notna(x['release'])
-                      else ""),
-            axis=1
-        )
-        # Create pivot table
-        # Rows are users, columns are titles; missing cells become 0.
-        pivot = pd.pivot_table(
-            df_filtered,
-            values='play_count',
-            index='user',
-            columns='title',
-            fill_value=0
-        )
-        # Keep the title index: column position i of the matrix is title columns[i].
-        self.columns = pivot.columns
-        # Use sparse matrix for efficiency
-        self.user_title_matrix = csr_matrix(pivot.values)
-        # Train model
-        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
-        # components_ has one row per factor and one column per title.
-        self.item_vectors = self.model.components_
-        print(f"Training completed in {time.time() - start_time:.2f} seconds")
-        print(f"Number of songs available: {len(self.title_choices)}")
-    def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
-        """Return up to ``n_recommendations`` rows of [title, artist, year, confidence].
-
-        ``selected_titles`` are dropdown display strings; the title is the
-        text before " • by ". Returns ``[]`` for an empty selection, and also
-        when any exception is raised (the error is printed, not re-raised).
-        """
-        if not selected_titles:
-            return []
-        try:
-            titles = [title.split(" • by ")[0] for title in selected_titles]
-            # Column position of each selected title; raises if a title is unknown.
-            indices = [np.where(self.columns == title)[0][0] for title in titles]
-            # Calculate average user vector from selected songs
-            user_vector = np.mean([self.item_vectors[:, idx] for idx in indices], axis=0)
-            scores = np.dot(user_vector, self.item_vectors)
-            # Get recommendations
-            # Highest score first.
-            top_indices = np.argsort(scores)[::-1]
-            recommendations = []
-            count = 0
-            for idx in top_indices:
-                title = self.columns[idx]
-                if title not in titles:
-                    # First metadata row for this title supplies artist and year.
-                    display = self.title_choices[self.title_choices['title'] == title].iloc[0]
-                    # Scale the score to a percentage clamped to [30, 100].
-                    conf_score = max(min(scores[idx] * 100, 100), 30)
-                    recommendations.append([
-                        title,
-                        display['artist_name'],
-                        int(display['year']) if pd.notna(display['year']) else None,
-                        f"{conf_score:.2f}%"
-                    ])
-                    count += 1
-                if count >= n_recommendations:
-                    break
-            return recommendations
-        except Exception as e:
-            print(f"Error in recommendations: {str(e)}")
-            return []
-def create_gradio_interface(mf_model):
-    """Build and return the Gradio Blocks UI for a fitted ``mf_model``.
-
-    Reads ``mf_model.title_choices['display']`` for the dropdown entries and
-    wires the button to ``mf_model.get_recommendations_from_titles``.
-    """
-    with gr.Blocks() as demo:
-        gr.Markdown("""
-        # 🎵 Music Recommendation System 🎶
-        ### Instructions:
-        1. 🔍 Search songs using title, artist, album, or year
-        2. 🎧 Select up to 5 songs from the dropdown
-        3. 👉 Click 'Get Recommendations' for similar songs
-        4. 📊 Results show song details with confidence scores
-        """)
-        with gr.Row():
-            # Multi-select dropdown over the sorted display strings.
-            input_songs = gr.Dropdown(
-                choices=sorted(mf_model.title_choices['display'].tolist()),
-                label="Select songs (up to 5)",
-                multiselect=True,
-                max_choices=5,
-                filterable=True
-            )
-        with gr.Column():
-            recommend_btn = gr.Button("Get Recommendations", size="lg")
-            output_table = gr.DataFrame(
-                headers=["Song", "Artist", "Year", "Confidence"],
-                label="Recommendations"
-            )
-        # Clicking the button sends the selection to the model and shows the rows.
-        recommend_btn.click(
-            fn=mf_model.get_recommendations_from_titles,
-            inputs=input_songs,
-            outputs=output_table
-        )
     return demo

+import pandas as pd
+import numpy as np
+from sklearn.decomposition import TruncatedSVD
+import time
+import gradio as gr
+from scipy.sparse import csr_matrix
+class MatrixFactorization:
+    """Song recommender backed by a truncated SVD of the user x title play-count matrix.
+
+    Call ``fit`` once with the listening data, then call
+    ``get_recommendations_from_titles`` with dropdown display strings.
+    """
+
+    def __init__(self, n_factors=50):
+        """Create an unfitted recommender whose SVD keeps ``n_factors`` components."""
+        self.n_factors = n_factors
+        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
+        self.user_title_matrix = None
+        self.titles_df = None  # kept for backward compatibility; never populated here
+        self.title_choices = None
+        self.columns = None
+        # Populated by fit(); declared here so an unfitted instance still has them.
+        self.user_vectors = None
+        self.item_vectors = None
+
+    def fit(self, df):
+        """Fit the SVD on ``df`` and precompute the dropdown display strings.
+
+        Reads the columns ``title``, ``artist_name``, ``release``, ``year``,
+        ``play_count`` and ``user`` from ``df``. Sets ``title_choices``,
+        ``columns``, ``user_title_matrix``, ``user_vectors`` and
+        ``item_vectors`` on the instance and prints timing information.
+        """
+        print("Training model...")
+        start_time = time.time()
+        # Get top 10000 songs by play count for better performance
+        top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
+        top_songs = top_songs.nlargest(10000, 'play_count')
+        # Filter original dataframe (matches on title only)
+        df_filtered = df[df['title'].isin(top_songs['title'])]
+        # Pre-compute formatted title choices for dropdown
+        self.title_choices = df_filtered.groupby(['title', 'artist_name', 'release'])['year'].first().reset_index()
+        self.title_choices['display'] = self.title_choices.apply(
+            lambda x: f"{x['title']} • by {x['artist_name']}" +
+                     (f" [{x['release']}, {int(x['year'])}]" if pd.notna(x['year']) and pd.notna(x['release'])
+                      else f" [{int(x['year'])}]" if pd.notna(x['year'])
+                      else f" [{x['release']}]" if pd.notna(x['release'])
+                      else ""),
+            axis=1
+        )
+        # Create pivot table: rows are users, columns are titles, gaps become 0
+        pivot = pd.pivot_table(
+            df_filtered,
+            values='play_count',
+            index='user',
+            columns='title',
+            fill_value=0
+        )
+        # The sparse matrix below carries no labels, so keep the title index
+        # separately: column position i of the matrix is title self.columns[i].
+        self.columns = pivot.columns
+        # Use sparse matrix for efficiency
+        self.user_title_matrix = csr_matrix(pivot.values)
+        # Train model
+        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
+        self.item_vectors = self.model.components_
+        print(f"Training completed in {time.time() - start_time:.2f} seconds")
+        print(f"Number of songs available: {len(self.title_choices)}")
+
+    def get_recommendations_from_titles(self, selected_display_titles, n_recommendations=5):
+        """Recommend songs similar to the selected dropdown entries.
+
+        Args:
+            selected_display_titles: Display strings as produced by ``fit``
+                ("<title> • by <artist> ..."). May be empty or ``None``.
+            n_recommendations: Maximum number of rows to return.
+
+        Returns:
+            A list of ``[title, artist_name, year_or_None, "NN.NN%"]`` rows,
+            best match first. Returns ``[]`` when nothing usable is selected
+            or when an unexpected error occurs (the error is printed).
+        """
+        if not selected_display_titles:
+            return []
+        try:
+            actual_titles = [display.split(" • by ")[0] for display in selected_display_titles]
+            # Titles label the columns of the pivot table saved in fit(); the
+            # csr_matrix in self.user_title_matrix has no ``columns`` attribute.
+            title_to_idx = {title: idx for idx, title in enumerate(self.columns)}
+            selected_indices = [title_to_idx[title] for title in actual_titles if title in title_to_idx]
+            if not selected_indices:
+                return []
+            # Mean latent vector of the selected songs, scored against every song.
+            user_vector = self.item_vectors[:, selected_indices].mean(axis=1)
+            predicted_ratings = np.asarray(user_vector @ self.item_vectors).ravel()
+            # One metadata row per title (the first one), indexed for direct lookup.
+            metadata = self.title_choices.drop_duplicates(subset='title').set_index('title')
+            excluded = set(actual_titles)
+            results = []
+            # Stable sort on the negated scores: descending, ties keep column order.
+            for idx in np.argsort(-predicted_ratings, kind="stable"):
+                if len(results) >= n_recommendations:
+                    break
+                title = self.columns[idx]
+                # Skip the user's own picks and titles that have no metadata row.
+                if title in excluded or title not in metadata.index:
+                    continue
+                row = metadata.loc[title]
+                confidence = 30 + (predicted_ratings[idx] * 70)
+                results.append([
+                    title,
+                    row['artist_name'],
+                    int(row['year']) if pd.notna(row['year']) else None,
+                    f"{min(max(confidence, 30), 100):.2f}%"
+                ])
+            return results
+        except Exception as e:
+            # UI boundary: report the problem and show an empty table instead of crashing.
+            print(f"Error in recommendations: {str(e)}")
+            return []
+def create_gradio_interface(mf_model):
+    """Assemble the Gradio Blocks app for an already-fitted ``mf_model``.
+
+    The dropdown lists ``mf_model.title_choices['display']`` in sorted order,
+    and the button forwards the selection to
+    ``mf_model.get_recommendations_from_titles``. Returns the Blocks object.
+    """
+    # Dropdown entries, sorted so the list is easy to scan.
+    song_choices = sorted(mf_model.title_choices['display'].tolist())
+    result_headers = ["Song", "Artist", "Year", "Confidence"]
+    with gr.Blocks() as demo:
+        gr.Markdown("""
+        # 🎵 Music Recommendation System 🎶
+        ### Instructions:
+        1. 🔍 Search songs using title, artist, album, or year
+        2. 🎧 Select up to 5 songs from the dropdown
+        3. 👉 Click 'Get Recommendations' for similar songs
+        4. 📊 Results show song details with confidence scores
+        """)
+        with gr.Row():
+            input_songs = gr.Dropdown(
+                choices=song_choices,
+                label="Select songs (up to 5)",
+                multiselect=True,
+                max_choices=5,
+                filterable=True,
+            )
+        with gr.Column():
+            recommend_btn = gr.Button("Get Recommendations", size="lg")
+            output_table = gr.DataFrame(
+                headers=result_headers,
+                label="Recommendations",
+            )
+        # Button click: selected display strings in, recommendation rows out.
+        recommend_btn.click(
+            fn=mf_model.get_recommendations_from_titles,
+            inputs=input_songs,
+            outputs=output_table,
+        )
     return demo