Spaces:

GouthamVarma
/

Spotify

Sleeping

App Files Community

GouthamVarma committed on Dec 13, 2024

Commit

eefe640

verified ·

1 Parent(s): f80207b

Update model.py

Browse files

Files changed (1) hide show

model.py +112 -129

model.py CHANGED Viewed

@@ -1,130 +1,113 @@
-import pandas as pd
-import numpy as np
-from sklearn.decomposition import TruncatedSVD
-import time
-import gradio as gr
-from scipy.sparse import csr_matrix
-class MatrixFactorization:
-    def __init__(self, n_factors=100):
-        self.n_factors = n_factors
-        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
-        self.user_title_matrix = None
-        self.titles_df = None
-        self.column_names = None
-    def fit(self, df):
-        print("Training model...")
-        start_time = time.time()
-        pivot = pd.pivot_table(
-            df,
-            values='play_count',
-            index='user',
-            columns='title',
-            fill_value=0
-        )
-        self.column_names = pivot.columns
-        self.user_title_matrix = csr_matrix(pivot.values)
-        self.titles_df = df.groupby('title').agg({
-            'artist_name': 'first',
-            'year': 'first',
-            'play_count': 'sum',
-            'release': 'first'
-        })
-        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
-        self.item_vectors = self.model.components_
-        print(f"Training completed in {time.time() - start_time:.2f} seconds")
-        print(f"Matrix shape: {self.user_title_matrix.shape}")
-        print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")
-    def get_recommendations_from_titles(self, selected_titles):
-        if not selected_titles:
-            return []
-        try:
-            actual_titles = [title.split(" • by ")[0] for title in selected_titles]
-            title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
-            selected_indices = [title_to_idx[title] for title in actual_titles]
-            user_vector = np.mean([self.item_vectors[:, idx] for idx in selected_indices], axis=0)
-            scores = np.dot(user_vector, self.item_vectors)
-            title_scores = [(title, score) for title, score in zip(self.column_names, scores)
-                           if title not in actual_titles]
-            recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:5]
-            results = []
-            for title, score in recommendations:
-                row = self.titles_df.loc[title]
-                confidence = 30 + (score * 70)
-                results.append([
-                    title,
-                    row['artist_name'],
-                    int(row['year']) if pd.notna(row['year']) else None,
-                    f"{min(max(confidence, 30), 100):.2f}%"
-                ])
-            return results
-        except Exception as e:
-            print(f"Error in recommendations: {str(e)}")
-            return []
-    def create_title_choices(self):
-        title_choices = []
-        for title, row in self.titles_df.iterrows():
-            display_text = f"{title} • by {row['artist_name']}"
-            extra_info = []
-            if pd.notna(row['release']):
-                extra_info.append(row['release'])
-            if pd.notna(row['year']):
-                extra_info.append(str(int(row['year'])))
-            if extra_info:
-                display_text += f" [{', '.join(extra_info)}]"
-            title_choices.append(display_text)
-        return title_choices
-def create_gradio_interface(mf_model):
-    try:
-        with gr.Blocks() as demo:
-            gr.Markdown("""# 🎵 Music Recommendation System 🎶
-            ### Instructions:
-            1. ⏳ Given our large corpus, it will take ~1 min to load the model
-            2. 🔍 Search songs using title, artist, album, or year
-            3. 🎧 Select up to 5 songs from the dropdown
-            4. 👉 Click 'Get Recommendations' for similar songs
-            5. 📊 Results show song details with confidence scores (30-100%)
-            """)
-            with gr.Row():
-                input_songs = gr.Dropdown(
-                    choices=sorted(mf_model.create_title_choices()),
-                    label="Search and select songs (up to 5)",
-                    info="Format: Title • by Artist [Album, Year]",
-                    multiselect=True,
-                    max_choices=5,
-                    filterable=True
-                )
-            with gr.Column():
-                recommend_btn = gr.Button("Get Recommendations", size="lg")
-                output_table = gr.DataFrame(
-                    headers=["Song", "Artist", "Year", "Confidence"],
-                    label="Recommended Songs"
-                )
-            recommend_btn.click(
-                fn=mf_model.get_recommendations_from_titles,
-                inputs=input_songs,
-                outputs=output_table
-            )
-        return demo
-    except Exception as e:
-        print(f"Error creating interface: {str(e)}")
         return None

+import pandas as pd
+import numpy as np
+from sklearn.decomposition import TruncatedSVD
+import time
+import gradio as gr
+from scipy.sparse import csr_matrix
+class MatrixFactorization:
+    """Song recommender based on a truncated SVD of the user x title play-count matrix.
+    Call fit() once with the listening data; create_title_choices() then supplies the
+    dropdown labels and get_recommendations_from_titles() answers a selection.
+    """
+    def __init__(self, n_factors=50, max_songs=10000):
+        """Configure the model.
+        Args:
+            n_factors: number of SVD components to keep.
+            max_songs: fit() keeps only this many titles, ranked by total play count.
+        """
+        self.n_factors = n_factors
+        self.max_songs = max_songs
+        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
+        self.user_title_matrix = None
+        self.titles_df = None
+        self.column_names = None
+        self.user_vectors = None
+        self.item_vectors = None
+        self._cached_choices = None
+        self._title_to_idx = {}
+    def fit(self, df):
+        """Train the SVD on a play-count table.
+        Args:
+            df: DataFrame with the columns 'user', 'title', 'play_count',
+                'artist_name', 'year' and 'release'.
+        """
+        print("Training model...")
+        start_time = time.time()
+        # Restrict to the most-played titles; this bounds the width of the
+        # dense pivot table built below.
+        top_songs = (df.groupby('title')['play_count']
+                    .sum()
+                    .sort_values(ascending=False)
+                    .head(self.max_songs)
+                    .index)
+        df_filtered = df[df['title'].isin(top_songs)]
+        print(f"Filtered to {len(top_songs)} most played songs")
+        pivot = pd.pivot_table(
+            df_filtered,
+            values='play_count',
+            index='user',
+            columns='title',
+            fill_value=0
+        )
+        self.column_names = pivot.columns
+        # Built once here so each recommendation request is a plain dict lookup.
+        self._title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
+        self.user_title_matrix = csr_matrix(pivot.values)
+        self.titles_df = df_filtered.groupby('title').agg({
+            'artist_name': 'first',
+            'year': 'first',
+            'play_count': 'sum',
+            'release': 'first'
+        })
+        print("Training SVD model...")
+        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
+        # components_ is (n_components, n_titles): one column per title.
+        self.item_vectors = self.model.components_
+        # Pre-cache the dropdown labels so building the UI does not recompute them.
+        self._cached_choices = self._generate_choices()
+        print(f"Training completed in {time.time() - start_time:.2f} seconds")
+    def get_recommendations_from_titles(self, selected_titles):
+        """Return up to five recommendations for the selected dropdown entries.
+        Args:
+            selected_titles: display strings in the format produced by
+                create_title_choices() ("Title • by Artist [Album, Year]").
+        Returns:
+            A list of [title, artist, year or None, "NN.NN%"] rows, best match
+            first. Empty when nothing is selected, the model is not fitted,
+            no selection maps to a known title, or an error occurs (the error
+            is printed so the UI callback never raises).
+        """
+        if not selected_titles or self.item_vectors is None:
+            return []
+        try:
+            # The dropdown shows "Title • by Artist [...]"; the model is keyed by the bare title.
+            actual_titles = [choice.split(" • by ")[0] for choice in selected_titles]
+            selected_indices = [self._title_to_idx[title] for title in actual_titles
+                                if title in self._title_to_idx]
+            if not selected_indices:
+                return []
+            # Average the latent columns of the selected titles into one query vector.
+            user_vector = self.item_vectors[:, selected_indices].mean(axis=1)
+            scores = user_vector @ self.item_vectors
+            excluded = set(selected_indices)
+            # Stable sort on the negated scores keeps ties in column order.
+            ranked = [idx for idx in np.argsort(-scores, kind="stable")
+                      if idx not in excluded][:5]
+            results = []
+            for idx in ranked:
+                title = self.column_names[idx]
+                row = self.titles_df.loc[title]
+                # Map the raw score onto the advertised 30-100% range.
+                confidence = min(max(30 + (scores[idx] * 70), 30), 100)
+                results.append([
+                    title,
+                    row['artist_name'],
+                    int(row['year']) if pd.notna(row['year']) else None,
+                    f"{confidence:.2f}%"
+                ])
+            return results
+        except Exception as e:
+            print(f"Error in recommendations: {str(e)}")
+            return []
+    def _generate_choices(self):
+        """Build the sorted "Title • by Artist [Album, Year]" labels from titles_df."""
+        choices = []
+        for title, row in self.titles_df.iterrows():
+            display_text = f"{title} • by {row['artist_name']}"
+            extra_info = []
+            if pd.notna(row['release']):
+                extra_info.append(row['release'])
+            if pd.notna(row['year']):
+                extra_info.append(str(int(row['year'])))
+            if extra_info:
+                display_text += f" [{', '.join(extra_info)}]"
+            choices.append(display_text)
+        return sorted(choices)
+    def create_title_choices(self):
+        """Return the sorted dropdown labels; an empty list before fit() has run."""
+        if self._cached_choices:
+            return self._cached_choices
+        if self.titles_df is None:
+            return []
+        return self._generate_choices()
+def create_gradio_interface(mf_model):
+    """Build the Gradio Blocks app around a recommender model.
+    Args:
+        mf_model: object providing create_title_choices() for the dropdown and
+            get_recommendations_from_titles() as the button callback.
+    Returns:
+        The gr.Blocks app, or None if building it raised (the error is printed).
+    """
+    instructions = """# 🎵 Music Recommendation System 🎶
+            ### Instructions:
+            1. ⏳ Loading ~10,000 most popular songs
+            2. 🔍 Search by title, artist, album, or year
+            3. 🎧 Select up to 5 songs
+            4. 👉 Click for recommendations
+            5. 📊 View confidence scores (30-100%)
+            """
+    try:
+        with gr.Blocks() as app:
+            gr.Markdown(instructions)
+            # Song picker: searchable multi-select capped at five entries.
+            with gr.Row():
+                song_picker = gr.Dropdown(
+                    choices=mf_model.create_title_choices(),
+                    label="Search and select songs (up to 5)",
+                    info="Format: Title • by Artist [Album, Year]",
+                    multiselect=True,
+                    max_choices=5,
+                    filterable=True
+                )
+            # Trigger button and the table that receives the recommendation rows.
+            with gr.Column():
+                submit_btn = gr.Button("Get Recommendations", size="lg")
+                results_table = gr.DataFrame(
+                    headers=["Song", "Artist", "Year", "Confidence"],
+                    label="Recommended Songs"
+                )
+            submit_btn.click(
+                fn=mf_model.get_recommendations_from_titles,
+                inputs=song_picker,
+                outputs=results_table
+            )
+        return app
+    except Exception as e:
+        print(f"Error creating interface: {str(e)}")
         return None