Spaces:

GouthamVarma
/

Spotify

Sleeping

GouthamVarma committed on Dec 13, 2024

Commit

d49e583

verified ·

1 Parent(s): c8837a4

Upload 2 files

Files changed (2) hide show

app.py CHANGED Viewed

@@ -5,13 +5,19 @@ from sklearn.decomposition import TruncatedSVD
 import time
 from model import MatrixFactorization, create_gradio_interface
-# Load the preprocessed data
-df = pd.read_csv('data.csv')
-# Initialize and train the model
-mf_recommender = MatrixFactorization(n_factors=100)
-mf_recommender.fit(df)
-# Create and launch the Gradio interface
-demo = create_gradio_interface(mf_recommender)
-demo.launch()

 import time
 from model import MatrixFactorization, create_gradio_interface
+try:
+    # Load the preprocessed data
+    print("Loading data...")
+    df = pd.read_csv('data.csv')
+    # Initialize and train the model
+    print("Initializing model...")
+    mf_recommender = MatrixFactorization(n_factors=50)  # Reduced for speed
+    mf_recommender.fit(df)
+    # Create and launch the Gradio interface
+    print("Creating interface...")
+    demo = create_gradio_interface(mf_recommender)
+    demo.launch(share=True)
+except Exception as e:
+    print(f"Error: {str(e)}")

model.py CHANGED Viewed

@@ -18,8 +18,15 @@ class MatrixFactorization:
         print("Training model...")
         start_time = time.time()
         # Pre-compute title choices for dropdown
-        self.title_choices = df.groupby(['title', 'artist_name'])['year'].first().reset_index()
         self.title_choices['display'] = self.title_choices.apply(
             lambda x: f"{x['title']} • by {x['artist_name']}" + (f" [{int(x['year'])}]" if pd.notna(x['year']) else ""),
             axis=1
@@ -27,7 +34,7 @@ class MatrixFactorization:
         # Create pivot table and cache columns
         pivot = pd.pivot_table(
-            df,
             values='play_count',
             index='user',
             columns='title',
@@ -43,6 +50,7 @@ class MatrixFactorization:
         self.item_vectors = self.model.components_
         print(f"Training completed in {time.time() - start_time:.2f} seconds")
     def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
         if not selected_titles:

         print("Training model...")
         start_time = time.time()
+        # Get top 10000 songs by play count
+        top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
+        top_songs = top_songs.nlargest(10000, 'play_count')
+        # Filter original dataframe to only include top songs
+        df_filtered = df[df['title'].isin(top_songs['title'])]
         # Pre-compute title choices for dropdown
+        self.title_choices = df_filtered.groupby(['title', 'artist_name'])['year'].first().reset_index()
         self.title_choices['display'] = self.title_choices.apply(
             lambda x: f"{x['title']} • by {x['artist_name']}" + (f" [{int(x['year'])}]" if pd.notna(x['year']) else ""),
             axis=1
         # Create pivot table and cache columns
         pivot = pd.pivot_table(
+            df_filtered,
             values='play_count',
             index='user',
             columns='title',
         self.item_vectors = self.model.components_
         print(f"Training completed in {time.time() - start_time:.2f} seconds")
+        print(f"Number of songs in dropdown: {len(self.title_choices)}")
     def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
         if not selected_titles: