Spaces:

GouthamVarma
/

Spotify

Sleeping

App Files Community

GouthamVarma committed on Dec 13, 2024

Commit

f7c6ca0

verified ·

1 Parent(s): e95324b

Upload 2 files

Browse files

Files changed (2) hide show

app.py +8 -7
model.py +125 -109

app.py CHANGED Viewed

@@ -3,21 +3,22 @@ import pandas as pd
 import numpy as np
 from sklearn.decomposition import TruncatedSVD
 import time
-from model import MatrixFactorization, create_gradio_interface
 try:
-    # Load the preprocessed data
     print("Loading data...")
     df = pd.read_csv('data.csv')
-    # Initialize and train the model
     print("Initializing model...")
-    mf_recommender = MatrixFactorization(n_factors=50)  # Reduced for speed
     mf_recommender.fit(df)
-    # Create and launch the Gradio interface
     print("Creating interface...")
-    demo = create_gradio_interface(mf_recommender)
-    demo.launch(share=True)
 except Exception as e:
     print(f"Error: {str(e)}")

 import numpy as np
 from sklearn.decomposition import TruncatedSVD
 import time
+from model import MatrixFactorization
 try:
+    # Load data
     print("Loading data...")
     df = pd.read_csv('data.csv')
+    # Initialize model
     print("Initializing model...")
+    mf_recommender = MatrixFactorization(n_factors=100)
     mf_recommender.fit(df)
+    # Create interface
     print("Creating interface...")
+    demo = mf_recommender.create_interface()
+    demo.launch()
 except Exception as e:
     print(f"Error: {str(e)}")

model.py CHANGED Viewed

@@ -1,110 +1,126 @@
-class MatrixFactorization:
-    def __init__(self, n_factors=100):
-        self.n_factors = n_factors
-        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
-        self.user_title_matrix = None
-        self.titles_df = None
-        self.column_names = None
-    def fit(self, df):
-        print("Training model...")
-        start_time = time.time()
-        # Create pivot table and store columns
-        pivot = pd.pivot_table(
-            df,
-            values='play_count',
-            index='user',
-            columns='title',
-            fill_value=0
-        )
-        self.column_names = pivot.columns
-        # Convert to sparse matrix
-        self.user_title_matrix = csr_matrix(pivot.values)
-        self.titles_df = df.groupby('title').agg({
-            'artist_name': 'first',
-            'year': 'first',
-            'play_count': 'sum',
-            'release': 'first'
-        })
-        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
-        self.item_vectors = self.model.components_
-        print(f"Training completed in {time.time() - start_time:.2f} seconds")
-        print(f"Matrix shape: {self.user_title_matrix.shape}")
-        print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")
-    def get_recommendations_from_titles(self, selected_display_titles, n_recommendations=5):
-        try:
-            actual_titles = [display.split(" • by ")[0] for display in selected_display_titles]
-            title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
-            selected_indices = [title_to_idx[title] for title in actual_titles]
-            user_vector = np.zeros((1, self.n_factors))
-            for idx in selected_indices:
-                user_vector += self.item_vectors[:, idx].reshape(1, -1)
-            user_vector = user_vector / len(selected_indices)
-            scores = np.dot(user_vector, self.item_vectors).flatten()
-            # Create recommendations using stored column names
-            title_scores = [(title, score) for title, score in zip(self.column_names, scores)
-                          if title not in actual_titles]
-            recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:n_recommendations]
-            results = []
-            for title, score in recommendations:
-                row = self.titles_df.loc[title]
-                confidence = 30 + (score * 70)  # Scale to 30-100 range
-                results.append([
-                    title,
-                    row['artist_name'],
-                    int(row['year']) if pd.notna(row['year']) else None,
-                    f"{min(max(confidence, 30), 100):.2f}%"
-                ])
-            return results
-        except Exception as e:
-            print(f"Error in recommendations: {str(e)}")
-            return []
-def create_gradio_interface(mf_model):
-    with gr.Blocks() as demo:
-        gr.Markdown("""
-        # 🎵 Music Recommendation System 🎶
-        ### Instructions:
-        1. ⏳ Given our large corpus of songs, it will take ~1 min to load
-        2. 🔍 Search songs using Song Title, Artist, Album, or Year
-        3. 🎧 Select up to 5 songs from the dropdown
-        4. 👉 Click 'Get Recommendations' for similar songs
-        5. 📊 Results show song details with confidence scores
-        """)
-        with gr.Row():
-            input_songs = gr.Dropdown(
-                choices=sorted(mf_model.title_choices['display'].tolist()),
-                label="Select songs (up to 5)",
-                multiselect=True,
-                max_choices=5,
-                filterable=True
-            )
-        with gr.Column():
-            recommend_btn = gr.Button("Get Recommendations", size="lg")
-            output_table = gr.DataFrame(
-                headers=["Song", "Artist", "Year", "Confidence"],
-                label="Recommendations"
-            )
-        recommend_btn.click(
-            fn=mf_model.get_recommendations_from_titles,
-            inputs=input_songs,
-            outputs=output_table
-        )
     return demo

+import pandas as pd
+import numpy as np
+from sklearn.decomposition import TruncatedSVD
+import time
+import gradio as gr
+from scipy.sparse import csr_matrix
+class MatrixFactorization:
+    def __init__(self, n_factors=100):
+        self.n_factors = n_factors
+        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
+        self.user_title_matrix = None
+        self.titles_df = None
+        self.column_names = None
+    def fit(self, df):
+        print("Training model...")
+        start_time = time.time()
+        # Create pivot table and store columns
+        pivot = pd.pivot_table(
+            df,
+            values='play_count',
+            index='user',
+            columns='title',
+            fill_value=0
+        )
+        self.column_names = pivot.columns
+        # Convert to sparse matrix
+        self.user_title_matrix = csr_matrix(pivot.values)
+        self.titles_df = df.groupby('title').agg({
+            'artist_name': 'first',
+            'year': 'first',
+            'play_count': 'sum',
+            'release': 'first'
+        })
+        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
+        self.item_vectors = self.model.components_
+        print(f"Training completed in {time.time() - start_time:.2f} seconds")
+        print(f"Matrix shape: {self.user_title_matrix.shape}")
+        print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")
+    def get_recommendations(self, selected_titles):
+        if not selected_titles:
+            return []
+        try:
+            actual_titles = [title.split(" • by ")[0] for title in selected_titles]
+            title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
+            selected_indices = [title_to_idx[title] for title in actual_titles]
+            user_vector = np.mean([self.item_vectors[:, idx] for idx in selected_indices], axis=0)
+            scores = np.dot(user_vector, self.item_vectors)
+            title_scores = [(title, score) for title, score in zip(self.column_names, scores)
+                           if title not in actual_titles]
+            recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:5]
+            results = []
+            for title, score in recommendations:
+                row = self.titles_df.loc[title]
+                confidence = 30 + (score * 70)
+                results.append([
+                    title,
+                    row['artist_name'],
+                    int(row['year']) if pd.notna(row['year']) else None,
+                    f"{min(max(confidence, 30), 100):.2f}%"
+                ])
+            return results
+        except Exception as e:
+            print(f"Error in recommendations: {str(e)}")
+            return []
+    def create_interface(self):
+        title_choices = []
+        for title, row in self.titles_df.iterrows():
+            display_text = f"{title} • by {row['artist_name']}"
+            extra_info = []
+            if pd.notna(row['release']):
+                extra_info.append(row['release'])
+            if pd.notna(row['year']):
+                extra_info.append(str(int(row['year'])))
+            if extra_info:
+                display_text += f" [{', '.join(extra_info)}]"
+            title_choices.append(display_text)
+def create_gradio_interface(mf_model):
+    with gr.Blocks() as demo:
+        gr.Markdown("""
+        # 🎵 Music Recommendation System 🎶
+        ### Instructions:
+        1. ⏳ Given our large corpus, it will take ~1 min to load the model
+        1. 🔍 Search songs using title, artist, album, or year
+        2. 🎧 Select up to 5 songs from the dropdown
+        3. 👉 Click 'Get Recommendations' for similar songs
+        4. 📊 Results show song details with confidence scores
+        """)
+        with gr.Row():
+            input_songs = gr.Dropdown(
+                choices=sorted(mf_model.title_choices['display'].tolist()),
+                label="Select songs (up to 5)",
+                multiselect=True,
+                max_choices=5,
+                filterable=True
+            )
+        with gr.Column():
+            recommend_btn = gr.Button("Get Recommendations", size="lg")
+            output_table = gr.DataFrame(
+                headers=["Song", "Artist", "Year", "Confidence"],
+                label="Recommendations"
+            )
+        recommend_btn.click(
+            fn=mf_model.get_recommendations_from_titles,
+            inputs=input_songs,
+            outputs=output_table
+        )
     return demo