# Spotify / model.py
# Author: GouthamVarma
# Commit: "Update model.py" (3403e78, verified)
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
import time
import gradio as gr
from scipy.sparse import csr_matrix
class MatrixFactorization:
    """Song recommender based on a truncated SVD of user x title play counts.

    ``fit`` factorises the play-count matrix; ``get_recommendations`` ranks
    titles by the dot product between their latent vectors and the mean
    latent vector of the selected titles.
    """

    # Joins title and artist in a dropdown label ("Title <bullet> by Artist").
    # Written as an escape so the bullet survives a mis-decoded source file.
    _SEPARATOR = " \u2022 by "

    def __init__(self, n_factors=50, max_songs=10000):
        """Configure the model.

        Args:
            n_factors: Number of SVD components.
            max_songs: How many of the most played titles ``fit`` keeps.
        """
        self.n_factors = n_factors
        self.max_songs = max_songs
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.column_names = None
        # Set by fit(); initialised here so an unfitted model is detectable.
        self.user_vectors = None
        self.item_vectors = None
        self._cached_choices = None
        # Dropdown label -> title, filled by create_title_choices().
        self._choice_to_title = None

    @staticmethod
    def _build_user_title_matrix(df):
        """Return ``(csr_matrix, titles)`` of mean play counts per user/title.

        Produces the same values and the same sorted row/column order as
        ``pd.pivot_table(..., fill_value=0)`` with its default mean
        aggregation, but never materialises the dense users x titles table.
        """
        pair_means = (
            df.groupby(['user', 'title'], sort=True)['play_count']
            .mean()
            .dropna()  # pivot_table also drops pairs without a usable value
        )
        user_codes, users = pd.factorize(
            pair_means.index.get_level_values('user'), sort=True
        )
        title_codes, titles = pd.factorize(
            pair_means.index.get_level_values('title'), sort=True
        )
        matrix = csr_matrix(
            (pair_means.to_numpy(dtype=float), (user_codes, title_codes)),
            shape=(len(users), len(titles)),
        )
        return matrix, titles

    def fit(self, df):
        """Train the SVD model on play-count data.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``play_count``, ``artist_name``, ``year`` and ``release``.

        Raises:
            ValueError: If ``df`` contains no rows to train on.
        """
        print("Training model...")
        start_time = time.perf_counter()

        # Keep only the most played titles.
        top_songs = (
            df.groupby('title')['play_count'].sum().nlargest(self.max_songs).index
        )
        df_filtered = df[df['title'].isin(top_songs)]
        print(f"Filtered to {len(top_songs)} most played songs")
        if df_filtered.empty:
            raise ValueError("No play-count rows available to train on")

        self.user_title_matrix, self.column_names = self._build_user_title_matrix(
            df_filtered
        )

        # Per-title metadata used for display and for the result table.
        self.titles_df = df_filtered.groupby('title').agg({
            'artist_name': 'first',
            'year': 'first',
            'play_count': 'sum',
            'release': 'first',
        })

        print("Training SVD model...")
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        # Cache the dropdown labels (also builds the label -> title lookup).
        self._cached_choices = self.create_title_choices()
        print(f"Training completed in {time.perf_counter() - start_time:.2f} seconds")

    def _title_from_choice(self, choice):
        """Map a dropdown label (or a bare title) back to its title."""
        # getattr: tolerate instances created without __init__.
        lookup = getattr(self, "_choice_to_title", None) or {}
        if choice in lookup:
            return lookup[choice]
        # Fallback for labels not produced by create_title_choices().
        return choice.split(self._SEPARATOR)[0]

    def get_recommendations(self, selected_titles, top_n=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Dropdown labels (or bare titles). Entries that do
                not match a known title are ignored.
            top_n: Maximum number of recommendations to return.

        Returns:
            A list of ``[title, artist, year or None, "NN.NN%"]`` rows, best
            match first. Empty when nothing usable was selected, the model is
            not fitted, or an error occurred (the error is printed).
        """
        if not selected_titles:
            return []
        # This is the UI callback boundary: report failures and return an
        # empty table rather than raising into the interface.
        try:
            if self.item_vectors is None or self.column_names is None:
                print("Error in recommendations: model has not been fitted")
                return []

            columns = self.column_names
            if not isinstance(columns, pd.Index):
                columns = pd.Index(columns)

            actual_titles = [self._title_from_choice(c) for c in selected_titles]
            positions = columns.get_indexer(actual_titles)
            known = positions[positions >= 0]  # -1 marks an unknown title
            if known.size == 0:
                return []

            user_vector = self.item_vectors[:, known].mean(axis=1)
            scores = np.dot(user_vector, self.item_vectors)

            # Never recommend what the user already picked.
            is_candidate = np.ones(scores.shape[0], dtype=bool)
            is_candidate[known] = False
            candidates = np.flatnonzero(is_candidate)
            # Stable sort keeps column order among equal scores.
            ranked = candidates[np.argsort(-scores[candidates], kind="stable")]

            results = []
            for idx in ranked[:top_n]:
                title = columns[idx]
                row = self.titles_df.loc[title]
                # NOTE(review): the raw dot product is not bounded to [0, 1],
                # so the clamp below does most of the work - confirm intent.
                confidence = 30 + (scores[idx] * 70)
                results.append([
                    title,
                    row['artist_name'],
                    int(row['year']) if pd.notna(row['year']) else None,
                    f"{min(max(confidence, 30), 100):.2f}%",
                ])
            return results
        except Exception as e:
            print(f"Error in recommendations: {str(e)}")
            return []

    def create_title_choices(self):
        """Return the sorted dropdown labels for every title in ``titles_df``.

        Labels look like ``Title <bullet> by Artist [Release, Year]``; the
        bracketed part lists whichever of release/year is present. Also
        records which title each label belongs to, so a selection can be
        resolved even when the title itself contains the separator.
        """
        choice_to_title = {}
        frame = self.titles_df
        for title, artist, release, year in zip(
            frame.index, frame['artist_name'], frame['release'], frame['year']
        ):
            display_text = f"{title}{self._SEPARATOR}{artist}"
            extra_info = []
            if pd.notna(release):
                extra_info.append(str(release))  # join() needs strings
            if pd.notna(year):
                extra_info.append(str(int(year)))
            if extra_info:
                display_text += f" [{', '.join(extra_info)}]"
            choice_to_title[display_text] = title
        self._choice_to_title = choice_to_title
        return sorted(choice_to_title)
def create_gradio_interface(mf_model):
    """Build the Gradio UI for a trained recommender.

    Args:
        mf_model: Object exposing ``_cached_choices`` (the dropdown labels)
            and ``get_recommendations`` (the click handler).

    Returns:
        The ``gr.Blocks`` app, or ``None`` if building it failed (the error
        is printed).
    """
    # Assembled from lines so the code's indentation never leaks into the
    # rendered Markdown.
    instructions = "\n".join([
        "# 🎵 Music Recommendation System 🎶",
        "### Instructions:",
        "1. ⏳ Model loads songs (~1 min)",
        "2. 🔍 Search by title, artist, album, or year",
        "3. 🎧 Select up to 5 songs",
        "4. 👉 Click for recommendations",
        "5. 📊 View confidence scores (30-100%)",
        "",
    ])
    try:
        with gr.Blocks() as demo:
            gr.Markdown(instructions)
            with gr.Row():
                input_songs = gr.Dropdown(
                    choices=mf_model._cached_choices,
                    label="Search and select songs (up to 5)",
                    info="Format: Title • by Artist [Album, Year]",
                    multiselect=True,
                    max_choices=5,
                    filterable=True,
                )
            with gr.Column():
                recommend_btn = gr.Button("Get Recommendations", size="lg")
                output_table = gr.DataFrame(
                    headers=["Song", "Artist", "Year", "Confidence"],
                    label="Recommended Songs",
                )
            # Event wiring has to happen inside the Blocks context.
            recommend_btn.click(
                fn=mf_model.get_recommendations,
                inputs=input_songs,
                outputs=output_table,
            )
        return demo
    except Exception as e:
        print(f"Error creating interface: {str(e)}")
        return None