# Spotify / model.py
# Hosting-page residue kept for provenance (not part of the program):
#   GouthamVarma's picture · Upload 5 files · 81b661c verified ·
#   raw · history blame · 4.19 kB
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
import time
import gradio as gr
from scipy.sparse import csr_matrix
class MatrixFactorization:
    """Title recommender built on a truncated SVD of a user x title matrix.

    ``fit`` pivots a play-count table into a user x title matrix, factorizes
    it with ``TruncatedSVD`` and keeps the resulting title factors.
    ``get_recommendations_from_titles`` averages the factors of the selected
    titles and ranks every other title by its dot product with that average.
    """

    # Separator placed between title and artist in dropdown display strings.
    _DISPLAY_SEPARATOR = " • by "

    def __init__(self, n_factors=50):
        """Create an unfitted model.

        Args:
            n_factors: Number of components requested from ``TruncatedSVD``.
        """
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.title_choices = None
        self.columns = None
        # Populated by fit(); declared here so an unfitted instance exposes a
        # complete attribute set instead of raising AttributeError.
        self.user_vectors = None
        self.item_vectors = None
        self._display_to_title = None

    def _format_display(self, title, artist, year):
        """Return the dropdown label for one (title, artist, year) triple."""
        label = f"{title}{self._DISPLAY_SEPARATOR}{artist}"
        if pd.notna(year):
            label += f" [{int(year)}]"
        return label

    def _title_from_display(self, display):
        """Map a dropdown label back to the bare title.

        The exact label -> title table built by ``fit`` is preferred because
        splitting on the separator truncates titles that contain it. Labels
        not in the table (or instances without the table) fall back to the
        split, which also lets callers pass plain titles.
        """
        lookup = getattr(self, "_display_to_title", None)
        if lookup and display in lookup:
            return lookup[display]
        return display.split(self._DISPLAY_SEPARATOR)[0]

    def _lookup_metadata(self, title):
        """Return ``(artist, year)`` for *title*, or ``(None, None)`` if absent.

        ``title_choices`` may hold several rows per title (one per artist);
        the first one is used.
        """
        matches = self.title_choices[self.title_choices['title'] == title]
        if matches.empty:
            return None, None
        row = matches.iloc[0]
        year = int(row['year']) if pd.notna(row['year']) else None
        return row['artist_name'], year

    def fit(self, df):
        """Build the dropdown choices and factorize the user x title matrix.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``artist_name``, ``year`` and ``play_count``.
        """
        print("Training model...")
        start_time = time.time()

        # Pre-compute title choices for dropdown
        choices = df.groupby(['title', 'artist_name'])['year'].first().reset_index()
        choices['display'] = [
            self._format_display(title, artist, year)
            for title, artist, year in zip(
                choices['title'], choices['artist_name'], choices['year']
            )
        ]
        self.title_choices = choices
        # Exact reverse mapping so labels never have to be parsed.
        self._display_to_title = dict(zip(choices['display'], choices['title']))

        # Create pivot table and cache columns.
        # NOTE: pivot_table aggregates duplicate (user, title) rows with its
        # default aggfunc (mean).
        pivot = pd.pivot_table(
            df,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0
        )
        self.columns = pivot.columns

        # Convert to sparse matrix
        self.user_title_matrix = csr_matrix(pivot.values)

        # Train model: rows of user_vectors are users, columns of
        # item_vectors (the SVD components) are titles.
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.time() - start_time:.2f} seconds")

    def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Dropdown labels (or plain titles) chosen by the
                user. A single string is treated as a one-element list.
            n_recommendations: Maximum number of rows to return.

        Returns:
            A list of ``[title, artist, year, score]`` rows, best first.
            ``artist``/``year`` are ``None`` when no metadata is available and
            ``score`` is the raw dot-product score times 100, formatted as a
            percentage string. Returns ``[]`` when nothing is selected, when
            ``n_recommendations`` is not positive, when none of the selected
            titles is known to the model, or when an unexpected error occurs.
        """
        if not selected_titles or n_recommendations <= 0:
            return []
        if isinstance(selected_titles, str):
            selected_titles = [selected_titles]

        # This is a UI callback, so failures are reported and turned into an
        # empty result rather than propagated.
        try:
            if self.columns is None or self.item_vectors is None:
                print("Error in recommendations: model has not been fitted")
                return []

            # Extract titles from display format
            titles = [self._title_from_display(label) for label in selected_titles]

            # Get indices of selected titles; get_indexer yields -1 for
            # unknown titles, which are skipped instead of failing the request.
            positions = self.columns.get_indexer(titles)
            indices = [int(pos) for pos in positions if pos >= 0]
            if not indices:
                return []

            # Calculate user vector as the mean of the selected title factors
            user_vector = self.item_vectors[:, indices].mean(axis=1)

            # Get predictions
            scores = np.dot(user_vector, self.item_vectors)

            # Get top recommendations
            ranked = np.argsort(scores)[::-1]

            # Filter out selected titles
            excluded = set(titles)
            recommendations = []
            for idx in ranked:
                title = self.columns[idx]
                if title in excluded:
                    continue
                artist, year = self._lookup_metadata(title)
                recommendations.append([
                    title,
                    artist,
                    year,
                    f"{scores[idx] * 100:.2f}%"
                ])
                if len(recommendations) >= n_recommendations:
                    break
            return recommendations
        except Exception as e:
            print(f"Error in recommendations: {str(e)}")
            return []
def create_gradio_interface(mf_model):
    """Assemble the Gradio Blocks app for the recommender.

    The app shows a multi-select dropdown fed from
    ``mf_model.title_choices['display']``, a button, and a results table.
    Clicking the button calls ``mf_model.get_recommendations_from_titles``
    with the selected entries and renders its return value in the table.

    Args:
        mf_model: Object exposing ``title_choices`` (with a ``display``
            column) and ``get_recommendations_from_titles``.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    # NOTE(review): component nesting was reconstructed from a source whose
    # indentation had been stripped — confirm the table belongs below the
    # button row rather than inside it.
    dropdown_options = sorted(mf_model.title_choices['display'].tolist())
    table_headers = ["Song", "Artist", "Year", "Confidence"]

    with gr.Blocks() as demo:
        gr.Markdown("# Music Recommendation System")

        with gr.Row():
            song_picker = gr.Dropdown(
                choices=dropdown_options,
                label="Select songs (up to 5)",
                multiselect=True,
                max_choices=5,
                filterable=True,
            )

        with gr.Row():
            submit_button = gr.Button("Get Recommendations")

        results_table = gr.DataFrame(
            headers=table_headers,
            label="Recommendations",
        )

        # Wire the button to the model's recommendation callback.
        submit_button.click(
            fn=mf_model.get_recommendations_from_titles,
            inputs=song_picker,
            outputs=results_table,
        )

    return demo