# Spaces: Sleeping
import pandas as pd | |
import numpy as np | |
from sklearn.decomposition import TruncatedSVD | |
import time | |
import gradio as gr | |
from scipy.sparse import csr_matrix | |
class MatrixFactorization:
    """Song recommender built on a truncated SVD of a user x title matrix.

    Call ``fit`` with a listening-history frame first. Afterwards
    ``get_recommendations_from_titles`` ranks every known title by the dot
    product between its latent vector and the mean latent vector of the
    titles the caller selected.
    """

    # Text separating the song title from the artist in a dropdown label.
    # Shared by label construction and label parsing so they cannot drift.
    _DISPLAY_SEPARATOR = " • by "

    def __init__(self, n_factors=50):
        """Create an unfitted model with ``n_factors`` latent dimensions."""
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.title_choices = None
        self.columns = None
        # Declared up front so an unfitted model can be detected explicitly
        # instead of failing with AttributeError.
        self.user_vectors = None
        self.item_vectors = None

    @classmethod
    def _format_display(cls, title, artist, year):
        """Build the dropdown label for one song; the year is optional."""
        label = f"{title}{cls._DISPLAY_SEPARATOR}{artist}"
        if pd.notna(year):
            label += f" [{int(year)}]"
        return label

    def fit(self, df):
        """Train the SVD model and cache the data needed for recommendations.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``artist_name``, ``year`` and ``play_count``.
        """
        print("Training model...")
        start_time = time.time()

        # One row per (title, artist) pair, keeping the first year seen.
        choices = df.groupby(['title', 'artist_name'])['year'].first().reset_index()
        choices['display'] = [
            self._format_display(title, artist, year)
            for title, artist, year in zip(
                choices['title'], choices['artist_name'], choices['year']
            )
        ]
        self.title_choices = choices

        # Users as rows, titles as columns; duplicate (user, title) pairs are
        # combined with pivot_table's default aggregation.
        pivot = pd.pivot_table(
            df,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0,
        )
        self.columns = pivot.columns
        self.user_title_matrix = csr_matrix(pivot.values)

        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        # components_ holds one column per title (indexed like self.columns).
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.time() - start_time:.2f} seconds")

    def _titles_from_labels(self, selected_titles):
        """Translate dropdown labels back into raw song titles.

        Labels are resolved through ``title_choices`` first, so a title that
        itself contains the separator text is recovered intact. Anything not
        found there falls back to splitting on the separator, which also lets
        callers pass plain titles.
        """
        label_to_title = {}
        choices = self.title_choices
        if 'display' in choices.columns:
            chosen = choices[choices['display'].isin(set(selected_titles))]
            label_to_title = dict(zip(chosen['display'], chosen['title']))
        return [
            label_to_title.get(label, label.split(self._DISPLAY_SEPARATOR)[0])
            for label in selected_titles
        ]

    def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Dropdown labels (or plain titles) chosen by the
                user. Entries that do not match a known title are ignored.
            n_recommendations: Maximum number of rows to return.

        Returns:
            A list of ``[title, artist, year_or_None, "score%"]`` rows ordered
            by descending score. The list is empty when nothing is selected,
            when ``n_recommendations`` is not positive, when no selection
            matches a known title, or when an error occurs (the error is
            printed).
        """
        if not selected_titles or n_recommendations <= 0:
            return []

        # This method is wired directly to a UI callback, so failures are
        # reported and turned into an empty result rather than propagated.
        try:
            if (
                self.columns is None
                or self.item_vectors is None
                or self.title_choices is None
            ):
                raise RuntimeError("model has not been fitted; call fit() first")

            titles = self._titles_from_labels(selected_titles)

            # get_indexer yields -1 for unknown titles; drop those instead of
            # failing the whole request.
            columns = pd.Index(self.columns)
            positions = columns.get_indexer(titles)
            positions = positions[positions >= 0]
            if positions.size == 0:
                return []

            # Pseudo-user vector: mean of the selected titles' latent vectors.
            user_vector = self.item_vectors[:, positions].mean(axis=1)
            scores = np.dot(user_vector, self.item_vectors)
            ranked = np.argsort(scores)[::-1]

            already_selected = set(titles)
            known_titles = self.title_choices['title']
            recommendations = []
            for idx in ranked:
                title = columns[idx]
                if title in already_selected:
                    continue

                # The first catalogue entry for the title supplies artist and
                # year; a title with no entry still gets a row.
                matches = self.title_choices[known_titles == title]
                if matches.empty:
                    artist, year = None, None
                else:
                    artist = matches['artist_name'].iloc[0]
                    year = matches['year'].iloc[0]

                recommendations.append([
                    title,
                    artist,
                    int(year) if pd.notna(year) else None,
                    f"{scores[idx] * 100:.2f}%",
                ])
                if len(recommendations) >= n_recommendations:
                    break
            return recommendations
        except Exception as e:
            print(f"Error in recommendations: {str(e)}")
            return []
def create_gradio_interface(mf_model):
    """Build the Gradio Blocks UI for a fitted recommender.

    Args:
        mf_model: Object exposing ``title_choices`` (a frame with a
            ``display`` column) and ``get_recommendations_from_titles``.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    # Alphabetically ordered labels for the song picker.
    song_labels = mf_model.title_choices['display'].tolist()
    song_labels.sort()

    with gr.Blocks() as app:
        gr.Markdown("# Music Recommendation System")

        with gr.Row():
            song_picker = gr.Dropdown(
                choices=song_labels,
                label="Select songs (up to 5)",
                multiselect=True,
                max_choices=5,
                filterable=True,
            )

        with gr.Row():
            submit = gr.Button("Get Recommendations")

        results = gr.DataFrame(
            headers=["Song", "Artist", "Year", "Confidence"],
            label="Recommendations",
        )

        # Clicking the button sends the selected labels to the model and
        # shows the returned rows in the table.
        submit.click(
            fn=mf_model.get_recommendations_from_titles,
            inputs=song_picker,
            outputs=results,
        )

    return app