Spaces:
Sleeping
Sleeping
File size: 5,106 Bytes
f7c6ca0 f80207b f7c6ca0 f80207b f7c6ca0 f80207b f7c6ca0 f80207b f7c6ca0 f80207b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
import time
import gradio as gr
from scipy.sparse import csr_matrix
class MatrixFactorization:
    """Song recommender built on a truncated SVD of a user x title matrix.

    After ``fit`` the instance holds:

    * ``column_names``  - pandas Index of titles, in matrix column order
    * ``titles_df``     - per-title metadata (artist, year, plays, release)
    * ``user_vectors``  - output of ``TruncatedSVD.fit_transform``
    * ``item_vectors``  - ``TruncatedSVD.components_``; column ``i`` is the
      latent vector of ``column_names[i]``
    """

    # Joins a title to its artist in dropdown labels. The same constant is
    # used to build the labels and to parse them back, so the two can never
    # drift apart.
    _TITLE_SEPARATOR = " • by "

    def __init__(self, n_factors=100):
        """Create an unfitted model with ``n_factors`` latent dimensions."""
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.column_names = None
        # Populated by fit(); declared here so an unfitted instance has a
        # complete, inspectable set of attributes.
        self.user_vectors = None
        self.item_vectors = None

    def fit(self, df):
        """Train the model from a play-count table.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``play_count``, ``artist_name``, ``year`` and ``release``.

        Returns:
            The fitted instance, so calls can be chained.
        """
        print("Training model...")
        start_time = time.time()

        # Rows are users, columns are titles. Repeated (user, title) pairs
        # are combined by pivot_table's default aggregation; pairs that never
        # occur become 0.
        pivot = pd.pivot_table(
            df,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0,
        )
        self.column_names = pivot.columns
        self.user_title_matrix = csr_matrix(pivot.values)

        # One metadata row per title, indexed by title.
        self.titles_df = df.groupby('title').agg({
            'artist_name': 'first',
            'year': 'first',
            'play_count': 'sum',
            'release': 'first',
        })

        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.time() - start_time:.2f} seconds")
        print(f"Matrix shape: {self.user_title_matrix.shape}")
        print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")
        return self

    def get_recommendations_from_titles(self, selected_titles, top_n=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: Dropdown labels produced by
                ``create_title_choices`` (bare titles also work). Entries
                that do not match a known title are skipped.
            top_n: Maximum number of recommendations to return.

        Returns:
            A list of ``[title, artist, year_or_None, "NN.NN%"]`` rows,
            best match first. The list is empty when nothing usable was
            selected or when an error occurs (the error is printed).
        """
        if not selected_titles:
            return []
        # This method is wired directly to a UI callback, so any failure is
        # reported and turned into an empty result instead of propagating.
        try:
            if self.column_names is None or self.item_vectors is None:
                raise RuntimeError("model has not been fitted")

            # A label is "<title><separator><artist> [...]"; keep the title.
            wanted = [
                choice.split(self._TITLE_SEPARATOR, 1)[0]
                for choice in selected_titles
            ]
            # get_indexer uses the Index's hash lookup (no per-call dict
            # rebuild) and reports unknown titles as -1.
            positions = self.column_names.get_indexer(wanted)
            selected_idx = positions[positions >= 0]
            if selected_idx.size == 0:
                return []

            # Profile of the selection: mean of the chosen item vectors,
            # scored against every item by dot product.
            user_vector = self.item_vectors[:, selected_idx].mean(axis=1)
            scores = user_vector @ self.item_vectors

            # Stable descending order keeps ties in column order; the
            # already-selected titles are never recommended back.
            ranked = np.argsort(-scores, kind="stable")
            limit = max(int(top_n), 0)
            ranked = ranked[~np.isin(ranked, selected_idx)][:limit]

            results = []
            for idx in ranked:
                title = self.column_names[idx]
                row = self.titles_df.loc[title]
                year = row['year']
                # Map the raw score onto the displayed 30-100% band.
                confidence = min(max(30 + (scores[idx] * 70), 30), 100)
                results.append([
                    title,
                    row['artist_name'],
                    int(year) if pd.notna(year) else None,
                    f"{confidence:.2f}%",
                ])
            return results
        except Exception as e:
            print(f"Error in recommendations: {str(e)}")
            return []

    def create_title_choices(self):
        """Return one dropdown label per known title.

        Labels look like ``"Title • by Artist [Release, Year]"``; the
        bracketed part lists only the fields that are present and is
        omitted entirely when both are missing.
        """
        meta = self.titles_df
        sep = self._TITLE_SEPARATOR
        title_choices = []
        # Zipping the columns avoids building a Series per row (iterrows).
        for title, artist, release, year in zip(
            meta.index, meta['artist_name'], meta['release'], meta['year']
        ):
            display_text = f"{title}{sep}{artist}"
            extra_info = []
            if pd.notna(release):
                extra_info.append(release)
            if pd.notna(year):
                extra_info.append(str(int(year)))
            if extra_info:
                display_text += f" [{', '.join(extra_info)}]"
            title_choices.append(display_text)
        return title_choices
def create_gradio_interface(mf_model):
    """Build the Gradio UI for a recommender.

    Args:
        mf_model: Object providing ``create_title_choices()`` (dropdown
            labels) and ``get_recommendations_from_titles(selected)``
            (callback producing the result rows).

    Returns:
        The ``gr.Blocks`` app, or ``None`` if building it failed (the error
        is printed).
    """
    # NOTE(review): the source this was recovered from had mis-encoded
    # non-ASCII text. The bullet and the icons on the heading and on steps
    # 1 and 3 were decodable; the icons on steps 2, 4 and 5 were not and
    # were chosen from context - confirm against the intended copy.
    instructions = "\n".join([
        "# 🎵 Music Recommendation System 🎶",
        "### Instructions:",
        "1. ⏳ Given our large corpus, it will take ~1 min to load the model",
        "2. 🔍 Search songs using title, artist, album, or year",
        "3. 🎧 Select up to 5 songs from the dropdown",
        "4. 👉 Click 'Get Recommendations' for similar songs",
        "5. 📊 Results show song details with confidence scores (30-100%)",
        "",
    ])
    try:
        # NOTE(review): the layout nesting below was reconstructed from
        # source whose indentation had been lost - confirm.
        with gr.Blocks() as demo:
            gr.Markdown(instructions)
            with gr.Row():
                input_songs = gr.Dropdown(
                    choices=sorted(mf_model.create_title_choices()),
                    label="Search and select songs (up to 5)",
                    info="Format: Title • by Artist [Album, Year]",
                    multiselect=True,
                    max_choices=5,
                    filterable=True,
                )
            with gr.Column():
                recommend_btn = gr.Button("Get Recommendations", size="lg")
                output_table = gr.DataFrame(
                    headers=["Song", "Artist", "Year", "Confidence"],
                    label="Recommended Songs",
                )
            # The selected labels go straight to the model; its rows fill
            # the table.
            recommend_btn.click(
                fn=mf_model.get_recommendations_from_titles,
                inputs=input_songs,
                outputs=output_table,
            )
        return demo
    except Exception as e:
        # Callers receive None rather than an exception when the UI cannot
        # be built.
        print(f"Error creating interface: {str(e)}")
        return None