GouthamVarma committed on
Commit
e95324b
·
verified ·
1 Parent(s): f96ec1f

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +55 -74
model.py CHANGED
@@ -1,97 +1,77 @@
1
- import pandas as pd
2
- import numpy as np
3
- from sklearn.decomposition import TruncatedSVD
4
- import time
5
- import gradio as gr
6
- from scipy.sparse import csr_matrix
7
-
8
  class MatrixFactorization:
9
- def __init__(self, n_factors=50):
10
  self.n_factors = n_factors
11
  self.model = TruncatedSVD(n_components=n_factors, random_state=42)
12
  self.user_title_matrix = None
13
  self.titles_df = None
14
- self.title_choices = None
15
- self.columns = None
16
 
17
  def fit(self, df):
18
  print("Training model...")
19
  start_time = time.time()
20
 
21
- # Get top 10000 songs by play count for better performance
22
- top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
23
- top_songs = top_songs.nlargest(10000, 'play_count')
24
-
25
- # Filter original dataframe
26
- df_filtered = df[df['title'].isin(top_songs['title'])]
27
-
28
- # Pre-compute formatted title choices for dropdown
29
- self.title_choices = df_filtered.groupby(['title', 'artist_name', 'release'])['year'].first().reset_index()
30
- self.title_choices['display'] = self.title_choices.apply(
31
- lambda x: f"{x['title']} • by {x['artist_name']}" +
32
- (f" [{x['release']}, {int(x['year'])}]" if pd.notna(x['year']) and pd.notna(x['release'])
33
- else f" [{int(x['year'])}]" if pd.notna(x['year'])
34
- else f" [{x['release']}]" if pd.notna(x['release'])
35
- else ""),
36
- axis=1
37
- )
38
-
39
- # Create pivot table
40
  pivot = pd.pivot_table(
41
- df_filtered,
42
  values='play_count',
43
  index='user',
44
  columns='title',
45
  fill_value=0
46
  )
47
- self.columns = pivot.columns
48
 
49
- # Use sparse matrix for efficiency
50
  self.user_title_matrix = csr_matrix(pivot.values)
51
 
52
- # Train model
 
 
 
 
 
 
53
  self.user_vectors = self.model.fit_transform(self.user_title_matrix)
54
  self.item_vectors = self.model.components_
55
 
56
  print(f"Training completed in {time.time() - start_time:.2f} seconds")
57
- print(f"Number of songs available: {len(self.title_choices)}")
58
-
 
59
  def get_recommendations_from_titles(self, selected_display_titles, n_recommendations=5):
60
- try:
61
- actual_titles = [display.split(" • by ")[0] for display in selected_display_titles]
62
-
63
- title_to_idx = {title: idx for idx, title in enumerate(self.user_title_matrix.columns)}
64
- selected_indices = [title_to_idx[title] for title in actual_titles]
65
-
66
- user_vector = np.zeros((1, self.n_factors))
67
- for idx in selected_indices:
68
- user_vector += self.item_vectors[:, idx].reshape(1, -1)
69
- user_vector = user_vector / len(selected_indices)
70
-
71
- predicted_ratings = np.dot(user_vector, self.item_vectors)
72
- predicted_ratings = predicted_ratings.flatten()
73
-
74
- titles = self.user_title_matrix.columns
75
- title_scores = [(title, score) for title, score in zip(titles, predicted_ratings)
76
- if title not in actual_titles]
77
-
78
- recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:n_recommendations]
79
-
80
- results = []
81
- for title, score in recommendations:
82
- row = self.titles_df.loc[title]
83
- confidence = 30 + (score * 70)
84
- results.append([
85
- title,
86
- row['artist_name'],
87
- int(row['year']) if pd.notna(row['year']) else None,
88
- f"{min(max(confidence, 30), 100):.2f}%"
89
- ])
90
-
91
- return results
92
- except Exception as e:
93
- print(f"Error in recommendations: {str(e)}")
94
- return []
95
 
96
  def create_gradio_interface(mf_model):
97
  with gr.Blocks() as demo:
@@ -99,10 +79,11 @@ def create_gradio_interface(mf_model):
99
  # 🎵 Music Recommendation System 🎶
100
 
101
  ### Instructions:
102
- 1. 🔍 Search songs using title, artist, album, or year
103
- 2. 🎧 Select up to 5 songs from the dropdown
104
- 3. 👉 Click 'Get Recommendations' for similar songs
105
- 4. 📊 Results show song details with confidence scores
 
106
  """)
107
 
108
  with gr.Row():
 
 
 
 
 
 
 
 
1
  class MatrixFactorization:
2
+ def __init__(self, n_factors=100):
3
  self.n_factors = n_factors
4
  self.model = TruncatedSVD(n_components=n_factors, random_state=42)
5
  self.user_title_matrix = None
6
  self.titles_df = None
7
+ self.column_names = None
 
8
 
9
  def fit(self, df):
10
  print("Training model...")
11
  start_time = time.time()
12
 
13
+ # Create pivot table and store columns
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  pivot = pd.pivot_table(
15
+ df,
16
  values='play_count',
17
  index='user',
18
  columns='title',
19
  fill_value=0
20
  )
21
+ self.column_names = pivot.columns
22
 
23
+ # Convert to sparse matrix
24
  self.user_title_matrix = csr_matrix(pivot.values)
25
 
26
+ self.titles_df = df.groupby('title').agg({
27
+ 'artist_name': 'first',
28
+ 'year': 'first',
29
+ 'play_count': 'sum',
30
+ 'release': 'first'
31
+ })
32
+
33
  self.user_vectors = self.model.fit_transform(self.user_title_matrix)
34
  self.item_vectors = self.model.components_
35
 
36
  print(f"Training completed in {time.time() - start_time:.2f} seconds")
37
+ print(f"Matrix shape: {self.user_title_matrix.shape}")
38
+ print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")
39
+
40
  def get_recommendations_from_titles(self, selected_display_titles, n_recommendations=5):
41
+ try:
42
+ actual_titles = [display.split(" • by ")[0] for display in selected_display_titles]
43
+
44
+ title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
45
+ selected_indices = [title_to_idx[title] for title in actual_titles]
46
+
47
+ user_vector = np.zeros((1, self.n_factors))
48
+ for idx in selected_indices:
49
+ user_vector += self.item_vectors[:, idx].reshape(1, -1)
50
+ user_vector = user_vector / len(selected_indices)
51
+
52
+ scores = np.dot(user_vector, self.item_vectors).flatten()
53
+
54
+ # Create recommendations using stored column names
55
+ title_scores = [(title, score) for title, score in zip(self.column_names, scores)
56
+ if title not in actual_titles]
57
+
58
+ recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:n_recommendations]
59
+
60
+ results = []
61
+ for title, score in recommendations:
62
+ row = self.titles_df.loc[title]
63
+ confidence = 30 + (score * 70) # Scale to 30-100 range
64
+ results.append([
65
+ title,
66
+ row['artist_name'],
67
+ int(row['year']) if pd.notna(row['year']) else None,
68
+ f"{min(max(confidence, 30), 100):.2f}%"
69
+ ])
70
+
71
+ return results
72
+ except Exception as e:
73
+ print(f"Error in recommendations: {str(e)}")
74
+ return []
 
75
 
76
  def create_gradio_interface(mf_model):
77
  with gr.Blocks() as demo:
 
79
  # 🎵 Music Recommendation System 🎶
80
 
81
  ### Instructions:
82
+ 1. ⏳ Given our large corpus of songs, it will take ~1 min to load
83
+ 2. 🔍 Search songs using Song Title, Artist, Album, or Year
84
+ 3. 🎧 Select up to 5 songs from the dropdown
85
+ 4. 👉 Click 'Get Recommendations' for similar songs
86
+ 5. 📊 Results show song details with confidence scores
87
  """)
88
 
89
  with gr.Row():