GouthamVarma committed on
Commit
98ae331
·
verified ·
1 Parent(s): d49e583

update model

Browse files
Files changed (1) hide show
  1. model.py +29 -21
model.py CHANGED
@@ -18,21 +18,25 @@ class MatrixFactorization:
18
  print("Training model...")
19
  start_time = time.time()
20
 
21
- # Get top 10000 songs by play count
22
  top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
23
  top_songs = top_songs.nlargest(10000, 'play_count')
24
 
25
- # Filter original dataframe to only include top songs
26
  df_filtered = df[df['title'].isin(top_songs['title'])]
27
 
28
- # Pre-compute title choices for dropdown
29
- self.title_choices = df_filtered.groupby(['title', 'artist_name'])['year'].first().reset_index()
30
  self.title_choices['display'] = self.title_choices.apply(
31
- lambda x: f"{x['title']} β€’ by {x['artist_name']}" + (f" [{int(x['year'])}]" if pd.notna(x['year']) else ""),
 
 
 
 
32
  axis=1
33
  )
34
 
35
- # Create pivot table and cache columns
36
  pivot = pd.pivot_table(
37
  df_filtered,
38
  values='play_count',
@@ -42,7 +46,7 @@ class MatrixFactorization:
42
  )
43
  self.columns = pivot.columns
44
 
45
- # Convert to sparse matrix
46
  self.user_title_matrix = csr_matrix(pivot.values)
47
 
48
  # Train model
@@ -50,40 +54,35 @@ class MatrixFactorization:
50
  self.item_vectors = self.model.components_
51
 
52
  print(f"Training completed in {time.time() - start_time:.2f} seconds")
53
- print(f"Number of songs in dropdown: {len(self.title_choices)}")
54
 
55
  def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
56
  if not selected_titles:
57
  return []
58
 
59
  try:
60
- # Extract titles from display format
61
  titles = [title.split(" β€’ by ")[0] for title in selected_titles]
62
-
63
- # Get indices of selected titles
64
  indices = [np.where(self.columns == title)[0][0] for title in titles]
65
 
66
- # Calculate user vector
67
  user_vector = np.mean([self.item_vectors[:, idx] for idx in indices], axis=0)
68
-
69
- # Get predictions
70
  scores = np.dot(user_vector, self.item_vectors)
71
 
72
- # Get top recommendations
73
  top_indices = np.argsort(scores)[::-1]
74
-
75
- # Filter out selected titles
76
  recommendations = []
77
  count = 0
 
78
  for idx in top_indices:
79
  title = self.columns[idx]
80
  if title not in titles:
81
  display = self.title_choices[self.title_choices['title'] == title].iloc[0]
 
82
  recommendations.append([
83
  title,
84
  display['artist_name'],
85
  int(display['year']) if pd.notna(display['year']) else None,
86
- f"{scores[idx] * 100:.2f}%"
87
  ])
88
  count += 1
89
  if count >= n_recommendations:
@@ -97,7 +96,16 @@ class MatrixFactorization:
97
 
98
  def create_gradio_interface(mf_model):
99
  with gr.Blocks() as demo:
100
- gr.Markdown("# Music Recommendation System")
 
 
 
 
 
 
 
 
 
101
  with gr.Row():
102
  input_songs = gr.Dropdown(
103
  choices=sorted(mf_model.title_choices['display'].tolist()),
@@ -106,8 +114,8 @@ def create_gradio_interface(mf_model):
106
  max_choices=5,
107
  filterable=True
108
  )
109
- with gr.Row():
110
- recommend_btn = gr.Button("Get Recommendations")
111
  output_table = gr.DataFrame(
112
  headers=["Song", "Artist", "Year", "Confidence"],
113
  label="Recommendations"
 
18
  print("Training model...")
19
  start_time = time.time()
20
 
21
+ # Get top 10000 songs by play count for better performance
22
  top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
23
  top_songs = top_songs.nlargest(10000, 'play_count')
24
 
25
+ # Filter original dataframe
26
  df_filtered = df[df['title'].isin(top_songs['title'])]
27
 
28
+ # Pre-compute formatted title choices for dropdown
29
+ self.title_choices = df_filtered.groupby(['title', 'artist_name', 'release'])['year'].first().reset_index()
30
  self.title_choices['display'] = self.title_choices.apply(
31
+ lambda x: f"{x['title']} β€’ by {x['artist_name']}" +
32
+ (f" [{x['release']}, {int(x['year'])}]" if pd.notna(x['year']) and pd.notna(x['release'])
33
+ else f" [{int(x['year'])}]" if pd.notna(x['year'])
34
+ else f" [{x['release']}]" if pd.notna(x['release'])
35
+ else ""),
36
  axis=1
37
  )
38
 
39
+ # Create pivot table
40
  pivot = pd.pivot_table(
41
  df_filtered,
42
  values='play_count',
 
46
  )
47
  self.columns = pivot.columns
48
 
49
+ # Use sparse matrix for efficiency
50
  self.user_title_matrix = csr_matrix(pivot.values)
51
 
52
  # Train model
 
54
  self.item_vectors = self.model.components_
55
 
56
  print(f"Training completed in {time.time() - start_time:.2f} seconds")
57
+ print(f"Number of songs available: {len(self.title_choices)}")
58
 
59
  def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
60
  if not selected_titles:
61
  return []
62
 
63
  try:
 
64
  titles = [title.split(" β€’ by ")[0] for title in selected_titles]
 
 
65
  indices = [np.where(self.columns == title)[0][0] for title in titles]
66
 
67
+ # Calculate average user vector from selected songs
68
  user_vector = np.mean([self.item_vectors[:, idx] for idx in indices], axis=0)
 
 
69
  scores = np.dot(user_vector, self.item_vectors)
70
 
71
+ # Get recommendations
72
  top_indices = np.argsort(scores)[::-1]
 
 
73
  recommendations = []
74
  count = 0
75
+
76
  for idx in top_indices:
77
  title = self.columns[idx]
78
  if title not in titles:
79
  display = self.title_choices[self.title_choices['title'] == title].iloc[0]
80
+ conf_score = max(min(scores[idx] * 100, 100), 30)
81
  recommendations.append([
82
  title,
83
  display['artist_name'],
84
  int(display['year']) if pd.notna(display['year']) else None,
85
+ f"{conf_score:.2f}%"
86
  ])
87
  count += 1
88
  if count >= n_recommendations:
 
96
 
97
  def create_gradio_interface(mf_model):
98
  with gr.Blocks() as demo:
99
+ gr.Markdown("""
100
+ # 🎡 Music Recommendation System 🎢
101
+
102
+ ### Instructions:
103
+ 1. πŸ” Search songs using title, artist, album, or year
104
+ 2. 🎧 Select up to 5 songs from the dropdown
105
+ 3. πŸ‘‰ Click 'Get Recommendations' for similar songs
106
+ 4. πŸ“Š Results show song details with confidence scores
107
+ """)
108
+
109
  with gr.Row():
110
  input_songs = gr.Dropdown(
111
  choices=sorted(mf_model.title_choices['display'].tolist()),
 
114
  max_choices=5,
115
  filterable=True
116
  )
117
+ with gr.Column():
118
+ recommend_btn = gr.Button("Get Recommendations", size="lg")
119
  output_table = gr.DataFrame(
120
  headers=["Song", "Artist", "Year", "Confidence"],
121
  label="Recommendations"