GouthamVarma committed on
Commit
ac960a0
·
verified ·
1 Parent(s): eefe640

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +49 -21
model.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
6
  from scipy.sparse import csr_matrix
7
 
8
  class MatrixFactorization:
9
- def __init__(self, n_factors=50): # Reduced factors
10
  self.n_factors = n_factors
11
  self.model = TruncatedSVD(n_components=n_factors, random_state=42)
12
  self.user_title_matrix = None
@@ -18,16 +18,12 @@ class MatrixFactorization:
18
  print("Training model...")
19
  start_time = time.time()
20
 
21
- # Get top songs by play count
22
- top_songs = (df.groupby('title')['play_count']
23
- .sum()
24
- .sort_values(ascending=False)
25
- .head(10000)
26
- .index)
27
-
28
  df_filtered = df[df['title'].isin(top_songs)]
29
- print(f"Filtered to {len(top_songs)} most played songs")
30
 
 
31
  pivot = pd.pivot_table(
32
  df_filtered,
33
  values='play_count',
@@ -36,8 +32,11 @@ class MatrixFactorization:
36
  fill_value=0
37
  )
38
  self.column_names = pivot.columns
 
 
39
  self.user_title_matrix = csr_matrix(pivot.values)
40
 
 
41
  self.titles_df = df_filtered.groupby('title').agg({
42
  'artist_name': 'first',
43
  'year': 'first',
@@ -49,13 +48,45 @@ class MatrixFactorization:
49
  self.user_vectors = self.model.fit_transform(self.user_title_matrix)
50
  self.item_vectors = self.model.components_
51
 
52
- # Pre-cache choices
53
- self._cached_choices = self._generate_choices()
54
 
55
  print(f"Training completed in {time.time() - start_time:.2f} seconds")
56
 
57
- def _generate_choices(self):
58
- choices = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  for title, row in self.titles_df.iterrows():
60
  display_text = f"{title} β€’ by {row['artist_name']}"
61
  extra_info = []
@@ -65,11 +96,8 @@ class MatrixFactorization:
65
  extra_info.append(str(int(row['year'])))
66
  if extra_info:
67
  display_text += f" [{', '.join(extra_info)}]"
68
- choices.append(display_text)
69
- return sorted(choices)
70
-
71
- def create_title_choices(self):
72
- return self._cached_choices if self._cached_choices else self._generate_choices()
73
 
74
  def create_gradio_interface(mf_model):
75
  try:
@@ -77,7 +105,7 @@ def create_gradio_interface(mf_model):
77
  gr.Markdown("""# 🎡 Music Recommendation System 🎢
78
 
79
  ### Instructions:
80
- 1. ⏳ Loading ~10,000 most popular songs
81
  2. πŸ” Search by title, artist, album, or year
82
  3. 🎧 Select up to 5 songs
83
  4. πŸ‘‰ Click for recommendations
@@ -86,7 +114,7 @@ def create_gradio_interface(mf_model):
86
 
87
  with gr.Row():
88
  input_songs = gr.Dropdown(
89
- choices=mf_model.create_title_choices(),
90
  label="Search and select songs (up to 5)",
91
  info="Format: Title β€’ by Artist [Album, Year]",
92
  multiselect=True,
@@ -102,7 +130,7 @@ def create_gradio_interface(mf_model):
102
  )
103
 
104
  recommend_btn.click(
105
- fn=mf_model.get_recommendations_from_titles,
106
  inputs=input_songs,
107
  outputs=output_table
108
  )
 
6
  from scipy.sparse import csr_matrix
7
 
8
  class MatrixFactorization:
9
+ def __init__(self, n_factors=50):
10
  self.n_factors = n_factors
11
  self.model = TruncatedSVD(n_components=n_factors, random_state=42)
12
  self.user_title_matrix = None
 
18
  print("Training model...")
19
  start_time = time.time()
20
 
21
+ # Get top 10000 songs
22
+ top_songs = df.groupby('title')['play_count'].sum().nlargest(10000).index
 
 
 
 
 
23
  df_filtered = df[df['title'].isin(top_songs)]
24
+ print("Filtered to 10000 most played songs")
25
 
26
+ # Create pivot table
27
  pivot = pd.pivot_table(
28
  df_filtered,
29
  values='play_count',
 
32
  fill_value=0
33
  )
34
  self.column_names = pivot.columns
35
+
36
+ # Convert to sparse matrix
37
  self.user_title_matrix = csr_matrix(pivot.values)
38
 
39
+ # Create titles dataframe
40
  self.titles_df = df_filtered.groupby('title').agg({
41
  'artist_name': 'first',
42
  'year': 'first',
 
48
  self.user_vectors = self.model.fit_transform(self.user_title_matrix)
49
  self.item_vectors = self.model.components_
50
 
51
+ # Cache choices
52
+ self._cached_choices = self.create_title_choices()
53
 
54
  print(f"Training completed in {time.time() - start_time:.2f} seconds")
55
 
56
def get_recommendations(self, selected_titles):
    """Recommend up to five titles similar to the user's selection.

    Args:
        selected_titles: Display strings picked in the dropdown. The text
            before the title/artist separator is used as the title; a
            string without the separator is used as-is.

    Returns:
        A list of ``[title, artist_name, year_or_None, confidence]`` rows,
        best match first, at most five long. ``confidence`` is a string
        such as ``"65.00%"`` clamped to the 30-100 range. An empty list is
        returned when nothing is selected, when none of the selections is
        a known title, or when an unexpected error occurs.
    """
    if not selected_titles:
        return []

    try:
        # Keep the separator literal identical to the one used when the
        # display strings are built, otherwise titles will not round-trip.
        actual_titles = [title.split(" β€’ by ")[0] for title in selected_titles]
        title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}

        # A selection that is not in the trained catalogue used to raise
        # KeyError, which the handler below swallowed, wiping out the
        # recommendations for every valid selection too. Skip it instead.
        unknown_titles = [t for t in actual_titles if t not in title_to_idx]
        if unknown_titles:
            print(f"Skipping unknown titles: {unknown_titles}")
        selected_indices = [
            title_to_idx[t] for t in actual_titles if t in title_to_idx
        ]
        if not selected_indices:
            return []

        # Average the selected item columns into one pseudo-user vector in
        # latent-factor space, then score every item against it.
        user_vector = np.mean(self.item_vectors[:, selected_indices], axis=1)
        scores = np.dot(user_vector, self.item_vectors)

        # Set membership keeps the exclusion check O(1) per catalogue item.
        excluded = set(actual_titles)
        title_scores = [
            (title, score)
            for title, score in zip(self.column_names, scores)
            if title not in excluded
        ]
        recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:5]

        results = []
        for title, score in recommendations:
            row = self.titles_df.loc[title]
            # Map the raw score onto a display percentage with a floor of
            # 30 and a ceiling of 100.
            confidence = 30 + (score * 70)
            results.append([
                title,
                row['artist_name'],
                int(row['year']) if pd.notna(row['year']) else None,
                f"{min(max(confidence, 30), 100):.2f}%",
            ])
        return results

    except Exception as e:
        # UI callback boundary: report the problem and return an empty
        # table rather than surfacing a traceback to the user.
        print(f"Error in recommendations: {str(e)}")
        return []
87
+
88
+ def create_title_choices(self):
89
+ title_choices = []
90
  for title, row in self.titles_df.iterrows():
91
  display_text = f"{title} β€’ by {row['artist_name']}"
92
  extra_info = []
 
96
  extra_info.append(str(int(row['year'])))
97
  if extra_info:
98
  display_text += f" [{', '.join(extra_info)}]"
99
+ title_choices.append(display_text)
100
+ return sorted(title_choices)
 
 
 
101
 
102
  def create_gradio_interface(mf_model):
103
  try:
 
105
  gr.Markdown("""# 🎡 Music Recommendation System 🎢
106
 
107
  ### Instructions:
108
+ 1. ⏳ Model loads top 10000 songs (~1 min)
109
  2. πŸ” Search by title, artist, album, or year
110
  3. 🎧 Select up to 5 songs
111
  4. πŸ‘‰ Click for recommendations
 
114
 
115
  with gr.Row():
116
  input_songs = gr.Dropdown(
117
+ choices=mf_model._cached_choices,
118
  label="Search and select songs (up to 5)",
119
  info="Format: Title β€’ by Artist [Album, Year]",
120
  multiselect=True,
 
130
  )
131
 
132
  recommend_btn.click(
133
+ fn=mf_model.get_recommendations,
134
  inputs=input_songs,
135
  outputs=output_table
136
  )