GouthamVarma committed on
Commit
eefe640
·
verified ·
1 Parent(s): f80207b

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +112 -129
model.py CHANGED
@@ -1,130 +1,113 @@
1
- import pandas as pd
2
- import numpy as np
3
- from sklearn.decomposition import TruncatedSVD
4
- import time
5
- import gradio as gr
6
- from scipy.sparse import csr_matrix
7
-
8
class MatrixFactorization:
    """Recommend songs from a truncated SVD of the user x title play counts.

    Call ``fit`` first; the other methods read the state it stores.
    """

    # Text between the title and the artist in the labels produced by
    # ``create_title_choices``; ``get_recommendations_from_titles`` splits
    # on it to recover the bare title.
    _SEPARATOR = " • by "

    def __init__(self, n_factors=100):
        """Create an unfitted model with ``n_factors`` SVD components."""
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.column_names = None

    def fit(self, df):
        """Factorize the play counts in ``df``.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``play_count``, ``artist_name``, ``year`` and ``release``.
        """
        print("Training model...")
        start_time = time.time()

        pivot = pd.pivot_table(
            df,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0,
        )
        self.column_names = pivot.columns
        self.user_title_matrix = csr_matrix(pivot.values)

        # One metadata row per title, used for labels and result rows.
        self.titles_df = df.groupby('title').agg({
            'artist_name': 'first',
            'year': 'first',
            'play_count': 'sum',
            'release': 'first',
        })

        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        # One column per title, in the same order as ``column_names``.
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.time() - start_time:.2f} seconds")
        print(f"Matrix shape: {self.user_title_matrix.shape}")
        print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")

    def get_recommendations_from_titles(self, selected_titles):
        """Return up to five songs scored against the selected ones.

        Args:
            selected_titles: Labels as produced by ``create_title_choices``.
                Labels whose title is not in the fitted matrix are ignored.

        Returns:
            A list of ``[title, artist, year or None, "NN.NN%"]`` rows,
            best match first. The list is empty when nothing usable was
            selected or when an error occurred (the error is printed).
        """
        if not selected_titles:
            return []

        try:
            wanted = {label.split(self._SEPARATOR)[0] for label in selected_titles}
            positions = [
                idx for idx, title in enumerate(self.column_names)
                if title in wanted
            ]
            if not positions:
                return []

            # Average the selected songs' latent vectors into one profile
            # and score every title against it.
            profile = self.item_vectors[:, positions].mean(axis=1)
            scores = np.dot(profile, self.item_vectors)

            candidates = [
                (title, score)
                for title, score in zip(self.column_names, scores)
                if title not in wanted
            ]
            best = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:5]

            results = []
            for title, score in best:
                row = self.titles_df.loc[title]
                # Map the raw score onto the 30-100% band shown in the UI.
                confidence = min(max(30 + (score * 70), 30), 100)
                results.append([
                    title,
                    row['artist_name'],
                    int(row['year']) if pd.notna(row['year']) else None,
                    f"{confidence:.2f}%",
                ])
            return results

        except Exception as e:
            # Best effort for the UI handler: report the error, show no rows.
            print(f"Error in recommendations: {str(e)}")
            return []

    def create_title_choices(self):
        """Return one ``Title • by Artist [Album, Year]`` label per title.

        The bracketed part lists only the fields that are present and is
        omitted when both are missing.
        """
        title_choices = []
        for title, row in self.titles_df.iterrows():
            display_text = f"{title}{self._SEPARATOR}{row['artist_name']}"
            extra_info = []
            if pd.notna(row['release']):
                extra_info.append(row['release'])
            if pd.notna(row['year']):
                extra_info.append(str(int(row['year'])))
            if extra_info:
                display_text += f" [{', '.join(extra_info)}]"
            title_choices.append(display_text)
        return title_choices
90
-
91
def create_gradio_interface(mf_model):
    """Build the Gradio UI for a fitted recommender.

    Args:
        mf_model: Object providing ``create_title_choices()`` for the
            dropdown and ``get_recommendations_from_titles`` as the button
            handler.

    Returns:
        The ``gr.Blocks`` demo, or ``None`` if building it failed (the
        error is printed).
    """
    try:
        with gr.Blocks() as demo:
            gr.Markdown("""# 🎵 Music Recommendation System 🎶

            ### Instructions:
            1. ⏳ Given our large corpus, it will take ~1 min to load the model
            2. 🔍 Search songs using title, artist, album, or year
            3. 🎧 Select up to 5 songs from the dropdown
            4. 👉 Click 'Get Recommendations' for similar songs
            5. 📊 Results show song details with confidence scores (30-100%)
            """)

            with gr.Row():
                input_songs = gr.Dropdown(
                    choices=sorted(mf_model.create_title_choices()),
                    label="Search and select songs (up to 5)",
                    info="Format: Title • by Artist [Album, Year]",
                    multiselect=True,
                    max_choices=5,
                    filterable=True,
                )

            with gr.Column():
                recommend_btn = gr.Button("Get Recommendations", size="lg")
                output_table = gr.DataFrame(
                    headers=["Song", "Artist", "Year", "Confidence"],
                    label="Recommended Songs",
                )

            # The selected labels go in; the recommendation rows fill the table.
            recommend_btn.click(
                fn=mf_model.get_recommendations_from_titles,
                inputs=input_songs,
                outputs=output_table,
            )

        return demo
    except Exception as e:
        # Callers get None instead of an exception when the UI cannot be built.
        print(f"Error creating interface: {str(e)}")
        return None
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.decomposition import TruncatedSVD
4
+ import time
5
+ import gradio as gr
6
+ from scipy.sparse import csr_matrix
7
+
8
class MatrixFactorization:
    """Recommend songs from a truncated SVD of the user x title play counts.

    ``fit`` keeps only the most played titles, factorizes their play-count
    matrix and pre-computes the dropdown labels. The other methods read the
    state that ``fit`` stores.
    """

    # Text between the title and the artist in the dropdown labels;
    # ``get_recommendations_from_titles`` splits on it to recover the title.
    _SEPARATOR = " • by "

    def __init__(self, n_factors=50):  # Reduced factors
        """Create an unfitted model with ``n_factors`` SVD components."""
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.column_names = None
        self.user_vectors = None
        self.item_vectors = None
        self._cached_choices = None

    def fit(self, df, max_songs=10000):
        """Factorize the play counts of the most played titles in ``df``.

        Args:
            df: DataFrame with the columns ``user``, ``title``,
                ``play_count``, ``artist_name``, ``year`` and ``release``.
            max_songs: Number of titles to keep, ranked by total play count.
        """
        print("Training model...")
        start_time = time.time()

        # Get top songs by play count
        top_songs = (
            df.groupby('title')['play_count']
            .sum()
            .sort_values(ascending=False)
            .head(max_songs)
            .index
        )

        df_filtered = df[df['title'].isin(top_songs)]
        print(f"Filtered to {len(top_songs)} most played songs")

        pivot = pd.pivot_table(
            df_filtered,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0,
        )
        self.column_names = pivot.columns
        self.user_title_matrix = csr_matrix(pivot.values)

        # One metadata row per title, used for labels and result rows.
        self.titles_df = df_filtered.groupby('title').agg({
            'artist_name': 'first',
            'year': 'first',
            'play_count': 'sum',
            'release': 'first',
        })

        print("Training SVD model...")
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        # One column per title, in the same order as ``column_names``.
        self.item_vectors = self.model.components_

        # Pre-cache choices
        self._cached_choices = self._generate_choices()

        print(f"Training completed in {time.time() - start_time:.2f} seconds")

    def get_recommendations_from_titles(self, selected_titles, top_n=5):
        """Return up to ``top_n`` songs scored against the selected ones.

        ``create_gradio_interface`` binds this method as the button handler.

        Args:
            selected_titles: Labels as produced by ``create_title_choices``.
                Labels whose title is not in the fitted matrix are ignored.
            top_n: Maximum number of rows to return.

        Returns:
            A list of ``[title, artist, year or None, "NN.NN%"]`` rows,
            best match first. The list is empty when nothing usable was
            selected or when an error occurred (the error is printed).
        """
        if not selected_titles:
            return []

        try:
            wanted = {label.split(self._SEPARATOR)[0] for label in selected_titles}
            positions = [
                idx for idx, title in enumerate(self.column_names)
                if title in wanted
            ]
            if not positions:
                return []

            # Average the selected songs' latent vectors into one profile
            # and score every title against it.
            profile = self.item_vectors[:, positions].mean(axis=1)
            scores = np.dot(profile, self.item_vectors)

            candidates = [
                (title, score)
                for title, score in zip(self.column_names, scores)
                if title not in wanted
            ]
            best = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

            results = []
            for title, score in best:
                row = self.titles_df.loc[title]
                # Map the raw score onto the 30-100% band shown in the UI.
                confidence = min(max(30 + (score * 70), 30), 100)
                results.append([
                    title,
                    row['artist_name'],
                    int(row['year']) if pd.notna(row['year']) else None,
                    f"{confidence:.2f}%",
                ])
            return results

        except Exception as e:
            # Best effort for the UI handler: report the error, show no rows.
            print(f"Error in recommendations: {str(e)}")
            return []

    def _generate_choices(self):
        """Build the sorted ``Title • by Artist [Album, Year]`` labels.

        Returns an empty list when no title metadata has been stored yet.
        """
        if self.titles_df is None:
            return []

        choices = []
        for title, row in self.titles_df.iterrows():
            display_text = f"{title}{self._SEPARATOR}{row['artist_name']}"
            extra_info = []
            if pd.notna(row['release']):
                extra_info.append(row['release'])
            if pd.notna(row['year']):
                extra_info.append(str(int(row['year'])))
            if extra_info:
                display_text += f" [{', '.join(extra_info)}]"
            choices.append(display_text)
        return sorted(choices)

    def create_title_choices(self):
        """Return the dropdown labels, using the cache filled by ``fit``."""
        if self._cached_choices is not None:
            return self._cached_choices
        return self._generate_choices()
73
+
74
def create_gradio_interface(mf_model):
    """Build the Gradio UI for a fitted recommender.

    Args:
        mf_model: Object providing ``create_title_choices()`` for the
            dropdown and ``get_recommendations_from_titles`` as the button
            handler.

    Returns:
        The ``gr.Blocks`` demo, or ``None`` if building it failed (the
        error is printed).
    """
    try:
        with gr.Blocks() as demo:
            gr.Markdown("""# 🎵 Music Recommendation System 🎶

            ### Instructions:
            1. ⏳ Loading ~10,000 most popular songs
            2. 🔍 Search by title, artist, album, or year
            3. 🎧 Select up to 5 songs
            4. 👉 Click for recommendations
            5. 📊 View confidence scores (30-100%)
            """)

            with gr.Row():
                input_songs = gr.Dropdown(
                    choices=mf_model.create_title_choices(),
                    label="Search and select songs (up to 5)",
                    info="Format: Title • by Artist [Album, Year]",
                    multiselect=True,
                    max_choices=5,
                    filterable=True,
                )

            with gr.Column():
                recommend_btn = gr.Button("Get Recommendations", size="lg")
                output_table = gr.DataFrame(
                    headers=["Song", "Artist", "Year", "Confidence"],
                    label="Recommended Songs",
                )

            # The selected labels go in; the recommendation rows fill the table.
            recommend_btn.click(
                fn=mf_model.get_recommendations_from_titles,
                inputs=input_songs,
                outputs=output_table,
            )

        return demo
    except Exception as e:
        # Callers get None instead of an exception when the UI cannot be built.
        print(f"Error creating interface: {str(e)}")
        return None