GouthamVarma committed on
Commit
f96ec1f
·
verified ·
1 Parent(s): 98ae331

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +128 -129
model.py CHANGED
@@ -1,130 +1,129 @@
1
- import pandas as pd
2
- import numpy as np
3
- from sklearn.decomposition import TruncatedSVD
4
- import time
5
- import gradio as gr
6
- from scipy.sparse import csr_matrix
7
-
8
- class MatrixFactorization:
9
- def __init__(self, n_factors=50):
10
- self.n_factors = n_factors
11
- self.model = TruncatedSVD(n_components=n_factors, random_state=42)
12
- self.user_title_matrix = None
13
- self.titles_df = None
14
- self.title_choices = None
15
- self.columns = None
16
-
17
- def fit(self, df):
18
- print("Training model...")
19
- start_time = time.time()
20
-
21
- # Get top 10000 songs by play count for better performance
22
- top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
23
- top_songs = top_songs.nlargest(10000, 'play_count')
24
-
25
- # Filter original dataframe
26
- df_filtered = df[df['title'].isin(top_songs['title'])]
27
-
28
- # Pre-compute formatted title choices for dropdown
29
- self.title_choices = df_filtered.groupby(['title', 'artist_name', 'release'])['year'].first().reset_index()
30
- self.title_choices['display'] = self.title_choices.apply(
31
- lambda x: f"{x['title']} β€’ by {x['artist_name']}" +
32
- (f" [{x['release']}, {int(x['year'])}]" if pd.notna(x['year']) and pd.notna(x['release'])
33
- else f" [{int(x['year'])}]" if pd.notna(x['year'])
34
- else f" [{x['release']}]" if pd.notna(x['release'])
35
- else ""),
36
- axis=1
37
- )
38
-
39
- # Create pivot table
40
- pivot = pd.pivot_table(
41
- df_filtered,
42
- values='play_count',
43
- index='user',
44
- columns='title',
45
- fill_value=0
46
- )
47
- self.columns = pivot.columns
48
-
49
- # Use sparse matrix for efficiency
50
- self.user_title_matrix = csr_matrix(pivot.values)
51
-
52
- # Train model
53
- self.user_vectors = self.model.fit_transform(self.user_title_matrix)
54
- self.item_vectors = self.model.components_
55
-
56
- print(f"Training completed in {time.time() - start_time:.2f} seconds")
57
- print(f"Number of songs available: {len(self.title_choices)}")
58
-
59
- def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
60
- if not selected_titles:
61
- return []
62
-
63
- try:
64
- titles = [title.split(" β€’ by ")[0] for title in selected_titles]
65
- indices = [np.where(self.columns == title)[0][0] for title in titles]
66
-
67
- # Calculate average user vector from selected songs
68
- user_vector = np.mean([self.item_vectors[:, idx] for idx in indices], axis=0)
69
- scores = np.dot(user_vector, self.item_vectors)
70
-
71
- # Get recommendations
72
- top_indices = np.argsort(scores)[::-1]
73
- recommendations = []
74
- count = 0
75
-
76
- for idx in top_indices:
77
- title = self.columns[idx]
78
- if title not in titles:
79
- display = self.title_choices[self.title_choices['title'] == title].iloc[0]
80
- conf_score = max(min(scores[idx] * 100, 100), 30)
81
- recommendations.append([
82
- title,
83
- display['artist_name'],
84
- int(display['year']) if pd.notna(display['year']) else None,
85
- f"{conf_score:.2f}%"
86
- ])
87
- count += 1
88
- if count >= n_recommendations:
89
- break
90
-
91
- return recommendations
92
-
93
- except Exception as e:
94
- print(f"Error in recommendations: {str(e)}")
95
- return []
96
-
97
- def create_gradio_interface(mf_model):
98
- with gr.Blocks() as demo:
99
- gr.Markdown("""
100
- # 🎡 Music Recommendation System 🎢
101
-
102
- ### Instructions:
103
- 1. πŸ” Search songs using title, artist, album, or year
104
- 2. 🎧 Select up to 5 songs from the dropdown
105
- 3. πŸ‘‰ Click 'Get Recommendations' for similar songs
106
- 4. πŸ“Š Results show song details with confidence scores
107
- """)
108
-
109
- with gr.Row():
110
- input_songs = gr.Dropdown(
111
- choices=sorted(mf_model.title_choices['display'].tolist()),
112
- label="Select songs (up to 5)",
113
- multiselect=True,
114
- max_choices=5,
115
- filterable=True
116
- )
117
- with gr.Column():
118
- recommend_btn = gr.Button("Get Recommendations", size="lg")
119
- output_table = gr.DataFrame(
120
- headers=["Song", "Artist", "Year", "Confidence"],
121
- label="Recommendations"
122
- )
123
-
124
- recommend_btn.click(
125
- fn=mf_model.get_recommendations_from_titles,
126
- inputs=input_songs,
127
- outputs=output_table
128
- )
129
-
130
  return demo
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.decomposition import TruncatedSVD
4
+ import time
5
+ import gradio as gr
6
+ from scipy.sparse import csr_matrix
7
+
8
class MatrixFactorization:
    """Song recommender based on a truncated SVD of the user x title play-count matrix.

    After ``fit``:
      * ``columns`` holds the title labels of the pivot table, in the same order
        as the columns of ``item_vectors``;
      * ``title_choices`` holds one row per (title, artist_name, release) with a
        formatted ``display`` label for the dropdown;
      * ``titles_df`` holds one metadata row per title, indexed by title.
    """

    # Separator between title and artist in dropdown labels. Written as an
    # escape so the bullet character cannot be mangled by a re-encoding.
    _DISPLAY_SEPARATOR = " \u2022 by "

    def __init__(self, n_factors=50):
        """Create an unfitted model that uses ``n_factors`` SVD components."""
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.title_choices = None
        self.columns = None
        self.user_vectors = None
        self.item_vectors = None

    @classmethod
    def _format_display(cls, row):
        """Return the dropdown label for one (title, artist_name, release, year) row."""
        label = f"{row['title']}{cls._DISPLAY_SEPARATOR}{row['artist_name']}"
        has_year = pd.notna(row['year'])
        has_release = pd.notna(row['release'])
        if has_year and has_release:
            return f"{label} [{row['release']}, {int(row['year'])}]"
        if has_year:
            return f"{label} [{int(row['year'])}]"
        if has_release:
            return f"{label} [{row['release']}]"
        return label

    def _title_from_display(self, display):
        """Map a dropdown label back to the song title it was built from."""
        choices = self.title_choices
        if choices is not None and 'display' in choices:
            # Exact lookup first: it stays correct even when a title itself
            # contains the separator text.
            match = choices.loc[choices['display'] == display, 'title']
            if not match.empty:
                return match.iloc[0]
        return display.split(self._DISPLAY_SEPARATOR)[0]

    def fit(self, df):
        """Train the SVD model on play counts.

        Args:
            df: DataFrame providing the columns ``user``, ``title``,
                ``artist_name``, ``release``, ``year`` and ``play_count``.
        """
        print("Training model...")
        start_time = time.time()

        # Get top 10000 songs by play count for better performance
        top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
        top_songs = top_songs.nlargest(10000, 'play_count')

        # Filter original dataframe
        # NOTE(review): the filter and the pivot below key on title only, so
        # same-titled songs by different artists share one column - confirm intended.
        df_filtered = df[df['title'].isin(top_songs['title'])]

        # Pre-compute formatted title choices for dropdown
        self.title_choices = (
            df_filtered.groupby(['title', 'artist_name', 'release'])['year'].first().reset_index()
        )
        self.title_choices['display'] = self.title_choices.apply(self._format_display, axis=1)

        # One metadata row per title, used to describe recommendations.
        self.titles_df = self.title_choices.drop_duplicates(subset='title').set_index('title')

        # Create pivot table
        pivot = pd.pivot_table(
            df_filtered,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0
        )
        # The sparse matrix below carries no labels, so keep them separately.
        self.columns = pivot.columns

        # Use sparse matrix for efficiency
        self.user_title_matrix = csr_matrix(pivot.values)

        # Train model
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.time() - start_time:.2f} seconds")
        print(f"Number of songs available: {len(self.title_choices)}")

    def get_recommendations_from_titles(self, selected_display_titles, n_recommendations=5):
        """Recommend songs similar to the selected dropdown entries.

        Args:
            selected_display_titles: Dropdown labels (as produced in ``fit``);
                a single label string is also accepted.
            n_recommendations: Maximum number of rows to return.

        Returns:
            A list of ``[title, artist_name, year_or_None, "NN.NN%"]`` rows,
            best match first. Returns ``[]`` when nothing is selected, the
            model is not fitted, none of the selections is known, or an
            unexpected error occurs (the error is printed, never raised).
        """
        if not selected_display_titles:
            return []
        if isinstance(selected_display_titles, str):
            selected_display_titles = [selected_display_titles]
        if self.item_vectors is None or self.columns is None or self.titles_df is None:
            print("Error in recommendations: model has not been fitted")
            return []

        try:
            # Title labels live in self.columns; the csr_matrix has no labels.
            title_to_idx = {title: idx for idx, title in enumerate(self.columns)}
            chosen_titles = [self._title_from_display(label) for label in selected_display_titles]
            selected_indices = [title_to_idx[t] for t in chosen_titles if t in title_to_idx]
            if not selected_indices:
                return []

            # Average the latent vectors of the selected songs, then score
            # every song against that average.
            taste_vector = self.item_vectors[:, selected_indices].mean(axis=1)
            scores = taste_vector @ self.item_vectors

            excluded = set(chosen_titles)
            known_titles = self.titles_df.index
            results = []
            # Stable sort on the negated scores keeps column order among ties.
            for idx in np.argsort(-scores, kind="stable"):
                if len(results) >= n_recommendations:
                    break
                title = self.columns[idx]
                # Skip the user's own picks and titles without metadata
                # (rows with a missing artist/release are dropped by groupby).
                if title in excluded or title not in known_titles:
                    continue
                row = self.titles_df.loc[title]
                confidence = min(max(30 + scores[idx] * 70, 30), 100)
                results.append([
                    title,
                    row['artist_name'],
                    int(row['year']) if pd.notna(row['year']) else None,
                    f"{confidence:.2f}%"
                ])
            return results
        except Exception as e:
            # UI callback boundary: report and degrade to an empty table.
            print(f"Error in recommendations: {str(e)}")
            return []
95
+
96
def create_gradio_interface(mf_model):
    """Build the Gradio UI for a fitted recommender.

    Args:
        mf_model: Object exposing ``title_choices`` (a DataFrame with a
            ``display`` column) and ``get_recommendations_from_titles``.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    # NOTE(review): the Row/Column nesting below was reconstructed from a diff
    # view that had lost its indentation - confirm against the intended layout.
    with gr.Blocks() as demo:
        # Emoji restored from their mis-encoded (mojibake) form.
        gr.Markdown("""
        # 🎵 Music Recommendation System 🎶

        ### Instructions:
        1. 🔍 Search songs using title, artist, album, or year
        2. 🎧 Select up to 5 songs from the dropdown
        3. 👉 Click 'Get Recommendations' for similar songs
        4. 📊 Results show song details with confidence scores
        """)

        with gr.Row():
            input_songs = gr.Dropdown(
                choices=sorted(mf_model.title_choices['display'].tolist()),
                label="Select songs (up to 5)",
                multiselect=True,
                max_choices=5,
                filterable=True
            )
            with gr.Column():
                recommend_btn = gr.Button("Get Recommendations", size="lg")
                output_table = gr.DataFrame(
                    headers=["Song", "Artist", "Year", "Confidence"],
                    label="Recommendations"
                )

        # Event listeners must be registered inside the Blocks context.
        recommend_btn.click(
            fn=mf_model.get_recommendations_from_titles,
            inputs=input_songs,
            outputs=output_table
        )

    return demo