GouthamVarma committed on
Commit
f7c6ca0
·
verified ·
1 Parent(s): e95324b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +8 -7
  2. model.py +125 -109
app.py CHANGED
@@ -3,21 +3,22 @@ import pandas as pd
3
  import numpy as np
4
  from sklearn.decomposition import TruncatedSVD
5
  import time
6
- from model import MatrixFactorization, create_gradio_interface
7
 
8
  try:
9
- # Load the preprocessed data
10
  print("Loading data...")
11
  df = pd.read_csv('data.csv')
12
 
13
- # Initialize and train the model
14
  print("Initializing model...")
15
- mf_recommender = MatrixFactorization(n_factors=50) # Reduced for speed
16
  mf_recommender.fit(df)
17
 
18
- # Create and launch the Gradio interface
19
  print("Creating interface...")
20
- demo = create_gradio_interface(mf_recommender)
21
- demo.launch(share=True)
 
22
  except Exception as e:
23
  print(f"Error: {str(e)}")
 
3
  import numpy as np
4
  from sklearn.decomposition import TruncatedSVD
5
  import time
6
+ from model import MatrixFactorization
7
 
8
  try:
9
+ # Load data
10
  print("Loading data...")
11
  df = pd.read_csv('data.csv')
12
 
13
+ # Initialize model
14
  print("Initializing model...")
15
+ mf_recommender = MatrixFactorization(n_factors=100)
16
  mf_recommender.fit(df)
17
 
18
+ # Create interface
19
  print("Creating interface...")
20
+ demo = mf_recommender.create_interface()
21
+ demo.launch()
22
+
23
  except Exception as e:
24
  print(f"Error: {str(e)}")
model.py CHANGED
@@ -1,110 +1,126 @@
1
- class MatrixFactorization:
2
- def __init__(self, n_factors=100):
3
- self.n_factors = n_factors
4
- self.model = TruncatedSVD(n_components=n_factors, random_state=42)
5
- self.user_title_matrix = None
6
- self.titles_df = None
7
- self.column_names = None
8
-
9
- def fit(self, df):
10
- print("Training model...")
11
- start_time = time.time()
12
-
13
- # Create pivot table and store columns
14
- pivot = pd.pivot_table(
15
- df,
16
- values='play_count',
17
- index='user',
18
- columns='title',
19
- fill_value=0
20
- )
21
- self.column_names = pivot.columns
22
-
23
- # Convert to sparse matrix
24
- self.user_title_matrix = csr_matrix(pivot.values)
25
-
26
- self.titles_df = df.groupby('title').agg({
27
- 'artist_name': 'first',
28
- 'year': 'first',
29
- 'play_count': 'sum',
30
- 'release': 'first'
31
- })
32
-
33
- self.user_vectors = self.model.fit_transform(self.user_title_matrix)
34
- self.item_vectors = self.model.components_
35
-
36
- print(f"Training completed in {time.time() - start_time:.2f} seconds")
37
- print(f"Matrix shape: {self.user_title_matrix.shape}")
38
- print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")
39
-
40
- def get_recommendations_from_titles(self, selected_display_titles, n_recommendations=5):
41
- try:
42
- actual_titles = [display.split(" • by ")[0] for display in selected_display_titles]
43
-
44
- title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
45
- selected_indices = [title_to_idx[title] for title in actual_titles]
46
-
47
- user_vector = np.zeros((1, self.n_factors))
48
- for idx in selected_indices:
49
- user_vector += self.item_vectors[:, idx].reshape(1, -1)
50
- user_vector = user_vector / len(selected_indices)
51
-
52
- scores = np.dot(user_vector, self.item_vectors).flatten()
53
-
54
- # Create recommendations using stored column names
55
- title_scores = [(title, score) for title, score in zip(self.column_names, scores)
56
- if title not in actual_titles]
57
-
58
- recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:n_recommendations]
59
-
60
- results = []
61
- for title, score in recommendations:
62
- row = self.titles_df.loc[title]
63
- confidence = 30 + (score * 70) # Scale to 30-100 range
64
- results.append([
65
- title,
66
- row['artist_name'],
67
- int(row['year']) if pd.notna(row['year']) else None,
68
- f"{min(max(confidence, 30), 100):.2f}%"
69
- ])
70
-
71
- return results
72
- except Exception as e:
73
- print(f"Error in recommendations: {str(e)}")
74
- return []
75
-
76
- def create_gradio_interface(mf_model):
77
- with gr.Blocks() as demo:
78
- gr.Markdown("""
79
- # 🎵 Music Recommendation System 🎶
80
-
81
- ### Instructions:
82
- 1. ⏳ Given our large corpus of songs, it will take ~1 min to load
83
- 2. 🔍 Search songs using Song Title, Artist, Album, or Year
84
- 3. 🎧 Select up to 5 songs from the dropdown
85
- 4. 👉 Click 'Get Recommendations' for similar songs
86
- 5. 📊 Results show song details with confidence scores
87
- """)
88
-
89
- with gr.Row():
90
- input_songs = gr.Dropdown(
91
- choices=sorted(mf_model.title_choices['display'].tolist()),
92
- label="Select songs (up to 5)",
93
- multiselect=True,
94
- max_choices=5,
95
- filterable=True
96
- )
97
- with gr.Column():
98
- recommend_btn = gr.Button("Get Recommendations", size="lg")
99
- output_table = gr.DataFrame(
100
- headers=["Song", "Artist", "Year", "Confidence"],
101
- label="Recommendations"
102
- )
103
-
104
- recommend_btn.click(
105
- fn=mf_model.get_recommendations_from_titles,
106
- inputs=input_songs,
107
- outputs=output_table
108
- )
109
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  return demo
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.decomposition import TruncatedSVD
4
+ import time
5
+ import gradio as gr
6
+ from scipy.sparse import csr_matrix
7
+
8
+ class MatrixFactorization:
9
+ def __init__(self, n_factors=100):
10
+ self.n_factors = n_factors
11
+ self.model = TruncatedSVD(n_components=n_factors, random_state=42)
12
+ self.user_title_matrix = None
13
+ self.titles_df = None
14
+ self.column_names = None
15
+
16
+ def fit(self, df):
17
+ print("Training model...")
18
+ start_time = time.time()
19
+
20
+ # Create pivot table and store columns
21
+ pivot = pd.pivot_table(
22
+ df,
23
+ values='play_count',
24
+ index='user',
25
+ columns='title',
26
+ fill_value=0
27
+ )
28
+ self.column_names = pivot.columns
29
+
30
+ # Convert to sparse matrix
31
+ self.user_title_matrix = csr_matrix(pivot.values)
32
+
33
+ self.titles_df = df.groupby('title').agg({
34
+ 'artist_name': 'first',
35
+ 'year': 'first',
36
+ 'play_count': 'sum',
37
+ 'release': 'first'
38
+ })
39
+
40
+ self.user_vectors = self.model.fit_transform(self.user_title_matrix)
41
+ self.item_vectors = self.model.components_
42
+
43
+ print(f"Training completed in {time.time() - start_time:.2f} seconds")
44
+ print(f"Matrix shape: {self.user_title_matrix.shape}")
45
+ print(f"Explained variance ratio: {self.model.explained_variance_ratio_.sum():.4f}")
46
+
47
+ def get_recommendations(self, selected_titles):
48
+ if not selected_titles:
49
+ return []
50
+
51
+ try:
52
+ actual_titles = [title.split(" • by ")[0] for title in selected_titles]
53
+ title_to_idx = {title: idx for idx, title in enumerate(self.column_names)}
54
+ selected_indices = [title_to_idx[title] for title in actual_titles]
55
+
56
+ user_vector = np.mean([self.item_vectors[:, idx] for idx in selected_indices], axis=0)
57
+ scores = np.dot(user_vector, self.item_vectors)
58
+
59
+ title_scores = [(title, score) for title, score in zip(self.column_names, scores)
60
+ if title not in actual_titles]
61
+ recommendations = sorted(title_scores, key=lambda x: x[1], reverse=True)[:5]
62
+
63
+ results = []
64
+ for title, score in recommendations:
65
+ row = self.titles_df.loc[title]
66
+ confidence = 30 + (score * 70)
67
+ results.append([
68
+ title,
69
+ row['artist_name'],
70
+ int(row['year']) if pd.notna(row['year']) else None,
71
+ f"{min(max(confidence, 30), 100):.2f}%"
72
+ ])
73
+ return results
74
+
75
+ except Exception as e:
76
+ print(f"Error in recommendations: {str(e)}")
77
+ return []
78
+
79
+ def create_interface(self):
80
+ title_choices = []
81
+ for title, row in self.titles_df.iterrows():
82
+ display_text = f"{title} • by {row['artist_name']}"
83
+ extra_info = []
84
+ if pd.notna(row['release']):
85
+ extra_info.append(row['release'])
86
+ if pd.notna(row['year']):
87
+ extra_info.append(str(int(row['year'])))
88
+ if extra_info:
89
+ display_text += f" [{', '.join(extra_info)}]"
90
+ title_choices.append(display_text)
91
+
92
+ def create_gradio_interface(mf_model):
93
+ with gr.Blocks() as demo:
94
+ gr.Markdown("""
95
+ # 🎵 Music Recommendation System 🎶
96
+
97
+ ### Instructions:
98
+ 1. ⏳ Given our large corpus, it will take ~1 min to load the model
99
+ 1. 🔍 Search songs using title, artist, album, or year
100
+ 2. 🎧 Select up to 5 songs from the dropdown
101
+ 3. 👉 Click 'Get Recommendations' for similar songs
102
+ 4. 📊 Results show song details with confidence scores
103
+ """)
104
+
105
+ with gr.Row():
106
+ input_songs = gr.Dropdown(
107
+ choices=sorted(mf_model.title_choices['display'].tolist()),
108
+ label="Select songs (up to 5)",
109
+ multiselect=True,
110
+ max_choices=5,
111
+ filterable=True
112
+ )
113
+ with gr.Column():
114
+ recommend_btn = gr.Button("Get Recommendations", size="lg")
115
+ output_table = gr.DataFrame(
116
+ headers=["Song", "Artist", "Year", "Confidence"],
117
+ label="Recommendations"
118
+ )
119
+
120
+ recommend_btn.click(
121
+ fn=mf_model.get_recommendations_from_titles,
122
+ inputs=input_songs,
123
+ outputs=output_table
124
+ )
125
+
126
  return demo