GouthamVarma committed on
Commit
d49e583
·
verified ·
1 Parent(s): c8837a4

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +16 -10
  2. model.py +10 -2
app.py CHANGED
@@ -5,13 +5,19 @@ from sklearn.decomposition import TruncatedSVD
5
  import time
6
  from model import MatrixFactorization, create_gradio_interface
7
 
8
- # Load the preprocessed data
9
- df = pd.read_csv('data.csv')
10
-
11
- # Initialize and train the model
12
- mf_recommender = MatrixFactorization(n_factors=100)
13
- mf_recommender.fit(df)
14
-
15
- # Create and launch the Gradio interface
16
- demo = create_gradio_interface(mf_recommender)
17
- demo.launch()
 
 
 
 
 
 
 
5
  import time
6
  from model import MatrixFactorization, create_gradio_interface
7
 
8
+ try:
9
+ # Load the preprocessed data
10
+ print("Loading data...")
11
+ df = pd.read_csv('data.csv')
12
+
13
+ # Initialize and train the model
14
+ print("Initializing model...")
15
+ mf_recommender = MatrixFactorization(n_factors=50) # Reduced for speed
16
+ mf_recommender.fit(df)
17
+
18
+ # Create and launch the Gradio interface
19
+ print("Creating interface...")
20
+ demo = create_gradio_interface(mf_recommender)
21
+ demo.launch(share=True)
22
+ except Exception as e:
23
+ print(f"Error: {str(e)}")
model.py CHANGED
@@ -18,8 +18,15 @@ class MatrixFactorization:
18
  print("Training model...")
19
  start_time = time.time()
20
 
 
 
 
 
 
 
 
21
  # Pre-compute title choices for dropdown
22
- self.title_choices = df.groupby(['title', 'artist_name'])['year'].first().reset_index()
23
  self.title_choices['display'] = self.title_choices.apply(
24
  lambda x: f"{x['title']} • by {x['artist_name']}" + (f" [{int(x['year'])}]" if pd.notna(x['year']) else ""),
25
  axis=1
@@ -27,7 +34,7 @@ class MatrixFactorization:
27
 
28
  # Create pivot table and cache columns
29
  pivot = pd.pivot_table(
30
- df,
31
  values='play_count',
32
  index='user',
33
  columns='title',
@@ -43,6 +50,7 @@ class MatrixFactorization:
43
  self.item_vectors = self.model.components_
44
 
45
  print(f"Training completed in {time.time() - start_time:.2f} seconds")
 
46
 
47
  def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
48
  if not selected_titles:
 
18
  print("Training model...")
19
  start_time = time.time()
20
 
21
+ # Get top 10000 songs by play count
22
+ top_songs = df.groupby(['title', 'artist_name'])['play_count'].sum().reset_index()
23
+ top_songs = top_songs.nlargest(10000, 'play_count')
24
+
25
+ # Filter original dataframe to only include top songs
26
+ df_filtered = df[df['title'].isin(top_songs['title'])]
27
+
28
  # Pre-compute title choices for dropdown
29
+ self.title_choices = df_filtered.groupby(['title', 'artist_name'])['year'].first().reset_index()
30
  self.title_choices['display'] = self.title_choices.apply(
31
  lambda x: f"{x['title']} • by {x['artist_name']}" + (f" [{int(x['year'])}]" if pd.notna(x['year']) else ""),
32
  axis=1
 
34
 
35
  # Create pivot table and cache columns
36
  pivot = pd.pivot_table(
37
+ df_filtered,
38
  values='play_count',
39
  index='user',
40
  columns='title',
 
50
  self.item_vectors = self.model.components_
51
 
52
  print(f"Training completed in {time.time() - start_time:.2f} seconds")
53
+ print(f"Number of songs in dropdown: {len(self.title_choices)}")
54
 
55
  def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
56
  if not selected_titles: