GouthamVarma committed on
Commit
81b661c
·
verified ·
1 Parent(s): e909920

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. README.md +16 -13
  3. app.py +17 -0
  4. data.csv +3 -0
  5. model.py +114 -0
  6. requirements.txt +4 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data.csv filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,16 @@
1
- ---
2
- title: Spotify
3
- emoji: 😻
4
- colorFrom: yellow
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 5.8.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
+ # Music Recommendation System
2
+
3
+ A music recommendation system built using matrix factorization and deployed on Hugging Face Spaces.
4
+
5
+ ## Overview
6
+ This application provides music recommendations based on user-selected songs. It uses truncated SVD for matrix factorization to generate recommendations.
7
+
8
+ ## How to Use
9
+ 1. Select up to 5 songs you like from the dropdown menu
10
+ 2. Click "Get Recommendations" to see similar songs
11
+ 3. Each recommendation comes with a confidence score
12
+
13
+ ## Technical Details
14
+ - Built using Python, Gradio, and scikit-learn
15
+ - Uses TruncatedSVD for matrix factorization
16
+ - Deployed on Hugging Face Spaces
app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Entry point of the Space: loads the play-count data, trains the recommender
# once at start-up and serves the Gradio UI built by model.create_gradio_interface.
from pathlib import Path
import time

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD

from model import MatrixFactorization, create_gradio_interface

# Resolve the dataset next to this script so start-up does not depend on the
# current working directory of the process that launches the app.
DATA_PATH = Path(__file__).resolve().parent / "data.csv"

# Load the preprocessed data
df = pd.read_csv(DATA_PATH)

# Initialize and train the model
mf_recommender = MatrixFactorization(n_factors=100)
mf_recommender.fit(df)

# Create the Gradio interface; `demo` stays a module-level name so tooling that
# imports this module can still find the Blocks object.
demo = create_gradio_interface(mf_recommender)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0bef871c15556cc555f4bc94d9c43c70019e0368a0c4a59e64802237d83ec7b
3
+ size 18392003
model.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.decomposition import TruncatedSVD
4
+ import time
5
+ import gradio as gr
6
+ from scipy.sparse import csr_matrix
7
+
8
class MatrixFactorization:
    """Song recommender built on a truncated SVD of a user x title matrix.

    ``fit`` pivots the interaction table into a user-by-title play-count
    matrix, factorizes it with ``TruncatedSVD`` and keeps the resulting
    latent vectors. ``get_recommendations_from_titles`` then scores every
    title against the mean latent vector of the titles a user selected.

    Attributes set by ``fit``:
        title_choices: DataFrame with ``title``, ``artist_name``, ``year`` and
            a human readable ``display`` column (one row per title/artist).
        columns: pandas Index of titles, aligned with the matrix columns.
        user_title_matrix: sparse user x title matrix that was factorized.
        user_vectors: output of ``TruncatedSVD.fit_transform``.
        item_vectors: ``TruncatedSVD.components_`` (factors x titles).
    """

    def __init__(self, n_factors=50):
        """Create an untrained recommender with ``n_factors`` latent factors."""
        self.n_factors = n_factors
        self.model = TruncatedSVD(n_components=n_factors, random_state=42)
        self.user_title_matrix = None
        self.titles_df = None
        self.title_choices = None
        self.columns = None
        # Declared up front so an unfitted model can be detected explicitly
        # instead of failing with AttributeError.
        self.user_vectors = None
        self.item_vectors = None

    def fit(self, df):
        """Train the model on an interaction table.

        Args:
            df: DataFrame providing the columns ``user``, ``title``,
                ``artist_name``, ``year`` and ``play_count``.
        """
        print("Training model...")
        start_time = time.perf_counter()

        def format_display(row):
            # "<title> • by <artist>" plus " [<year>]" when the year is known.
            label = f"{row['title']} • by {row['artist_name']}"
            if pd.notna(row['year']):
                label += f" [{int(row['year'])}]"
            return label

        # Pre-compute title choices for dropdown
        self.title_choices = (
            df.groupby(['title', 'artist_name'])['year'].first().reset_index()
        )
        self.title_choices['display'] = self.title_choices.apply(
            format_display, axis=1
        )

        # Create pivot table and cache columns.
        # NOTE(review): pivot_table aggregates duplicate (user, title) rows
        # with its default aggfunc (mean) - confirm that is intended for
        # play counts.
        pivot = pd.pivot_table(
            df,
            values='play_count',
            index='user',
            columns='title',
            fill_value=0,
        )
        self.columns = pivot.columns

        # Convert to sparse matrix
        self.user_title_matrix = csr_matrix(pivot.values)

        # Train model
        self.user_vectors = self.model.fit_transform(self.user_title_matrix)
        self.item_vectors = self.model.components_

        print(f"Training completed in {time.perf_counter() - start_time:.2f} seconds")

    def _resolve_titles(self, selected_titles):
        """Map dropdown display strings back to the titles they stand for."""
        choices = getattr(self, "title_choices", None)
        display_to_title = {}
        if choices is not None:
            matched = choices.loc[
                choices["display"].isin(selected_titles), ["display", "title"]
            ]
            display_to_title = dict(zip(matched["display"], matched["title"]))
        # Exact lookup first: splitting on the separator mis-parses titles that
        # contain " • by " themselves. The split remains as a fallback so plain
        # titles and unknown display strings behave as before.
        return [
            display_to_title.get(entry, entry.split(" • by ")[0])
            for entry in selected_titles
        ]

    def _lookup_metadata(self, titles):
        """Return ``{title: (artist_name, year)}`` for the given titles."""
        choices = getattr(self, "title_choices", None)
        if choices is None or not titles:
            return {}
        rows = choices.loc[
            choices["title"].isin(titles), ["title", "artist_name", "year"]
        ].drop_duplicates("title")  # first artist per title
        return {
            title: (artist, year)
            for title, artist, year in rows.itertuples(index=False, name=None)
        }

    def get_recommendations_from_titles(self, selected_titles, n_recommendations=5):
        """Recommend titles similar to the selected ones.

        Args:
            selected_titles: list of dropdown display strings (or plain
                titles). Entries that do not correspond to a known title are
                ignored.
            n_recommendations: maximum number of rows to return.

        Returns:
            A list of ``[title, artist, year, score]`` rows ordered by
            descending score. ``artist`` / ``year`` are ``None`` when no
            metadata is available for the title. Returns ``[]`` when nothing
            is selected, the model is not fitted, or no selection is known.
        """
        columns = getattr(self, "columns", None)
        item_vectors = getattr(self, "item_vectors", None)
        if not selected_titles or columns is None or item_vectors is None:
            return []
        if isinstance(selected_titles, str):
            selected_titles = [selected_titles]

        titles = self._resolve_titles(selected_titles)

        # Vectorized position lookup; -1 marks titles absent from the matrix.
        columns = pd.Index(columns)
        positions = columns.get_indexer(titles)
        positions = positions[positions >= 0]
        if positions.size == 0:
            return []

        # Pseudo-user vector: mean latent vector of the selected titles.
        user_vector = item_vectors[:, positions].mean(axis=1)

        # Score every title against that vector and rank, best first.
        scores = user_vector @ item_vectors
        ranked = np.argsort(scores)[::-1]

        # Collect the best-scoring titles that were not part of the selection.
        excluded = set(titles)
        picked = []
        for idx in ranked:
            if len(picked) >= n_recommendations:
                break
            title = columns[idx]
            if title not in excluded:
                picked.append((idx, title))

        metadata = self._lookup_metadata([title for _, title in picked])

        recommendations = []
        for idx, title in picked:
            artist, year = metadata.get(title, (None, None))
            recommendations.append([
                title,
                artist,
                int(year) if pd.notna(year) else None,
                # Raw latent-space dot product scaled by 100; it is not a
                # calibrated probability even though it is shown with "%".
                f"{scores[idx] * 100:.2f}%",
            ])
        return recommendations
89
+
90
def create_gradio_interface(mf_model):
    """Assemble the Gradio Blocks UI for a fitted recommender.

    Args:
        mf_model: object exposing a ``title_choices`` DataFrame with a
            ``display`` column and a ``get_recommendations_from_titles``
            method used as the button callback.

    Returns:
        The ``gr.Blocks`` application (not yet launched).
    """
    # Alphabetically ordered labels offered in the song picker.
    song_labels = mf_model.title_choices['display'].tolist()
    song_labels.sort()

    with gr.Blocks() as demo:
        gr.Markdown("# Music Recommendation System")

        # Row 1: multi-select song picker, capped at five entries.
        with gr.Row():
            input_songs = gr.Dropdown(
                choices=song_labels,
                label="Select songs (up to 5)",
                multiselect=True,
                max_choices=5,
                filterable=True,
            )

        # Row 2: trigger button and the table that receives the results.
        with gr.Row():
            recommend_btn = gr.Button("Get Recommendations")
            output_table = gr.DataFrame(
                headers=["Song", "Artist", "Year", "Confidence"],
                label="Recommendations",
            )

        # Clicking the button feeds the selection to the model and renders
        # the returned rows in the table.
        recommend_btn.click(
            fn=mf_model.get_recommendations_from_titles,
            inputs=input_songs,
            outputs=output_table,
        )

    return demo
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.19.2
2
+ numpy==1.24.3
3
+ pandas==2.0.3
4
+ scikit-learn==1.3.0