gagan3012 committed on
Commit
0886d5c
·
verified ·
1 Parent(s): 383d1e2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +186 -0
app.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.neighbors import NearestNeighbors
5
+ from sklearn.decomposition import TruncatedSVD
6
+ import numpy as np
7
+
8
# Page config: must run before any other Streamlit call renders output.
_PAGE_SETTINGS = {
    "page_title": "MusicMind - Smart Music Recommendations",
    "page_icon": "🎵",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)

# Custom CSS for buttons, recommendation cards and the YouTube link, followed
# by the Google Fonts stylesheet link. Kept as one raw HTML snippet.
_CUSTOM_STYLES = """
<style>
.main {
background-color: #f9f9f9;
}
.stButton button {
background-color: #76818e;
color: white;
border-radius: 20px;
padding: 10px 25px;
border: none;
transition: background-color 0.3s;
}
.stButton button:hover {
background-color: #5348d4;
}
.recommendation-card {
background-color: #76818e;
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
margin: 10px 0;
transition: transform 0.3s;
}
.recommendation-card:hover {
transform: translateY(-5px);
}
.recommendation-card h3 {
font-family: 'Poppins', sans-serif;
color: #e7d7c1;
}
.recommendation-card p {
font-family: 'Roboto', sans-serif;
color: #262730;
}
.recommendation-card a {
text-decoration: none;
color: #e7d7c1;
font-weight: 600;
display: inline-block;
margin-top: 10px;
transition: color 0.3s;
}
.youtube-link {
background-color: #ff4b4b;
color: white !important;
padding: 8px 16px;
border-radius: 20px;
text-decoration: none;
display: inline-flex;
align-items: center;
gap: 8px;
font-family: 'Roboto', sans-serif;
font-weight: 500;
transition: all 0.3s ease;
}
.youtube-link:before {
content: "▶";
font-size: 0.8em;
}
.youtube-link:hover {
background-color: #cc0000;
transform: scale(1.05);
box-shadow: 0 4px 12px rgba(255, 0, 0, 0.2);
}
</style>
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
"""
st.markdown(_CUSTOM_STYLES, unsafe_allow_html=True)
84
+
85
# Load and prepare data
@st.cache_data
def load_data():
    """Read ``song_dataset.csv`` (relative to the working directory).

    Returns:
        The CSV contents as a pandas DataFrame. The function is wrapped in
        ``st.cache_data``.
    """
    return pd.read_csv("song_dataset.csv")


df = load_data()
92
+
93
@st.cache_resource
def run_imps(df):
    """Fit the content-based and collaborative models from the play log.

    Args:
        df: DataFrame holding at least the columns ``user``, ``song``,
            ``play_count``, ``title``, ``artist_name`` and ``release``.

    Returns:
        A 7-tuple ``(songs, tfidf, tfidf_matrix, nn, user_song_matrix,
        user_factors, song_factors)`` where

        * ``songs`` is ``df`` de-duplicated on song/title/artist/release with
          a fresh 0..n-1 index, plus a ``combined_features`` text column;
          row ``i`` of ``songs`` corresponds to row ``i`` of ``tfidf_matrix``;
        * ``tfidf`` / ``tfidf_matrix`` are the fitted vectorizer and its
          output for ``combined_features``;
        * ``nn`` is a cosine ``NearestNeighbors`` index fitted on
          ``tfidf_matrix``;
        * ``user_song_matrix`` is the user x song pivot of ``play_count``;
        * ``user_factors`` / ``song_factors`` are the TruncatedSVD factors of
          that pivot (one row per user / per song column respectively).

    Raises:
        ValueError: if any required column is missing from ``df``.
    """
    required_columns = ['user', 'song', 'play_count', 'title', 'artist_name', 'release']
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Dataset must contain the following columns: {required_columns}")

    # Collaborative Filtering input: pivot the FULL play log. Pivoting after
    # the per-song de-duplication below would leave every song with a single
    # listener, which gives the SVD nothing to learn from.
    user_song_matrix = df.pivot_table(index='user', columns='song', values='play_count', fill_value=0)

    # One row per distinct song. Resetting the index keeps row labels equal
    # to row positions, so they line up with the rows of tfidf_matrix.
    songs = df.drop_duplicates(subset=['song', 'title', 'artist_name', 'release']).reset_index(drop=True)

    # Fill missing text per column BEFORE joining; filling after the join
    # would blank the whole feature string whenever one field is NaN.
    text = songs[['title', 'artist_name', 'release']].fillna("").astype(str)
    songs['combined_features'] = text['title'] + " " + text['artist_name'] + " " + text['release']

    # Content-Based Filtering
    tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
    tfidf_matrix = tfidf.fit_transform(songs['combined_features'])

    # Never ask for more default neighbours than there are songs.
    nn = NearestNeighbors(n_neighbors=max(1, min(10, len(songs))), metric='cosine', algorithm='auto')
    nn.fit(tfidf_matrix)

    # Collaborative Filtering: the component count is capped for small
    # catalogues, and the seed is fixed so factors are reproducible.
    n_components = max(1, min(20, user_song_matrix.shape[1] - 1))
    svd = TruncatedSVD(n_components=n_components, random_state=42)
    user_factors = svd.fit_transform(user_song_matrix)
    song_factors = svd.components_.T

    return songs, tfidf, tfidf_matrix, nn, user_song_matrix, user_factors, song_factors


df, tfidf, tfidf_matrix, nn, user_song_matrix, user_factors, song_factors = run_imps(df)
118
+
119
# Content-based recommendation function
def content_based_recommend(song_title, top_n=5):
    """Return songs whose TF-IDF features are closest to ``song_title``.

    Args:
        song_title: Exact title to look up in the module-level ``df``.
        top_n: Maximum number of neighbours to return.

    Returns:
        DataFrame with columns ``title``, ``artist_name`` and ``release``;
        empty when the title is not present in ``df``.
    """
    columns = ['title', 'artist_name', 'release']

    # Use the row POSITION of the first match: tfidf_matrix is addressed by
    # position, which need not equal the DataFrame's index label.
    matches = np.flatnonzero((df['title'] == song_title).to_numpy())
    if matches.size == 0:
        return pd.DataFrame(columns=columns)
    query_pos = int(matches[0])

    # kneighbors raises if asked for more neighbours than fitted samples.
    n_neighbors = min(top_n + 1, tfidf_matrix.shape[0])
    _, indices = nn.kneighbors(tfidf_matrix[query_pos], n_neighbors=n_neighbors)

    # Drop the query song explicitly; with identical feature vectors it is
    # not guaranteed to be the first neighbour returned.
    neighbour_positions = [int(pos) for pos in indices.flatten() if pos != query_pos][:top_n]
    return df.iloc[neighbour_positions][columns].drop_duplicates()
128
+
129
def collaborative_recommend(user_id, top_n=5):
    """Recommend unheard songs for ``user_id`` from the SVD factors.

    Each song is scored by the dot product of its factor vector with the
    user's factor vector. Songs the user has a positive play count for are
    excluded and the ``top_n`` highest-scoring remaining songs are kept.

    Args:
        user_id: Label of the user in ``user_song_matrix.index``.
        top_n: Number of song ids to select.

    Returns:
        DataFrame with columns ``title``, ``artist_name`` and ``release``,
        ordered from highest to lowest score; empty when the user is unknown.
    """
    columns = ['title', 'artist_name', 'release']
    if user_id not in user_song_matrix.index:
        return pd.DataFrame(columns=columns)

    user_pos = user_song_matrix.index.get_loc(user_id)
    scores = pd.Series(
        np.dot(song_factors, user_factors[user_pos]),
        index=user_song_matrix.columns,
    )

    already_played = (user_song_matrix.iloc[user_pos] > 0).to_numpy()
    top_scores = scores[~already_played].nlargest(top_n)

    # An isin() filter alone returns rows in frame order, so re-sort the
    # matches by score rank to keep the best recommendation first.
    rank = {song: position for position, song in enumerate(top_scores.index)}
    matches = df[df['song'].isin(list(rank))]
    ordered = matches.assign(_rank=matches['song'].map(rank)).sort_values('_rank', kind='stable')
    return ordered[columns].drop_duplicates()
142
+
143
# Hybrid Recommendation
def hybrid_recommendv2(user_id, song_titles, top_n=5):
    """Blend collaborative and content-based recommendations.

    Collects the collaborative recommendations for ``user_id`` and the
    content-based recommendations for every title in ``song_titles``,
    removes duplicate rows, shuffles the pool and returns the first
    ``top_n`` rows.

    Args:
        user_id: User passed to ``collaborative_recommend``.
        song_titles: Iterable of titles passed to ``content_based_recommend``.
        top_n: Per-source request size and the size of the final result.

    Returns:
        DataFrame with columns ``title``, ``artist_name`` and ``release``
        (at most ``top_n`` rows, in random order); empty when no source
        produced a recommendation.
    """
    candidates = [collaborative_recommend(user_id, top_n)]
    candidates.extend(content_based_recommend(title, top_n) for title in song_titles)

    # Concatenate once instead of growing a frame inside the loop, and skip
    # empty frames, whose concatenation is deprecated in recent pandas.
    non_empty = [frame for frame in candidates if not frame.empty]
    if not non_empty:
        return pd.DataFrame(columns=['title', 'artist_name', 'release'])

    hybrid_recs = (
        pd.concat(non_empty, ignore_index=True)
        .drop_duplicates()
        .sample(frac=1)
        .reset_index(drop=True)
    )
    return hybrid_recs.head(top_n)
151
+
152
# Sidebar and Main UI
def _recommendation_card_html(title, artist, album):
    """Build the HTML card for one recommendation.

    Dataset text is HTML-escaped before interpolation and the YouTube search
    query is URL-encoded, so characters such as ``&``, ``#``, ``<`` or spaces
    in a title cannot break the markup or truncate the search.
    """
    from html import escape
    from urllib.parse import quote_plus

    youtube_link = "https://www.youtube.com/results?search_query=" + quote_plus(f"{title} {artist}")
    # No leading indentation on any line: Markdown would otherwise be free to
    # treat an indented snippet as a code block.
    return "\n".join([
        '<div class="recommendation-card">',
        f"<h3>{escape(str(title))}</h3>",
        f"<p><strong>Artist:</strong> {escape(str(artist))}</p>",
        f"<p><strong>Album:</strong> {escape(str(album))}</p>",
        f'<a href="{escape(youtube_link, quote=True)}" target="_blank" class="youtube-link">',
        "Watch on YouTube",
        "</a>",
        "</div>",
    ])


with st.sidebar:
    st.header("🎯 Customize Your Recommendations")
    # Take users and their played songs from the interaction matrix rather
    # than from the song-level frame, which holds one row per song and so
    # cannot list every user or every song a user played.
    user_id = st.selectbox(
        "Select User ID",
        options=user_song_matrix.index.tolist(),
        index=0,
    )
    user_plays = user_song_matrix.loc[user_id]
    played_song_ids = user_plays[user_plays > 0].index
    user_songs = df.loc[df['song'].isin(played_song_ids), 'title'].dropna().unique().tolist()
    song_title = st.multiselect(
        "Select Songs You Like",
        options=user_songs,
        default=user_songs[:1] if user_songs else None,
    )
    top_n = st.slider("Number of Recommendations", min_value=1, max_value=10, value=5)
    get_recs = st.button("Get Recommendations! 🎶")

if get_recs:
    st.header("🎵 Your Recommendations")
    recommendations = hybrid_recommendv2(user_id, song_title, top_n)
    if recommendations.empty:
        st.error("No recommendations found. Try selecting different songs or users.")
    else:
        st.balloons()
        for _, row in recommendations.iterrows():
            st.markdown(
                _recommendation_card_html(row['title'], row['artist_name'], row['release']),
                unsafe_allow_html=True,
            )