alanahmet commited on
Commit
1454959
·
1 Parent(s): a6458f8
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_option_menu import option_menu
3
+ import streamlit.components.v1 as components
4
+ import spotify_music_recommender as smr
5
+
6
+
7
def song_page(name, year):
    """Embed the Spotify player for the track matching *name* and *year*.

    The track is looked up through ``smr.find_song_uri``; whatever that
    lookup raises propagates to the caller.
    """
    # Only the last colon-separated piece of the URI goes into the embed URL.
    track_id = smr.find_song_uri(name, year).split(':')[-1]
    components.iframe(
        f'https://open.spotify.com/embed/track/{track_id}?utm_source=generator',
        height=100,
    )
12
+
13
+
14
def spr_sidebar():
    """Render the horizontal navigation menu and record the selected page.

    The chosen entry is stored in ``st.session_state.app_mode`` so that
    ``main`` can decide which page to draw.
    """
    pages = ['Home', 'Results', 'About']
    menu = option_menu(
        menu_title=None,
        options=pages,
        # Exactly one icon per option (the previous list had a spare entry).
        icons=['house', 'book', 'info-square'],
        menu_icon='cast',
        default_index=0,
        orientation='horizontal',
    )
    # The menu value is the page name itself; ignore anything unexpected so
    # a previously stored mode is left untouched.
    if menu in pages:
        st.session_state.app_mode = menu
31
+
32
+
33
def home_page():
    """Render the Home page: song lookup, review input and prediction.

    Layout: a song/year input row, a "Find Song" / "Random Song" button row,
    a free-text review field and a "Start Prediction" button.  A successful
    prediction stores the recommended track URIs in
    ``st.session_state.song_uris``.
    """
    st.title("Spotify Music Recommender")

    # Song input section
    col1, col2 = st.columns(2)
    song_input = col1.text_input("Enter a song:")
    year_input = col2.text_input("Enter the year:")

    # Button section
    col3, col4 = st.columns(2)
    find_song_button = col3.button("Find Song")
    find_random_song_button = col4.button("Random Song")

    # Critic input section
    st.subheader("Song Review")
    critic_input = st.text_input("")

    # Prediction button
    predict_button = st.button("Start Prediction")

    st.markdown("<br>", unsafe_allow_html=True)

    if find_song_button:
        # A failed lookup should produce a message, not a page traceback.
        try:
            song_page(song_input, year_input)
        except Exception:
            st.write("An error occured please try again")
    elif find_random_song_button:
        find_random_song()
    elif song_input == "" and year_input == "":
        # Nothing typed yet: show a random track so the page is not empty.
        find_random_song()

    if predict_button:
        with st.spinner('Getting Recommendations...'):
            # ``except Exception`` rather than a bare ``except`` so that
            # BaseException-derived signals (KeyboardInterrupt, SystemExit,
            # framework control flow) are not swallowed.
            try:
                song_cluster_pipeline, data, number_cols = smr.get_model_values()
                rec_splitted = smr.get_recommendation_array(
                    song_input, year_input, number_cols, critic_input)
                res = smr.recommend_gpt(
                    rec_splitted, data, song_cluster_pipeline)
                st.session_state.song_uris = smr.get_rec_song_uri(res)
                st.write("You can access recommended song at result page")
            except Exception:
                st.write("An error occured please try again")
80
+
81
+
82
+ # def text_field(label, columns=None, **input_params):
83
+ # c1, c2 = st.columns(columns or [1, 4])
84
+
85
+ # # Display field name with some alignment
86
+ # c1.markdown("##")
87
+ # c1.markdown(label)
88
+
89
+ # # Sets a default key parameter to avoid duplicate key errors
90
+ # input_params.setdefault("key", label)
91
+
92
+ # # Forward text input parameters
93
+ # return c2.text_input("", **input_params)
94
+
95
+
96
def find_random_song():
    """Show the player for a random dataset song, with a fixed fallback.

    If picking or displaying the random song fails for any ordinary reason,
    the hard-coded track "Heat Waves" (2020) is shown instead.
    """
    try:
        song_input, year_input = smr.get_random_song()
        song_page(song_input, year_input)
    except Exception:
        # Narrowed from a bare ``except`` so that KeyboardInterrupt,
        # SystemExit and other BaseException signals still propagate.
        song_input, year_input = "Heat Waves", "2020"
        song_page(song_input, year_input)
103
+
104
+
105
def result_page():
    """Render one Spotify embed per recommended track.

    Reads the URIs stored in ``st.session_state.song_uris``.  When no
    recommendations are available yet, a hint asking the user to run a
    prediction first is shown instead.
    """
    # ``time`` is not imported at module level in this file; import it here
    # so the throttling below works instead of raising NameError.
    import time

    song_uris = st.session_state.get("song_uris")
    if not song_uris:
        st.subheader("Please enter song informations and review then click start prediction")
        return

    for count, uri in enumerate(song_uris, start=1):
        track_id = uri.split(":")[-1]
        uri_link = ("https://open.spotify.com/embed/track/"
                    + track_id + "?utm_source=generator&theme=0")
        components.iframe(uri_link, height=80)
        # Pause briefly after every fifth player.
        if count % 5 == 0:
            time.sleep(1)
118
+
119
+
120
def examples_page():
    """Placeholder page: shows a single sub-header and nothing else."""
    st.subheader("Will added")
122
+
123
+
124
def About_page():
    """Render the About page: project description, feature glossary, credits.

    The text sections are written with explicit ``st.write`` calls rather
    than bare string expressions, so rendering does not depend on
    Streamlit's "magic" feature being enabled.
    """
    st.header('Development')
    st.write("""
    Have you ever listened to a song and liked it overall, but felt that certain features could be improved? Maybe the chorus was too loud, the energy level wasn't quite right, or there were either too many or too few words. I've had those experiences too, and that's what inspired me to create a song recommender based on user reviews.

    The process is straightforward: simply type in the name of a song or choose a random one, and then enter your review. The recommender will analyze your review using ChatGPT and utilize the Spotify API to generate personalized song recommendations. It's an exciting way to enhance your music discovery and tailor the recommendations to your specific preferences.

    Although it's important to note that the dataset used for training the model was relatively small (170k), the recommender still aims to provide valuable suggestions. While it may not reach its full potential due to the limited data, it serves as a starting point for exploring new songs that align with your individual tastes.

    So, if you're looking to fine-tune your music experience and discover songs that better match your preferences, give this song recommender a try. Enter your review, and let the algorithm work its magic to recommend songs that you're more likely to enjoy.\n
    Github : [alanahmet](https://github.com/alanahmet) \n

    """)
    st.subheader('Audio Features Explanation')
    st.write("""
    | Variable | Description |
    | :----: | :---: |
    | Acousticness | A confidence measure from 0.0 to 1.0 of whether the track is acoustic. 1.0 represents high confidence the track is acoustic. |
    | Danceability | Danceability describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity. A value of 0.0 is least danceable and 1.0 is most danceable. |
    | Energy | Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy. |
    | Instrumentalness | Predicts whether a track contains no vocals. "Ooh" and "aah" sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly "vocal". The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks, but confidence is higher as the value approaches 1.0. |
    | Key | The key the track is in. Integers map to pitches using standard Pitch Class notation. E.g. 0 = C, 1 = C♯/D♭, 2 = D, and so on. If no key was detected, the value is -1. |
    | Liveness | Detects the presence of an audience in the recording. Higher liveness values represent an increased probability that the track was performed live. A value above 0.8 provides strong likelihood that the track is live. |
    | Loudness | The overall loudness of a track in decibels (dB). Loudness values are averaged across the entire track and are useful for comparing relative loudness of tracks. Loudness is the quality of a sound that is the primary psychological correlate of physical strength (amplitude). Values typically range between -60 and 0 db. |
    | Mode | Mode indicates the modality (major or minor) of a track, the type of scale from which its melodic content is derived. Major is represented by 1 and minor is 0. |
    | Speechiness | Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g. talk show, audio book, poetry), the closer to 1.0 the attribute value. Values above 0.66 describe tracks that are probably made entirely of spoken words. Values between 0.33 and 0.66 describe tracks that may contain both music and speech, either in sections or layered, including such cases as rap music. Values below 0.33 most likely represent music and other non-speech-like tracks. |
    | Tempo | The overall estimated tempo of a track in beats per minute (BPM). In musical terminology, tempo is the speed or pace of a given piece and derives directly from the average beat duration. |
    | Time Signature | An estimated time signature. The time signature (meter) is a notational convention to specify how many beats are in each bar (or measure). The time signature ranges from 3 to 7 indicating time signatures of "3/4", to "7/4". |
    | Valence | A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry). |

    Information about features: [here](https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features)
    """)

    st.subheader('Credit')
    st.write("""
    Thanks for base of streamlit application to [abdelrhmanelruby](https://github.com/abdelrhmanelruby/Spotify-Recommendation-System) and dataset can be found [here](https://www.kaggle.com/datasets/vatsalmavani/spotify-dataset)
    """)
161
+
162
+
163
def main():
    """Draw the navigation menu, then render the page it selected."""
    spr_sidebar()
    renderers = {
        'Home': home_page,
        'Results': result_page,
        'About': About_page,
    }
    render = renderers.get(st.session_state.app_mode)
    if render is not None:
        render()
171
+ # if st.session_state.app_mode == 'How It Works':
172
+ # examples_page()
173
+
174
+
175
+ # Run main()
176
+ if __name__ == '__main__':
177
+ main()
cluster_labels.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/data.csv ADDED
The diff for this file is too large to render. See raw diff
 
pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38e51de0983dff547c840fdf71c2483d8f4c66dbe2c7aa6c7c384712c6f9f2e5
3
+ size 686621
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit-option-menu
2
+ streamlit
3
+ scikit-learn
4
+ openai
5
+ numpy
6
+ pandas
7
+ spotipy
8
+ scipy
spotify_music_recommender.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # # **Import Libraries**
5
+
6
+ # In[22]:
7
+
8
+
9
+ #import os
10
+ #import difflib
11
+ import numpy as np
12
+ import pandas as pd
13
+ import openai
14
+ import spotipy
15
+ import pickle
16
+
17
+ from sklearn.cluster import KMeans
18
+ from sklearn.preprocessing import StandardScaler
19
+ from sklearn.pipeline import Pipeline
20
+ from sklearn.manifold import TSNE
21
+ from sklearn.decomposition import PCA
22
+ from sklearn.metrics import euclidean_distances
23
+ from scipy.spatial.distance import cdist
24
+ from spotipy.oauth2 import SpotifyClientCredentials
25
+ from collections import defaultdict
26
+
27
+
28
+ import warnings
29
+ warnings.filterwarnings("ignore")
30
+
31
+
32
+ # In[23]:
33
+
34
+
35
def get_pipeline_data_number_cols():
    """Fit a scaler + 20-cluster k-means pipeline on ``data/data.csv``.

    Returns:
        A ``(pipeline, data, number_cols)`` tuple: the fitted pipeline, the
        dataset with an added ``cluster_label`` column, and the names of the
        numeric columns the pipeline was fitted on.
    """
    data = pd.read_csv("data/data.csv")

    # Only numeric columns take part in the clustering.
    numeric = data.select_dtypes(np.number)
    number_cols = list(numeric.columns)

    steps = [
        ('scaler', StandardScaler()),
        ('kmeans', KMeans(n_clusters=20, verbose=False)),
    ]
    song_cluster_pipeline = Pipeline(steps, verbose=False)
    song_cluster_pipeline.fit(numeric)
    data['cluster_label'] = song_cluster_pipeline.predict(numeric)

    return song_cluster_pipeline, data, number_cols
52
+
53
+
54
+ # In[24]:
55
+
56
+
57
def find_song(name, year):
    """Look a song up on Spotify and return its features as a one-row frame.

    Credentials are no longer embedded in the source.
    ``SpotifyClientCredentials()`` without arguments reads them from the
    ``SPOTIPY_CLIENT_ID`` and ``SPOTIPY_CLIENT_SECRET`` environment
    variables, which must be set in the deployment environment.

    Args:
        name: Track name to search for.
        year: Release year used in the search query.

    Returns:
        A ``pandas.DataFrame`` with one row holding the name, year, explicit
        flag, duration, popularity and every audio feature Spotify reports,
        or ``None`` when the search has no hit or no audio features exist.
    """
    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())
    results = sp.search(q='track: {} year: {}'.format(name, year), limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]
    features = sp.audio_features(track['id'])
    # The features entry can be missing for a track; treat that as
    # "not found" instead of failing on ``None.items()``.
    if not features or features[0] is None:
        return None

    song_data = {
        'name': [name],
        'year': [year],
        'explicit': [int(track['explicit'])],
        'duration_ms': [track['duration_ms']],
        'popularity': [track['popularity']],
    }
    # Audio features are merged last, so they win on overlapping keys.
    song_data.update(features[0])

    return pd.DataFrame(song_data)
78
+
79
def find_song_uri(name, year):
    """Return the Spotify URI of the first track matching *name* and *year*.

    Credentials are read by ``SpotifyClientCredentials()`` from the
    ``SPOTIPY_CLIENT_ID`` and ``SPOTIPY_CLIENT_SECRET`` environment
    variables instead of being embedded in the source.

    Raises:
        IndexError: If the search returns no track.  The exception type is
            the same as before; only the message is more descriptive.
    """
    client = spotipy.Spotify(auth_manager=SpotifyClientCredentials())
    results = client.search(q='track: {} year: {}'.format(name, year), limit=1)
    items = results['tracks']['items']
    if not items:
        raise IndexError(
            'No Spotify track found for {!r} ({})'.format(name, year))
    return items[0]['uri']
91
+
92
def format_song(song_data, number_cols):
    """Serialise the first row's *number_cols* values as ``"[v1, v2, ...]"``."""
    first_row = song_data[number_cols].values.tolist()[0]
    return '[{}]'.format(', '.join(map(str, first_row)))
96
+
97
def get_response(text):
    """Send *text* to the OpenAI completion endpoint and return the reply.

    The API key is read from the ``OPENAI_API_KEY`` environment variable;
    it must not be committed to the repository.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
    """
    import os

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")
    openai.api_key = api_key

    # NOTE(review): ``openai.Completion`` / "text-davinci-003" look like the
    # pre-1.0 SDK interface; confirm against the installed openai version.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=text,
        temperature=0.7,
        max_tokens=128,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    return response.choices[0].get("text")
111
+
112
+
113
+ # In[25]:
114
+
115
+
116
def get_text(user_critic, list_song_data):
    """Build the completion prompt for adjusting a song's feature vector.

    The prompt is made of a fixed instruction block (including summary
    statistics of the dataset columns), the user's critic, the song's current
    feature values and a reminder of the expected output format.

    Args:
        user_critic: Pre-formatted critic fragment supplied by the caller.
        list_song_data: Feature values already serialised as ``"[...]"``.

    Returns:
        The complete prompt string.
    """
    # Adjacent literals are used instead of backslash continuations inside
    # one literal, so source indentation cannot leak into the prompt.
    init_text = (
        "I want you to act as a song recommender. I will provide you songs data with following format f future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] "
        "values and user critic about the given song. And you will provide an array based on user critic.You must change at least 3 features. Do not write any explanations or other words, just return an array that include changes in future_columns "
        "and here is the describe values of future_columns \n"
        "valence year acousticness danceability duration_ms energy explicit instrumentalness key liveness loudness mode popularity speechiness tempo \n "
        "count 170653 170653 170653 170653 170653 170653 170653 170653 170653 170653 170653 170653 170653 170653 170653 \n "
        "mean 0.528587211 1976.787241 0.502114764 0.537395535 230948.3107 0.482388835 0.084575132 0.167009581 5.199844128 0.205838655 -11.46799004 0.706902311 31.43179434 0.098393262 116.8615896 \n "
        "std 0.263171464 25.91785256 0.376031725 0.176137736 126118.4147 0.267645705 0.278249228 0.313474674 3.515093906 0.174804661 5.697942912 0.455184191 21.82661514 0.162740072 30.70853304 \n "
        "min 0 1921 0 0 5108 0 0 0 0 0 -60 0 0 0 0 \n "
        "25% 0.317 1956 0.102 0.415 169827 0.255 0 0 2 0.0988 -14.615 0 11 0.0349 93.421 \n "
        "50% 0.54 1977 0.516 0.548 207467 0.471 0 0.000216 5 0.136 -10.58 1 33 0.045 114.729 \n "
        "75% 0.747 1999 0.893 0.668 262400 0.703 0 0.102 8 0.261 -7.183 1 48 0.0756 135.537 \n "
        "max 1 2020 0.996 0.988 5403500 1 1 1 11 1 3.855 1 100 0.97 243.507"
    )
    user_last = "\n\n start with the adjust following future_columns based on user_critic. "
    user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] format"
    real_features = "future_columns=" + list_song_data

    # Newlines keep the critic, the feature vector and the format reminder
    # from being fused into one run of text.
    return (init_text + user_last + user_critic
            + "\n" + real_features + "\n" + user_critic_last)
145
+
146
+
147
+ # In[26]:
148
+
149
+
150
def format_gpt_output(rec_splitted):
    """Parse the model's reply into a list of floats.

    The numbers are taken from between the first ``[`` and the last ``]`` of
    the reply, so leading newlines, a ``future_columns=`` prefix or trailing
    whitespace do not break parsing.

    Args:
        rec_splitted: Raw completion text expected to contain ``[v1, v2, ...]``.

    Returns:
        The values as a list of ``float``.

    Raises:
        ValueError: If no bracketed list is present or an entry is not a
            number.
    """
    start = rec_splitted.find("[")
    end = rec_splitted.rfind("]")
    if start == -1 or end <= start:
        raise ValueError(
            "No bracketed array found in model output: {!r}".format(rec_splitted))
    return [float(item) for item in rec_splitted[start + 1:end].split(",")]
154
+
155
+
156
+ # In[27]:
157
+
158
+
159
def recommend_gpt(song_list, spotify_data, song_cluster_pipeline, n_songs=15):
    """Return the *n_songs* dataset songs closest to the given feature vector.

    Both the dataset and the target vector are transformed by the first step
    of *song_cluster_pipeline*, then ranked by cosine distance.

    Args:
        song_list: Target feature values, in the column order used below.
        spotify_data: Song dataset containing the feature and metadata columns.
        song_cluster_pipeline: Pipeline whose first step provides ``transform``.
        n_songs: Number of recommendations to return.

    Returns:
        A list of dicts with the keys ``name``, ``year`` and ``artists``.
    """
    feature_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
                    'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
    output_cols = ['name', 'year', 'artists']

    target = np.array(song_list)

    scaler = song_cluster_pipeline.steps[0][1]
    catalogue_scaled = scaler.transform(spotify_data[feature_cols])
    target_scaled = scaler.transform(target.reshape(1, -1))

    # One row of distances: the target against every song in the dataset.
    ranking = np.argsort(cdist(target_scaled, catalogue_scaled, 'cosine'))
    nearest = list(ranking[:, :n_songs][0])

    return spotify_data.iloc[nearest][output_cols].to_dict(orient='records')
177
+
178
+
179
+ # In[28]:
180
+
181
+
182
def get_rec_song_uri(res):
    """Resolve each recommended song to its Spotify URI.

    Songs that cannot be found on Spotify are skipped, so one failed lookup
    no longer discards the whole recommendation list.

    Args:
        res: Iterable of dicts with at least the keys ``name`` and ``year``.

    Returns:
        The URIs of the songs that were found, in input order.
    """
    song_spotipy_info = []
    for song in res:
        try:
            song_spotipy_info.append(find_song_uri(song["name"], song["year"]))
        except IndexError:
            # The search returned no track for this entry.
            continue
    return song_spotipy_info
187
+
188
+
189
+ # In[30]:
190
+
191
+
192
def get_recommendation_array(song_name, song_year, number_cols, user_critic_text):
    """Ask the language model for an adjusted feature vector for a song.

    Args:
        song_name: Name of the reference song.
        song_year: Release year of the reference song.
        number_cols: Feature columns, in the order sent to the model.
        user_critic_text: The user's free-text review of the song.

    Returns:
        The model's adjusted feature values as a list of floats.

    Raises:
        ValueError: If the song cannot be found, or the reply cannot be parsed.
    """
    song_data = find_song(song_name, song_year)
    if song_data is None:
        # ``find_song`` signals "not found" with None; fail with a clear
        # message instead of an opaque TypeError further down.
        raise ValueError(
            "Song not found on Spotify: {!r} ({})".format(song_name, song_year))

    list_song_data = format_song(song_data, number_cols)
    # The critic is wrapped in a quoted pair; the closing quote was
    # previously missing.
    user_critic = "\n \"user_critic=" + user_critic_text + "\" "

    recommendation = get_response(get_text(user_critic, list_song_data))
    return format_gpt_output(recommendation)
200
+
201
+
202
+ # In[34]:
203
+
204
def get_random_song(data_path="data/data.csv"):
    """Pick one random song from the dataset.

    Args:
        data_path: CSV file with at least the columns ``name`` and ``year``.

    Returns:
        A ``(name, year)`` pair of scalar values.  Previously two one-element
        ``pandas.Series`` were returned, so their printed representation
        ended up in the Spotify search query.
    """
    data = pd.read_csv(data_path)
    row = data.sample(n=1).iloc[0]
    # Index by key: ``row.name`` would be the row's index label, not the
    # "name" column.
    return row["name"], row["year"]
208
+
209
def get_model_values():
    """Load the pre-fitted pipeline, the dataset and the feature column list.

    Reads ``pipeline.pkl``, ``data/data.csv`` and ``cluster_labels.csv`` from
    the working directory.

    Returns:
        A ``(pipeline, data, number_cols)`` tuple; ``data`` carries the
        ``cluster_label`` column taken from the labels file.
    """
    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']

    # The pickle is a file shipped alongside the code.
    with open("pipeline.pkl", 'rb') as handle:
        loaded_pipeline = pickle.load(handle)

    data = pd.read_csv("data/data.csv")
    data["cluster_label"] = pd.read_csv("cluster_labels.csv")["cluster_label"]

    return loaded_pipeline, data, number_cols
221
+
222
def control():
    """Run one end-to-end recommendation for a fixed song and print it."""
    song_cluster_pipeline, data, number_cols = get_model_values()

    rec_splitted = get_recommendation_array(
        "Poem of a Killer", 2022, number_cols, "it was dull and very loud")

    res = recommend_gpt(rec_splitted, data, song_cluster_pipeline)
    print(res)
    print(get_rec_song_uri(res))
235
+
236
+
237
+
238
+ # In[35]:
239
+
240
+ # In[ ]:
241
+
242
+
243
+
244
+