bugs fixed
Browse files- .cache +1 -0
- __pycache__/spotify_music_recommender.cpython-310.pyc +0 -0
- app.py +18 -16
- cluster_labels.csv → data/cluster_labels.csv +0 -0
- pipeline.pkl → data/pipeline.pkl +0 -0
- spotify_music_recommender.py +59 -62
.cache
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"access_token": "BQCcgc_4zWsgVUt9b9_r6ka5YkOS0KReZE6pPHKS0ELyM5K3KOfyyUyED87zZORxZbpPAGRxFEpQ8e24eQo6Mleh2xP8tZzVKkqnCDszX68thYCwqbc", "token_type": "Bearer", "expires_in": 3600, "expires_at": 1685645349}
|
__pycache__/spotify_music_recommender.cpython-310.pyc
ADDED
Binary file (7.81 kB). View file
|
|
app.py
CHANGED
@@ -3,6 +3,9 @@ from streamlit_option_menu import option_menu
|
|
3 |
import streamlit.components.v1 as components
|
4 |
import spotify_music_recommender as smr
|
5 |
|
|
|
|
|
|
|
6 |
|
7 |
def song_page(name, year):
|
8 |
song_uri = smr.find_song_uri(name, year)
|
@@ -15,7 +18,7 @@ def spr_sidebar():
|
|
15 |
menu = option_menu(
|
16 |
menu_title=None,
|
17 |
options=['Home', 'Results', 'About'],
|
18 |
-
icons=['house', 'book', 'info-square'
|
19 |
menu_icon='cast',
|
20 |
default_index=0,
|
21 |
orientation='horizontal'
|
@@ -36,13 +39,13 @@ def home_page():
|
|
36 |
st.title("Spotify Music Recommender")
|
37 |
|
38 |
# Song input section
|
39 |
-
#st.subheader("")
|
40 |
col1, col2 = st.columns(2)
|
41 |
song_input = col1.text_input("Enter a song:")
|
42 |
year_input = col2.text_input("Enter the year:")
|
43 |
|
44 |
# Button section
|
45 |
-
#st.subheader("")
|
46 |
col3, col4 = st.columns(2)
|
47 |
find_song_button = col3.button("Find Song")
|
48 |
find_random_song_button = col4.button("Random Song")
|
@@ -54,25 +57,27 @@ def home_page():
|
|
54 |
# Prediction button
|
55 |
predict_button = st.button("Start Prediction")
|
56 |
|
57 |
-
st.markdown("<br>", unsafe_allow_html=True)
|
58 |
-
|
59 |
-
# Url = st.text_input(label="Song Url",key='song_url',on_change=update_song_url)
|
60 |
if find_song_button:
|
61 |
song_page(song_input, year_input)
|
62 |
elif find_random_song_button:
|
63 |
find_random_song()
|
64 |
-
elif song_input == "" and year_input == "":
|
|
|
65 |
find_random_song()
|
66 |
|
67 |
if predict_button:
|
68 |
with st.spinner('Getting Recommendations...'):
|
69 |
try:
|
70 |
-
|
|
|
|
|
|
|
|
|
71 |
user_critic_text = critic_input
|
72 |
rec_splitted = smr.get_recommendation_array(
|
73 |
song_input, year_input, number_cols, user_critic_text)
|
74 |
res = smr.recommend_gpt(
|
75 |
-
rec_splitted, data, song_cluster_pipeline)
|
76 |
st.session_state.song_uris = smr.get_rec_song_uri(res)
|
77 |
st.write("You can access recommended song at result page")
|
78 |
except:
|
@@ -103,18 +108,15 @@ def find_random_song():
|
|
103 |
|
104 |
|
105 |
def result_page():
|
106 |
-
|
107 |
-
i = 0
|
108 |
for uri in st.session_state.song_uris:
|
109 |
uri = uri.split(":")[-1]
|
110 |
uri_link = "https://open.spotify.com/embed/track/" + \
|
111 |
uri + "?utm_source=generator&theme=0"
|
112 |
components.iframe(uri_link, height=80)
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
except:
|
117 |
-
st.subheader("Please enter song informations and review then click start prediction")
|
118 |
|
119 |
|
120 |
def examples_page():
|
|
|
3 |
import streamlit.components.v1 as components
|
4 |
import spotify_music_recommender as smr
|
5 |
|
6 |
+
if "song_init" not in st.session_state:
|
7 |
+
st.session_state.song_init = False
|
8 |
+
|
9 |
|
10 |
def song_page(name, year):
|
11 |
song_uri = smr.find_song_uri(name, year)
|
|
|
18 |
menu = option_menu(
|
19 |
menu_title=None,
|
20 |
options=['Home', 'Results', 'About'],
|
21 |
+
icons=['house', 'book', 'info-square'],
|
22 |
menu_icon='cast',
|
23 |
default_index=0,
|
24 |
orientation='horizontal'
|
|
|
39 |
st.title("Spotify Music Recommender")
|
40 |
|
41 |
# Song input section
|
42 |
+
# st.subheader("")
|
43 |
col1, col2 = st.columns(2)
|
44 |
song_input = col1.text_input("Enter a song:")
|
45 |
year_input = col2.text_input("Enter the year:")
|
46 |
|
47 |
# Button section
|
48 |
+
# st.subheader("")
|
49 |
col3, col4 = st.columns(2)
|
50 |
find_song_button = col3.button("Find Song")
|
51 |
find_random_song_button = col4.button("Random Song")
|
|
|
57 |
# Prediction button
|
58 |
predict_button = st.button("Start Prediction")
|
59 |
|
|
|
|
|
|
|
60 |
if find_song_button:
|
61 |
song_page(song_input, year_input)
|
62 |
elif find_random_song_button:
|
63 |
find_random_song()
|
64 |
+
elif song_input == "" and year_input == "" and not st.session_state.song_init:
|
65 |
+
st.session_state.song_init = True
|
66 |
find_random_song()
|
67 |
|
68 |
if predict_button:
|
69 |
with st.spinner('Getting Recommendations...'):
|
70 |
try:
|
71 |
+
data_path = "data/data.csv"
|
72 |
+
file_path = "data/pipeline.pkl"
|
73 |
+
cluster_labels = "data/cluster_labels.csv"
|
74 |
+
song_cluster_pipeline, data, number_cols = smr.get_model_values(
|
75 |
+
data_path, file_path, cluster_labels)
|
76 |
user_critic_text = critic_input
|
77 |
rec_splitted = smr.get_recommendation_array(
|
78 |
song_input, year_input, number_cols, user_critic_text)
|
79 |
res = smr.recommend_gpt(
|
80 |
+
rec_splitted, data, song_cluster_pipeline, 15)
|
81 |
st.session_state.song_uris = smr.get_rec_song_uri(res)
|
82 |
st.write("You can access recommended song at result page")
|
83 |
except:
|
|
|
108 |
|
109 |
|
110 |
def result_page():
    """Render one embedded Spotify player per recommended track.

    Reads ``st.session_state.song_uris``. When no recommendations have been
    stored yet, a prompt asking the user to run a prediction is shown instead.
    """
    # Guard clause: nothing stored yet, so only show the hint.
    if "song_uris" not in st.session_state:
        st.subheader(
            "Please enter song informations and review then click start prediction")
        return

    for uri in st.session_state.song_uris:
        # Only the last ':'-separated segment of the URI goes in the embed URL.
        track_id = uri.split(":")[-1]
        embed_url = ("https://open.spotify.com/embed/track/"
                     + track_id
                     + "?utm_source=generator&theme=0")
        components.iframe(embed_url, height=80)
|
|
|
|
|
120 |
|
121 |
|
122 |
def examples_page():
|
cluster_labels.csv → data/cluster_labels.csv
RENAMED
File without changes
|
pipeline.pkl → data/pipeline.pkl
RENAMED
File without changes
|
spotify_music_recommender.py
CHANGED
@@ -6,8 +6,8 @@
|
|
6 |
# In[22]:
|
7 |
|
8 |
|
9 |
-
#import os
|
10 |
-
#import difflib
|
11 |
import numpy as np
|
12 |
import pandas as pd
|
13 |
import openai
|
@@ -28,26 +28,23 @@ from collections import defaultdict
|
|
28 |
import warnings
|
29 |
warnings.filterwarnings("ignore")
|
30 |
|
31 |
-
|
32 |
# In[23]:
|
33 |
|
34 |
|
35 |
def get_pipeline_data_number_cols():
|
36 |
data = pd.read_csv("data/data.csv")
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
|
41 |
-
('kmeans', KMeans(n_clusters=20,
|
42 |
verbose=False))
|
43 |
-
|
44 |
|
45 |
X = data.select_dtypes(np.number)
|
46 |
number_cols = list(X.columns)
|
47 |
song_cluster_pipeline.fit(X)
|
48 |
song_cluster_labels = song_cluster_pipeline.predict(X)
|
49 |
data['cluster_label'] = song_cluster_labels
|
50 |
-
|
51 |
return song_cluster_pipeline, data, number_cols
|
52 |
|
53 |
|
@@ -55,9 +52,10 @@ def get_pipeline_data_number_cols():
|
|
55 |
|
56 |
|
57 |
def find_song(name, year):
|
58 |
-
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
|
|
|
59 |
song_data = defaultdict()
|
60 |
-
results = sp.search(q=
|
61 |
if results['tracks']['items'] == []:
|
62 |
return None
|
63 |
|
@@ -76,38 +74,43 @@ def find_song(name, year):
|
|
76 |
|
77 |
return pd.DataFrame(song_data)
|
78 |
|
|
|
79 |
def find_song_uri(name, year):
|
80 |
# Create a Spotify client object.
|
81 |
-
client = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
|
|
|
82 |
# Get the name of the song you want to get the ID for.
|
83 |
song_name = name
|
84 |
# Call the `search` method with the song name.
|
85 |
-
results = client.search(q=
|
86 |
# Get the first result.
|
87 |
track = results['tracks']['items'][0]
|
88 |
# The Spotify ID of the song will be in the `id` property.
|
89 |
song_id = track['uri']
|
90 |
return song_id
|
91 |
|
|
|
92 |
def format_song(song_data, number_cols):
|
93 |
list_song_data = song_data[number_cols].values.tolist()[0]
|
94 |
-
list_song_data = '[' + ', '.join([str(num)
|
|
|
95 |
return list_song_data
|
96 |
|
|
|
97 |
def get_response(text):
|
98 |
-
|
99 |
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
|
110 |
-
|
111 |
|
112 |
|
113 |
# In[25]:
|
@@ -127,20 +130,19 @@ def get_text(user_critic, list_song_data):
|
|
127 |
75% 0.747 1999 0.893 0.668 262400 0.703 0 0.102 8 0.261 -7.183 1 48 0.0756 135.537 \n \
|
128 |
max 1 2020 0.996 0.988 5403500 1 1 1 11 1 3.855 1 100 0.97 243.507"
|
129 |
|
130 |
-
|
131 |
-
|
132 |
-
init_last = "\n\n start with only typing random future_columns values in given range as a array"
|
133 |
-
#user_critic_ex = "\n \"user_critic=it was too old and loud but i like the energy\" "
|
134 |
user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] format"
|
135 |
user_last = "\n\n start with the adjust following future_columns based on user_critic. "
|
136 |
-
#example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
|
137 |
-
#feature_col_starter = "future_columns="
|
138 |
real_features = "future_columns=" + list_song_data
|
139 |
-
|
140 |
-
#init_input = init_text + init_last
|
141 |
-
#test_input = init_text + user_last + user_critic + example_features + user_critic_last
|
142 |
-
real_input = init_text + user_last +
|
143 |
-
|
|
|
144 |
return real_input
|
145 |
|
146 |
|
@@ -156,11 +158,10 @@ def format_gpt_output(rec_splitted):
|
|
156 |
# In[27]:
|
157 |
|
158 |
|
159 |
-
def recommend_gpt(
|
160 |
number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
|
161 |
-
|
162 |
|
163 |
-
|
164 |
metadata_cols = ['name', 'year', 'artists']
|
165 |
song_center = np.array(song_list)
|
166 |
|
@@ -170,9 +171,9 @@ def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
|
|
170 |
|
171 |
distances = cdist(scaled_song_center, scaled_data, 'cosine')
|
172 |
index = list(np.argsort(distances)[:, :n_songs][0])
|
173 |
-
|
174 |
rec_songs = spotify_data.iloc[index]
|
175 |
-
#rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
|
176 |
return rec_songs[metadata_cols].to_dict(orient='records')
|
177 |
|
178 |
|
@@ -182,7 +183,7 @@ def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
|
|
182 |
def get_rec_song_uri(res):
|
183 |
song_spotipy_info = []
|
184 |
for song in res:
|
185 |
-
song_spotipy_info.append(find_song_uri(song["name"], song["year"]))
|
186 |
return song_spotipy_info
|
187 |
|
188 |
|
@@ -206,39 +207,35 @@ def get_random_song():
|
|
206 |
sample = data.sample(n=1)
|
207 |
return sample.name, sample.year
|
208 |
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
|
|
213 |
# Load the pipeline from the pickle file
|
214 |
with open(file_path, 'rb') as file:
|
215 |
loaded_pipeline = pickle.load(file)
|
216 |
data = pd.read_csv(data_path)
|
217 |
labels = pd.read_csv(cluster_path)
|
218 |
data["cluster_label"] = labels["cluster_label"]
|
219 |
-
number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
|
|
|
220 |
return loaded_pipeline, data, number_cols
|
221 |
|
|
|
222 |
def control():
|
223 |
-
#song_cluster_pipeline, data, number_cols = get_pipeline_data_number_cols()
|
|
|
|
|
|
|
|
|
224 |
|
225 |
-
song_cluster_pipeline, data, number_cols = get_model_values()
|
226 |
-
|
227 |
user_critic_text = "it was dull and very loud"
|
228 |
song_name = "Poem of a Killer"
|
229 |
song_year = 2022
|
230 |
-
rec_splitted = get_recommendation_array(
|
|
|
231 |
|
232 |
res = recommend_gpt(rec_splitted, data, song_cluster_pipeline)
|
233 |
print(res)
|
234 |
print(get_rec_song_uri(res))
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
# In[35]:
|
239 |
-
|
240 |
-
# In[ ]:
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
|
|
6 |
# In[22]:
|
7 |
|
8 |
|
9 |
+
# import os
|
10 |
+
# import difflib
|
11 |
import numpy as np
|
12 |
import pandas as pd
|
13 |
import openai
|
|
|
28 |
import warnings
|
29 |
warnings.filterwarnings("ignore")
|
30 |
|
|
|
31 |
# In[23]:
|
32 |
|
33 |
|
34 |
def get_pipeline_data_number_cols():
    """Fit a scaler + KMeans pipeline on ``data/data.csv`` and label every row.

    Returns:
        A ``(pipeline, data, number_cols)`` tuple: the fitted pipeline, the
        loaded frame with an added ``cluster_label`` column, and the list of
        numeric column names the pipeline was fitted on.
    """
    data = pd.read_csv("data/data.csv")

    steps = [
        ('scaler', StandardScaler()),
        ('kmeans', KMeans(n_clusters=20, verbose=False)),
    ]
    song_cluster_pipeline = Pipeline(steps, verbose=False)

    # Only the numeric columns are used for fitting and prediction.
    numeric_frame = data.select_dtypes(np.number)
    number_cols = list(numeric_frame.columns)

    song_cluster_pipeline.fit(numeric_frame)
    data['cluster_label'] = song_cluster_pipeline.predict(numeric_frame)

    return song_cluster_pipeline, data, number_cols
|
49 |
|
50 |
|
|
|
52 |
|
53 |
|
54 |
def find_song(name, year):
|
55 |
+
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
|
56 |
+
client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
|
57 |
song_data = defaultdict()
|
58 |
+
results = sp.search(q='track: {} year: {}'.format(name, year), limit=1)
|
59 |
if results['tracks']['items'] == []:
|
60 |
return None
|
61 |
|
|
|
74 |
|
75 |
return pd.DataFrame(song_data)
|
76 |
|
77 |
+
|
78 |
def find_song_uri(name, year):
    """Return the Spotify URI of the first search hit for a track and year.

    Credentials are no longer embedded in the source. Called without
    arguments, ``SpotifyClientCredentials`` reads them from the
    ``SPOTIPY_CLIENT_ID`` and ``SPOTIPY_CLIENT_SECRET`` environment variables.

    Args:
        name: Track title to search for.
        year: Release year used to narrow the search.

    Returns:
        The ``uri`` field of the first matching track.

    Raises:
        IndexError: If the search returns no tracks. The exception type is the
            same as before; only the message is now explicit.
    """
    client = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

    # NOTE(review): the query keeps its original spelling, with a space after
    # each field name; confirm against the Spotify search syntax.
    results = client.search(q='track: {} year: {}'.format(name, year), limit=1)

    items = results['tracks']['items']
    if not items:
        raise IndexError(
            "No Spotify track found for name={!r}, year={!r}".format(name, year))

    return items[0]['uri']
|
91 |
|
92 |
+
|
93 |
def format_song(song_data, number_cols):
    """Render the first row of the selected columns as a bracketed list string.

    Args:
        song_data: Frame holding the song's features.
        number_cols: Column names to select, in output order.

    Returns:
        A string such as ``"[0.5, 1999.0]"`` built from the first row's values.
    """
    first_row = song_data[number_cols].values.tolist()[0]
    rendered = ', '.join(map(str, first_row))
    return '[' + rendered + ']'
|
98 |
|
99 |
+
|
100 |
def get_response(text):
    """Send ``text`` as a completion prompt to OpenAI and return the reply text.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    instead of being hard-coded in the source.

    Args:
        text: Prompt passed to the completion endpoint.

    Returns:
        The ``text`` field of the first returned choice.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
    """
    # Local import: the file's top-level `import os` is commented out.
    import os

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")
    openai.api_key = api_key

    # NOTE(review): confirm the model name below is still served by the API.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=text,
        temperature=0.7,
        max_tokens=128,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    return response.choices[0].get("text")
|
114 |
|
115 |
|
116 |
# In[25]:
|
|
|
130 |
75% 0.747 1999 0.893 0.668 262400 0.703 0 0.102 8 0.261 -7.183 1 48 0.0756 135.537 \n \
|
131 |
max 1 2020 0.996 0.988 5403500 1 1 1 11 1 3.855 1 100 0.97 243.507"
|
132 |
|
133 |
+
# init_last = "\n\n start with only typing random future_columns values in given range as a array"
|
134 |
+
# user_critic_ex = "\n \"user_critic=it was too old and loud but i like the energy\" "
|
|
|
|
|
135 |
user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] format"
|
136 |
user_last = "\n\n start with the adjust following future_columns based on user_critic. "
|
137 |
+
# example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
|
138 |
+
# feature_col_starter = "future_columns="
|
139 |
real_features = "future_columns=" + list_song_data
|
140 |
+
|
141 |
+
# init_input = init_text + init_last
|
142 |
+
# test_input = init_text + user_last + user_critic + example_features + user_critic_last
|
143 |
+
real_input = init_text + user_last + \
|
144 |
+
user_critic + real_features + user_critic_last
|
145 |
+
|
146 |
return real_input
|
147 |
|
148 |
|
|
|
158 |
# In[27]:
|
159 |
|
160 |
|
161 |
+
def recommend_gpt(song_list, spotify_data, song_cluster_pipeline, n_songs=15):
|
162 |
number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
|
163 |
+
'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
|
164 |
|
|
|
165 |
metadata_cols = ['name', 'year', 'artists']
|
166 |
song_center = np.array(song_list)
|
167 |
|
|
|
171 |
|
172 |
distances = cdist(scaled_song_center, scaled_data, 'cosine')
|
173 |
index = list(np.argsort(distances)[:, :n_songs][0])
|
174 |
+
|
175 |
rec_songs = spotify_data.iloc[index]
|
176 |
+
# rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
|
177 |
return rec_songs[metadata_cols].to_dict(orient='records')
|
178 |
|
179 |
|
|
|
183 |
def get_rec_song_uri(res):
    """Look up a Spotify URI for each recommended song.

    Args:
        res: Iterable of mappings, each with ``"name"`` and ``"year"`` keys.

    Returns:
        A list of URIs in recommendation order. Songs with no Spotify match
        are left out.
    """
    song_spotipy_info = []
    for song in res:
        try:
            song_spotipy_info.append(find_song_uri(song["name"], song["year"]))
        except IndexError:
            # find_song_uri raises IndexError when the search has no hits.
            # Skip that song so one miss does not discard the whole list.
            continue
    return song_spotipy_info
|
188 |
|
189 |
|
|
|
207 |
sample = data.sample(n=1)
|
208 |
return sample.name, sample.year
|
209 |
|
210 |
+
|
211 |
+
def get_model_values(data_path, file_path, cluster_path):
    """Load the pickled pipeline, the song data and its stored cluster labels.

    Args:
        data_path: Path to the songs CSV.
        file_path: Path to the pickled pipeline.
        cluster_path: Path to a CSV with a ``cluster_label`` column.

    Returns:
        A ``(pipeline, data, number_cols)`` tuple: the unpickled object, the
        songs frame with ``cluster_label`` attached, and the fixed list of
        feature column names.
    """
    # SECURITY: pickle.load can execute arbitrary code from the file, so
    # file_path must only ever point to a trusted pipeline file.
    with open(file_path, 'rb') as file:
        loaded_pipeline = pickle.load(file)

    data = pd.read_csv(data_path)
    labels = pd.read_csv(cluster_path)
    data["cluster_label"] = labels["cluster_label"]

    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
                   'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
    return loaded_pipeline, data, number_cols
|
224 |
|
225 |
+
|
226 |
def control():
    """Run the recommendation flow once with fixed sample inputs.

    Loads the stored pipeline and data, builds a recommendation vector for a
    hard-coded song and critique, then prints the recommended songs and their
    Spotify URIs.
    """
    # song_cluster_pipeline, data, number_cols = get_pipeline_data_number_cols()
    song_cluster_pipeline, data, number_cols = get_model_values(
        "data/data.csv",
        "data/pipeline.pkl",
        "data/cluster_labels.csv",
    )

    # Fixed sample inputs.
    critique = "it was dull and very loud"
    title = "Poem of a Killer"
    release_year = 2022

    rec_splitted = get_recommendation_array(
        title, release_year, number_cols, critique)

    recommendations = recommend_gpt(rec_splitted, data, song_cluster_pipeline)
    print(recommendations)
    print(get_rec_song_uri(recommendations))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|