alanahmet committed on
Commit
2167e1f
·
1 Parent(s): 1454959

bugs fixed

Browse files
.cache ADDED
@@ -0,0 +1 @@
 
 
1
+ {"access_token": "BQCcgc_4zWsgVUt9b9_r6ka5YkOS0KReZE6pPHKS0ELyM5K3KOfyyUyED87zZORxZbpPAGRxFEpQ8e24eQo6Mleh2xP8tZzVKkqnCDszX68thYCwqbc", "token_type": "Bearer", "expires_in": 3600, "expires_at": 1685645349}
__pycache__/spotify_music_recommender.cpython-310.pyc ADDED
Binary file (7.81 kB). View file
 
app.py CHANGED
@@ -3,6 +3,9 @@ from streamlit_option_menu import option_menu
3
  import streamlit.components.v1 as components
4
  import spotify_music_recommender as smr
5
 
 
 
 
6
 
7
  def song_page(name, year):
8
  song_uri = smr.find_song_uri(name, year)
@@ -15,7 +18,7 @@ def spr_sidebar():
15
  menu = option_menu(
16
  menu_title=None,
17
  options=['Home', 'Results', 'About'],
18
- icons=['house', 'book', 'info-square', 'gear'], # terminal
19
  menu_icon='cast',
20
  default_index=0,
21
  orientation='horizontal'
@@ -36,13 +39,13 @@ def home_page():
36
  st.title("Spotify Music Recommender")
37
 
38
  # Song input section
39
- #st.subheader("")
40
  col1, col2 = st.columns(2)
41
  song_input = col1.text_input("Enter a song:")
42
  year_input = col2.text_input("Enter the year:")
43
 
44
  # Button section
45
- #st.subheader("")
46
  col3, col4 = st.columns(2)
47
  find_song_button = col3.button("Find Song")
48
  find_random_song_button = col4.button("Random Song")
@@ -54,25 +57,27 @@ def home_page():
54
  # Prediction button
55
  predict_button = st.button("Start Prediction")
56
 
57
- st.markdown("<br>", unsafe_allow_html=True)
58
-
59
- # Url = st.text_input(label="Song Url",key='song_url',on_change=update_song_url)
60
  if find_song_button:
61
  song_page(song_input, year_input)
62
  elif find_random_song_button:
63
  find_random_song()
64
- elif song_input == "" and year_input == "":
 
65
  find_random_song()
66
 
67
  if predict_button:
68
  with st.spinner('Getting Recommendations...'):
69
  try:
70
- song_cluster_pipeline, data, number_cols = smr.get_model_values()
 
 
 
 
71
  user_critic_text = critic_input
72
  rec_splitted = smr.get_recommendation_array(
73
  song_input, year_input, number_cols, user_critic_text)
74
  res = smr.recommend_gpt(
75
- rec_splitted, data, song_cluster_pipeline)
76
  st.session_state.song_uris = smr.get_rec_song_uri(res)
77
  st.write("You can access recommended song at result page")
78
  except:
@@ -103,18 +108,15 @@ def find_random_song():
103
 
104
 
105
  def result_page():
106
- try:
107
- i = 0
108
  for uri in st.session_state.song_uris:
109
  uri = uri.split(":")[-1]
110
  uri_link = "https://open.spotify.com/embed/track/" + \
111
  uri + "?utm_source=generator&theme=0"
112
  components.iframe(uri_link, height=80)
113
- i += 1
114
- if i % 5 == 0:
115
- time.sleep(1)
116
- except:
117
- st.subheader("Please enter song informations and review then click start prediction")
118
 
119
 
120
  def examples_page():
 
3
  import streamlit.components.v1 as components
4
  import spotify_music_recommender as smr
5
 
6
+ if "song_init" not in st.session_state:
7
+ st.session_state.song_init = False
8
+
9
 
10
  def song_page(name, year):
11
  song_uri = smr.find_song_uri(name, year)
 
18
  menu = option_menu(
19
  menu_title=None,
20
  options=['Home', 'Results', 'About'],
21
+ icons=['house', 'book', 'info-square'],
22
  menu_icon='cast',
23
  default_index=0,
24
  orientation='horizontal'
 
39
  st.title("Spotify Music Recommender")
40
 
41
  # Song input section
42
+ # st.subheader("")
43
  col1, col2 = st.columns(2)
44
  song_input = col1.text_input("Enter a song:")
45
  year_input = col2.text_input("Enter the year:")
46
 
47
  # Button section
48
+ # st.subheader("")
49
  col3, col4 = st.columns(2)
50
  find_song_button = col3.button("Find Song")
51
  find_random_song_button = col4.button("Random Song")
 
57
  # Prediction button
58
  predict_button = st.button("Start Prediction")
59
 
 
 
 
60
  if find_song_button:
61
  song_page(song_input, year_input)
62
  elif find_random_song_button:
63
  find_random_song()
64
+ elif song_input == "" and year_input == "" and not st.session_state.song_init:
65
+ st.session_state.song_init = True
66
  find_random_song()
67
 
68
  if predict_button:
69
  with st.spinner('Getting Recommendations...'):
70
  try:
71
+ data_path = "data/data.csv"
72
+ file_path = "data/pipeline.pkl"
73
+ cluster_labels = "data/cluster_labels.csv"
74
+ song_cluster_pipeline, data, number_cols = smr.get_model_values(
75
+ data_path, file_path, cluster_labels)
76
  user_critic_text = critic_input
77
  rec_splitted = smr.get_recommendation_array(
78
  song_input, year_input, number_cols, user_critic_text)
79
  res = smr.recommend_gpt(
80
+ rec_splitted, data, song_cluster_pipeline, 15)
81
  st.session_state.song_uris = smr.get_rec_song_uri(res)
82
  st.write("You can access recommended song at result page")
83
  except:
 
108
 
109
 
110
  def result_page():
111
+ if "song_uris" in st.session_state:
 
112
  for uri in st.session_state.song_uris:
113
  uri = uri.split(":")[-1]
114
  uri_link = "https://open.spotify.com/embed/track/" + \
115
  uri + "?utm_source=generator&theme=0"
116
  components.iframe(uri_link, height=80)
117
+ else:
118
+ st.subheader(
119
+ "Please enter song informations and review then click start prediction")
 
 
120
 
121
 
122
  def examples_page():
cluster_labels.csv → data/cluster_labels.csv RENAMED
File without changes
pipeline.pkl → data/pipeline.pkl RENAMED
File without changes
spotify_music_recommender.py CHANGED
@@ -6,8 +6,8 @@
6
  # In[22]:
7
 
8
 
9
- #import os
10
- #import difflib
11
  import numpy as np
12
  import pandas as pd
13
  import openai
@@ -28,26 +28,23 @@ from collections import defaultdict
28
  import warnings
29
  warnings.filterwarnings("ignore")
30
 
31
-
32
  # In[23]:
33
 
34
 
35
  def get_pipeline_data_number_cols():
36
  data = pd.read_csv("data/data.csv")
37
- # genre_data = pd.read_csv('data/data_by_genres.csv')
38
- # year_data = pd.read_csv('data/data_by_year.csv')
39
-
40
- song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
41
- ('kmeans', KMeans(n_clusters=20,
42
  verbose=False))
43
- ], verbose=False)
44
 
45
  X = data.select_dtypes(np.number)
46
  number_cols = list(X.columns)
47
  song_cluster_pipeline.fit(X)
48
  song_cluster_labels = song_cluster_pipeline.predict(X)
49
  data['cluster_label'] = song_cluster_labels
50
-
51
  return song_cluster_pipeline, data, number_cols
52
 
53
 
@@ -55,9 +52,10 @@ def get_pipeline_data_number_cols():
55
 
56
 
57
  def find_song(name, year):
58
- sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
 
59
  song_data = defaultdict()
60
- results = sp.search(q= 'track: {} year: {}'.format(name,year), limit=1)
61
  if results['tracks']['items'] == []:
62
  return None
63
 
@@ -76,38 +74,43 @@ def find_song(name, year):
76
 
77
  return pd.DataFrame(song_data)
78
 
 
79
  def find_song_uri(name, year):
80
  # Create a Spotify client object.
81
- client = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
 
82
  # Get the name of the song you want to get the ID for.
83
  song_name = name
84
  # Call the `search` method with the song name.
85
- results = client.search(q= 'track: {} year: {}'.format(name,year), limit=1)
86
  # Get the first result.
87
  track = results['tracks']['items'][0]
88
  # The Spotify ID of the song will be in the `id` property.
89
  song_id = track['uri']
90
  return song_id
91
 
 
92
  def format_song(song_data, number_cols):
93
  list_song_data = song_data[number_cols].values.tolist()[0]
94
- list_song_data = '[' + ', '.join([str(num) for num in list_song_data]) + ']'
 
95
  return list_song_data
96
 
 
97
  def get_response(text):
98
- openai.api_key = "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"
99
 
100
- response = openai.Completion.create(
101
- model="text-davinci-003",
102
- prompt=text,
103
- temperature=0.7,
104
- max_tokens=128,
105
- top_p=1,
106
- frequency_penalty=0,
107
- presence_penalty=0
108
- )
109
 
110
- return response.choices[0].get("text")
111
 
112
 
113
  # In[25]:
@@ -127,20 +130,19 @@ def get_text(user_critic, list_song_data):
127
  75% 0.747 1999 0.893 0.668 262400 0.703 0 0.102 8 0.261 -7.183 1 48 0.0756 135.537 \n \
128
  max 1 2020 0.996 0.988 5403500 1 1 1 11 1 3.855 1 100 0.97 243.507"
129
 
130
-
131
-
132
- init_last = "\n\n start with only typing random future_columns values in given range as a array"
133
- #user_critic_ex = "\n \"user_critic=it was too old and loud but i like the energy\" "
134
  user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] format"
135
  user_last = "\n\n start with the adjust following future_columns based on user_critic. "
136
- #example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
137
- #feature_col_starter = "future_columns="
138
  real_features = "future_columns=" + list_song_data
139
-
140
- #init_input = init_text + init_last
141
- #test_input = init_text + user_last + user_critic + example_features + user_critic_last
142
- real_input = init_text + user_last + user_critic + real_features + user_critic_last
143
-
 
144
  return real_input
145
 
146
 
@@ -156,11 +158,10 @@ def format_gpt_output(rec_splitted):
156
  # In[27]:
157
 
158
 
159
- def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
160
  number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
161
- 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
162
 
163
-
164
  metadata_cols = ['name', 'year', 'artists']
165
  song_center = np.array(song_list)
166
 
@@ -170,9 +171,9 @@ def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
170
 
171
  distances = cdist(scaled_song_center, scaled_data, 'cosine')
172
  index = list(np.argsort(distances)[:, :n_songs][0])
173
-
174
  rec_songs = spotify_data.iloc[index]
175
- #rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
176
  return rec_songs[metadata_cols].to_dict(orient='records')
177
 
178
 
@@ -182,7 +183,7 @@ def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
182
  def get_rec_song_uri(res):
183
  song_spotipy_info = []
184
  for song in res:
185
- song_spotipy_info.append(find_song_uri(song["name"], song["year"]))
186
  return song_spotipy_info
187
 
188
 
@@ -206,39 +207,35 @@ def get_random_song():
206
  sample = data.sample(n=1)
207
  return sample.name, sample.year
208
 
209
- def get_model_values():
210
- data_path = "data/data.csv"
211
- file_path = "pipeline.pkl"
212
- cluster_path = "cluster_labels.csv"
 
213
  # Load the pipeline from the pickle file
214
  with open(file_path, 'rb') as file:
215
  loaded_pipeline = pickle.load(file)
216
  data = pd.read_csv(data_path)
217
  labels = pd.read_csv(cluster_path)
218
  data["cluster_label"] = labels["cluster_label"]
219
- number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
 
220
  return loaded_pipeline, data, number_cols
221
 
 
222
  def control():
223
- #song_cluster_pipeline, data, number_cols = get_pipeline_data_number_cols()
 
 
 
 
224
 
225
- song_cluster_pipeline, data, number_cols = get_model_values()
226
-
227
  user_critic_text = "it was dull and very loud"
228
  song_name = "Poem of a Killer"
229
  song_year = 2022
230
- rec_splitted = get_recommendation_array(song_name, song_year, number_cols, user_critic_text)
 
231
 
232
  res = recommend_gpt(rec_splitted, data, song_cluster_pipeline)
233
  print(res)
234
  print(get_rec_song_uri(res))
235
-
236
-
237
-
238
- # In[35]:
239
-
240
- # In[ ]:
241
-
242
-
243
-
244
-
 
6
  # In[22]:
7
 
8
 
9
+ # import os
10
+ # import difflib
11
  import numpy as np
12
  import pandas as pd
13
  import openai
 
28
  import warnings
29
  warnings.filterwarnings("ignore")
30
 
 
31
  # In[23]:
32
 
33
 
34
  def get_pipeline_data_number_cols():
35
  data = pd.read_csv("data/data.csv")
36
+
37
+ song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
38
+ ('kmeans', KMeans(n_clusters=20,
 
 
39
  verbose=False))
40
+ ], verbose=False)
41
 
42
  X = data.select_dtypes(np.number)
43
  number_cols = list(X.columns)
44
  song_cluster_pipeline.fit(X)
45
  song_cluster_labels = song_cluster_pipeline.predict(X)
46
  data['cluster_label'] = song_cluster_labels
47
+
48
  return song_cluster_pipeline, data, number_cols
49
 
50
 
 
52
 
53
 
54
  def find_song(name, year):
55
+ sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
56
+ client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
57
  song_data = defaultdict()
58
+ results = sp.search(q='track: {} year: {}'.format(name, year), limit=1)
59
  if results['tracks']['items'] == []:
60
  return None
61
 
 
74
 
75
  return pd.DataFrame(song_data)
76
 
77
+
78
  def find_song_uri(name, year):
79
  # Create a Spotify client object.
80
+ client = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
81
+ client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
82
  # Get the name of the song you want to get the ID for.
83
  song_name = name
84
  # Call the `search` method with the song name.
85
+ results = client.search(q='track: {} year: {}'.format(name, year), limit=1)
86
  # Get the first result.
87
  track = results['tracks']['items'][0]
88
  # The Spotify ID of the song will be in the `id` property.
89
  song_id = track['uri']
90
  return song_id
91
 
92
+
93
  def format_song(song_data, number_cols):
94
  list_song_data = song_data[number_cols].values.tolist()[0]
95
+ list_song_data = '[' + ', '.join([str(num)
96
+ for num in list_song_data]) + ']'
97
  return list_song_data
98
 
99
+
100
  def get_response(text):
101
+ openai.api_key = "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"
102
 
103
+ response = openai.Completion.create(
104
+ model="text-davinci-003",
105
+ prompt=text,
106
+ temperature=0.7,
107
+ max_tokens=128,
108
+ top_p=1,
109
+ frequency_penalty=0,
110
+ presence_penalty=0
111
+ )
112
 
113
+ return response.choices[0].get("text")
114
 
115
 
116
  # In[25]:
 
130
  75% 0.747 1999 0.893 0.668 262400 0.703 0 0.102 8 0.261 -7.183 1 48 0.0756 135.537 \n \
131
  max 1 2020 0.996 0.988 5403500 1 1 1 11 1 3.855 1 100 0.97 243.507"
132
 
133
+ # init_last = "\n\n start with only typing random future_columns values in given range as a array"
134
+ # user_critic_ex = "\n \"user_critic=it was too old and loud but i like the energy\" "
 
 
135
  user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] format"
136
  user_last = "\n\n start with the adjust following future_columns based on user_critic. "
137
+ # example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
138
+ # feature_col_starter = "future_columns="
139
  real_features = "future_columns=" + list_song_data
140
+
141
+ # init_input = init_text + init_last
142
+ # test_input = init_text + user_last + user_critic + example_features + user_critic_last
143
+ real_input = init_text + user_last + \
144
+ user_critic + real_features + user_critic_last
145
+
146
  return real_input
147
 
148
 
 
158
  # In[27]:
159
 
160
 
161
+ def recommend_gpt(song_list, spotify_data, song_cluster_pipeline, n_songs=15):
162
  number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
163
+ 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
164
 
 
165
  metadata_cols = ['name', 'year', 'artists']
166
  song_center = np.array(song_list)
167
 
 
171
 
172
  distances = cdist(scaled_song_center, scaled_data, 'cosine')
173
  index = list(np.argsort(distances)[:, :n_songs][0])
174
+
175
  rec_songs = spotify_data.iloc[index]
176
+ # rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
177
  return rec_songs[metadata_cols].to_dict(orient='records')
178
 
179
 
 
183
  def get_rec_song_uri(res):
184
  song_spotipy_info = []
185
  for song in res:
186
+ song_spotipy_info.append(find_song_uri(song["name"], song["year"]))
187
  return song_spotipy_info
188
 
189
 
 
207
  sample = data.sample(n=1)
208
  return sample.name, sample.year
209
 
210
+
211
def get_model_values(data_path="data/data.csv",
                     file_path="data/pipeline.pkl",
                     cluster_path="data/cluster_labels.csv"):
    """Load the pickled pipeline, the song data and its cluster labels.

    The defaults are the paths the visible callers pass explicitly, so the
    function can also be called with no arguments.

    Args:
        data_path: CSV file with the song data.
        file_path: Pickle file holding the fitted pipeline.
        cluster_path: CSV file with a ``cluster_label`` column.

    Returns:
        A tuple ``(loaded_pipeline, data, number_cols)``: the unpickled
        pipeline, the data frame with ``cluster_label`` copied in from the
        labels file, and the fixed list of 15 numeric feature names.
    """
    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # point file_path at a pipeline file you produced yourself.
    with open(file_path, 'rb') as file:
        loaded_pipeline = pickle.load(file)

    data = pd.read_csv(data_path)
    labels = pd.read_csv(cluster_path)
    data["cluster_label"] = labels["cluster_label"]

    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
                   'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
    return loaded_pipeline, data, number_cols
224
 
225
+
226
  def control():
227
+ # song_cluster_pipeline, data, number_cols = get_pipeline_data_number_cols()
228
+ data_path = "data/data.csv"
229
+ file_path = "data/pipeline.pkl"
230
+ cluster_labels = "data/cluster_labels.csv"
231
+ song_cluster_pipeline, data, number_cols = get_model_values(data_path, file_path, cluster_labels)
232
 
 
 
233
  user_critic_text = "it was dull and very loud"
234
  song_name = "Poem of a Killer"
235
  song_year = 2022
236
+ rec_splitted = get_recommendation_array(
237
+ song_name, song_year, number_cols, user_critic_text)
238
 
239
  res = recommend_gpt(rec_splitted, data, song_cluster_pipeline)
240
  print(res)
241
  print(get_rec_song_uri(res))