Spaces:

alanahmet
/

nlp_based_song_recommender

Sleeping

App Files Files Community

alanahmet committed on Jun 1, 2023

Commit

2167e1f

1 Parent(s): 1454959

bugs fixed

Browse files

Files changed (6) hide show

.cache +1 -0
__pycache__/spotify_music_recommender.cpython-310.pyc +0 -0
app.py +18 -16
cluster_labels.csv → data/cluster_labels.csv +0 -0
pipeline.pkl → data/pipeline.pkl +0 -0
spotify_music_recommender.py +59 -62

.cache ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"access_token": "BQCcgc_4zWsgVUt9b9_r6ka5YkOS0KReZE6pPHKS0ELyM5K3KOfyyUyED87zZORxZbpPAGRxFEpQ8e24eQo6Mleh2xP8tZzVKkqnCDszX68thYCwqbc", "token_type": "Bearer", "expires_in": 3600, "expires_at": 1685645349}

__pycache__/spotify_music_recommender.cpython-310.pyc ADDED Viewed

Binary file (7.81 kB). View file

app.py CHANGED Viewed

@@ -3,6 +3,9 @@ from streamlit_option_menu import option_menu
 import streamlit.components.v1 as components
 import spotify_music_recommender as smr
 def song_page(name, year):
     song_uri = smr.find_song_uri(name, year)
@@ -15,7 +18,7 @@ def spr_sidebar():
     menu = option_menu(
         menu_title=None,
         options=['Home', 'Results', 'About'],
-        icons=['house', 'book', 'info-square', 'gear'],  # terminal
         menu_icon='cast',
         default_index=0,
         orientation='horizontal'
@@ -36,13 +39,13 @@ def home_page():
     st.title("Spotify Music Recommender")
     # Song input section
-    #st.subheader("")
     col1, col2 = st.columns(2)
     song_input = col1.text_input("Enter a song:")
     year_input = col2.text_input("Enter the year:")
     # Button section
-    #st.subheader("")
     col3, col4 = st.columns(2)
     find_song_button = col3.button("Find Song")
     find_random_song_button = col4.button("Random Song")
@@ -54,25 +57,27 @@ def home_page():
     # Prediction button
     predict_button = st.button("Start Prediction")
-    st.markdown("<br>", unsafe_allow_html=True)
-    # Url = st.text_input(label="Song Url",key='song_url',on_change=update_song_url)
     if find_song_button:
         song_page(song_input, year_input)
     elif find_random_song_button:
         find_random_song()
-    elif song_input == "" and year_input == "":
         find_random_song()
     if predict_button:
         with st.spinner('Getting Recommendations...'):
             try:
-                song_cluster_pipeline, data, number_cols = smr.get_model_values()
                 user_critic_text = critic_input
                 rec_splitted = smr.get_recommendation_array(
                     song_input, year_input, number_cols, user_critic_text)
                 res = smr.recommend_gpt(
-                    rec_splitted, data, song_cluster_pipeline)
                 st.session_state.song_uris = smr.get_rec_song_uri(res)
                 st.write("You can access recommended song at result page")
             except:
@@ -103,18 +108,15 @@ def find_random_song():
 def result_page():
-    try:
-        i = 0
         for uri in st.session_state.song_uris:
             uri = uri.split(":")[-1]
             uri_link = "https://open.spotify.com/embed/track/" + \
                 uri + "?utm_source=generator&theme=0"
             components.iframe(uri_link, height=80)
-            i += 1
-            if i % 5 == 0:
-                time.sleep(1)
-    except:
-        st.subheader("Please enter song informations and review then click start prediction")
 def examples_page():

 import streamlit.components.v1 as components
 import spotify_music_recommender as smr
+if "song_init" not in st.session_state:
+    st.session_state.song_init = False
 def song_page(name, year):
     song_uri = smr.find_song_uri(name, year)
     menu = option_menu(
         menu_title=None,
         options=['Home', 'Results', 'About'],
+        icons=['house', 'book', 'info-square'],
         menu_icon='cast',
         default_index=0,
         orientation='horizontal'
     st.title("Spotify Music Recommender")
     # Song input section
+    # st.subheader("")
     col1, col2 = st.columns(2)
     song_input = col1.text_input("Enter a song:")
     year_input = col2.text_input("Enter the year:")
     # Button section
+    # st.subheader("")
     col3, col4 = st.columns(2)
     find_song_button = col3.button("Find Song")
     find_random_song_button = col4.button("Random Song")
     # Prediction button
     predict_button = st.button("Start Prediction")
     if find_song_button:
         song_page(song_input, year_input)
     elif find_random_song_button:
         find_random_song()
+    elif song_input == "" and year_input == "" and not st.session_state.song_init:
+        st.session_state.song_init = True
         find_random_song()
     if predict_button:
         with st.spinner('Getting Recommendations...'):
             try:
+                data_path = "data/data.csv"
+                file_path = "data/pipeline.pkl"
+                cluster_labels = "data/cluster_labels.csv"
+                song_cluster_pipeline, data, number_cols = smr.get_model_values(
+                    data_path, file_path, cluster_labels)
                 user_critic_text = critic_input
                 rec_splitted = smr.get_recommendation_array(
                     song_input, year_input, number_cols, user_critic_text)
                 res = smr.recommend_gpt(
+                    rec_splitted, data, song_cluster_pipeline, 15)
                 st.session_state.song_uris = smr.get_rec_song_uri(res)
                 st.write("You can access recommended song at result page")
             except:
 def result_page():
     """Show an embedded Spotify player for every recommended track.

     Reads ``st.session_state.song_uris``. When that key has not been set
     yet, a hint asking the user to run a prediction is shown instead.
     """
     if "song_uris" not in st.session_state:
         st.subheader(
             "Please enter song informations and review then click start prediction")
         return
     for song_uri in st.session_state.song_uris:
         # Only the last colon-separated part of the URI goes into the embed URL.
         track_id = song_uri.split(":")[-1]
         embed_url = ("https://open.spotify.com/embed/track/"
                      + track_id + "?utm_source=generator&theme=0")
         components.iframe(embed_url, height=80)
 def examples_page():

cluster_labels.csv → data/cluster_labels.csv RENAMED Viewed

File without changes

pipeline.pkl → data/pipeline.pkl RENAMED Viewed

File without changes

spotify_music_recommender.py CHANGED Viewed

@@ -6,8 +6,8 @@
 # In[22]:
-#import os
-#import difflib
 import numpy as np
 import pandas as pd
 import openai
@@ -28,26 +28,23 @@ from collections import defaultdict
 import warnings
 warnings.filterwarnings("ignore")
 # In[23]:
 def get_pipeline_data_number_cols():
     data = pd.read_csv("data/data.csv")
-    # genre_data = pd.read_csv('data/data_by_genres.csv')
-    # year_data = pd.read_csv('data/data_by_year.csv')
-    song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
-                                      ('kmeans', KMeans(n_clusters=20,
                                        verbose=False))
-                                     ], verbose=False)
     X = data.select_dtypes(np.number)
     number_cols = list(X.columns)
     song_cluster_pipeline.fit(X)
     song_cluster_labels = song_cluster_pipeline.predict(X)
     data['cluster_label'] = song_cluster_labels
     return song_cluster_pipeline, data, number_cols
@@ -55,9 +52,10 @@ def get_pipeline_data_number_cols():
 def find_song(name, year):
-    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
     song_data = defaultdict()
-    results = sp.search(q= 'track: {} year: {}'.format(name,year), limit=1)
     if results['tracks']['items'] == []:
         return None
@@ -76,38 +74,43 @@ def find_song(name, year):
     return pd.DataFrame(song_data)
 def find_song_uri(name, year):
     # Create a Spotify client object.
-    client = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
     # Get the name of the song you want to get the ID for.
     song_name = name
     # Call the `search` method with the song name.
-    results = client.search(q= 'track: {} year: {}'.format(name,year), limit=1)
     # Get the first result.
     track = results['tracks']['items'][0]
     # The Spotify ID of the song will be in the `id` property.
     song_id = track['uri']
     return song_id
 def format_song(song_data, number_cols):
     list_song_data = song_data[number_cols].values.tolist()[0]
-    list_song_data = '[' + ', '.join([str(num) for num in list_song_data]) + ']'
     return list_song_data
 def get_response(text):
-  openai.api_key = "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"
-  response = openai.Completion.create(
-    model="text-davinci-003",
-    prompt=text,
-    temperature=0.7,
-    max_tokens=128,
-    top_p=1,
-    frequency_penalty=0,
-    presence_penalty=0
-  )
-  return response.choices[0].get("text")
 # In[25]:
@@ -127,20 +130,19 @@ def get_text(user_critic, list_song_data):
     75%	0.747	1999	0.893	0.668	262400	0.703	0	0.102	8	0.261	-7.183	1	48	0.0756	135.537 \n \
     max	1	2020	0.996	0.988	5403500	1	1	1	11	1	3.855	1	100	0.97	243.507"
-    init_last = "\n\n start with only typing random  future_columns values in given range as a array"
-    #user_critic_ex = "\n \"user_critic=it was too old and loud but i like the energy\" "
     user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>]  format"
     user_last = "\n\n start with the adjust following future_columns based on user_critic. "
-    #example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
-    #feature_col_starter = "future_columns="
     real_features = "future_columns=" + list_song_data
-    #init_input = init_text + init_last
-    #test_input = init_text + user_last + user_critic + example_features + user_critic_last
-    real_input = init_text + user_last + user_critic + real_features + user_critic_last
     return real_input
@@ -156,11 +158,10 @@ def format_gpt_output(rec_splitted):
 # In[27]:
-def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
     number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
- 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
     metadata_cols = ['name', 'year', 'artists']
     song_center = np.array(song_list)
@@ -170,9 +171,9 @@ def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
     distances = cdist(scaled_song_center, scaled_data, 'cosine')
     index = list(np.argsort(distances)[:, :n_songs][0])
     rec_songs = spotify_data.iloc[index]
-    #rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
     return rec_songs[metadata_cols].to_dict(orient='records')
@@ -182,7 +183,7 @@ def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
 def get_rec_song_uri(res):
     song_spotipy_info = []
     for song in res:
-        song_spotipy_info.append(find_song_uri(song["name"], song["year"]))
     return song_spotipy_info
@@ -206,39 +207,35 @@ def get_random_song():
     sample = data.sample(n=1)
     return sample.name, sample.year
-def get_model_values():
-    data_path = "data/data.csv"
-    file_path = "pipeline.pkl"
-    cluster_path = "cluster_labels.csv"
     # Load the pipeline from the pickle file
     with open(file_path, 'rb') as file:
         loaded_pipeline = pickle.load(file)
     data = pd.read_csv(data_path)
     labels = pd.read_csv(cluster_path)
     data["cluster_label"] = labels["cluster_label"]
-    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
     return loaded_pipeline, data, number_cols
 def control():
-    #song_cluster_pipeline, data, number_cols = get_pipeline_data_number_cols()
-    song_cluster_pipeline, data, number_cols = get_model_values()
     user_critic_text = "it was dull and very loud"
     song_name = "Poem of a Killer"
     song_year = 2022
-    rec_splitted = get_recommendation_array(song_name, song_year, number_cols, user_critic_text)
     res = recommend_gpt(rec_splitted, data, song_cluster_pipeline)
     print(res)
     print(get_rec_song_uri(res))
-# In[35]:
-# In[ ]:

 # In[22]:
+# import os
+# import difflib
 import numpy as np
 import pandas as pd
 import openai
 import warnings
 warnings.filterwarnings("ignore")
 # In[23]:
 def get_pipeline_data_number_cols():
     """Fit the clustering pipeline on ``data/data.csv`` and label every row.

     The pipeline is a ``StandardScaler`` followed by a 20-cluster ``KMeans``,
     fitted on the numeric columns of the CSV.

     Returns:
         Tuple ``(song_cluster_pipeline, data, number_cols)``: the fitted
         pipeline, the DataFrame with a ``cluster_label`` column added, and
         the list of numeric column names used for fitting.
     """
     data = pd.read_csv("data/data.csv")
     numeric_frame = data.select_dtypes(np.number)
     number_cols = list(numeric_frame.columns)

     scaler_step = ('scaler', StandardScaler())
     kmeans_step = ('kmeans', KMeans(n_clusters=20, verbose=False))
     song_cluster_pipeline = Pipeline([scaler_step, kmeans_step], verbose=False)

     song_cluster_pipeline.fit(numeric_frame)
     data['cluster_label'] = song_cluster_pipeline.predict(numeric_frame)
     return song_cluster_pipeline, data, number_cols
 def find_song(name, year):
+    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
+        client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
     song_data = defaultdict()
+    results = sp.search(q='track: {} year: {}'.format(name, year), limit=1)
     if results['tracks']['items'] == []:
         return None
     return pd.DataFrame(song_data)
 def find_song_uri(name, year):
     """Search Spotify for a track and return the URI of the first hit.

     Args:
         name: Track title to put in the search query.
         year: Release year to put in the search query.

     Returns:
         The ``uri`` field of the first track in the search result.

     Raises:
         IndexError: If the search returns no tracks.
     """
     # Credentials are not embedded here: with no arguments spotipy reads
     # SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET from the environment.
     # SECURITY: the id/secret pair that used to be hard-coded in this function
     # has been published and should be revoked.
     client = spotipy.Spotify(auth_manager=SpotifyClientCredentials())
     # NOTE(review): query text kept unchanged; confirm that Spotify's search
     # honours field filters written with a space after the colon.
     results = client.search(q='track: {} year: {}'.format(name, year), limit=1)
     items = results['tracks']['items']
     if not items:
         # Same exception type as the bare [0] lookup, but with context.
         raise IndexError(
             'no Spotify track found for {!r} ({!r})'.format(name, year))
     return items[0]['uri']
 def format_song(song_data, number_cols):
     """Render the first row's feature values as a bracketed, comma-separated string.

     Args:
         song_data: DataFrame containing at least the columns in ``number_cols``.
         number_cols: Column names to select, in output order.

     Returns:
         A string such as ``"[0.5, 1999.0]"`` built from the first row only.
     """
     first_row = song_data[number_cols].values.tolist()[0]
     joined = ', '.join(map(str, first_row))
     return '[' + joined + ']'
 def get_response(text):
     """Send ``text`` as a prompt to the OpenAI completion API and return the reply.

     The API key is read from the ``OPENAI_API_KEY`` environment variable
     instead of being stored in the source.

     Args:
         text: Prompt passed to the ``text-davinci-003`` completion model.

     Returns:
         The ``text`` field of the first completion choice.

     Raises:
         RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
     """
     import os

     # SECURITY: the key that used to be hard-coded in this function has been
     # published and should be revoked.
     api_key = os.environ.get("OPENAI_API_KEY")
     if not api_key:
         raise RuntimeError("OPENAI_API_KEY environment variable is not set")
     openai.api_key = api_key
     response = openai.Completion.create(
         model="text-davinci-003",
         prompt=text,
         temperature=0.7,
         max_tokens=128,
         top_p=1,
         frequency_penalty=0,
         presence_penalty=0
     )
     return response.choices[0].get("text")
 # In[25]:
     75%	0.747	1999	0.893	0.668	262400	0.703	0	0.102	8	0.261	-7.183	1	48	0.0756	135.537 \n \
     max	1	2020	0.996	0.988	5403500	1	1	1	11	1	3.855	1	100	0.97	243.507"
+    # init_last = "\n\n start with only typing random  future_columns values in given range as a array"
+    # user_critic_ex = "\n \"user_critic=it was too old and loud but i like the energy\" "
     user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>]  format"
     user_last = "\n\n start with the adjust following future_columns based on user_critic. "
+    # example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
+    # feature_col_starter = "future_columns="
     real_features = "future_columns=" + list_song_data
+    # init_input = init_text + init_last
+    # test_input = init_text + user_last + user_critic + example_features + user_critic_last
+    real_input = init_text + user_last + \
+        user_critic + real_features + user_critic_last
     return real_input
 # In[27]:
+def recommend_gpt(song_list, spotify_data, song_cluster_pipeline, n_songs=15):
     number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
+                   'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
     metadata_cols = ['name', 'year', 'artists']
     song_center = np.array(song_list)
     distances = cdist(scaled_song_center, scaled_data, 'cosine')
     index = list(np.argsort(distances)[:, :n_songs][0])
     rec_songs = spotify_data.iloc[index]
+    # rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
     return rec_songs[metadata_cols].to_dict(orient='records')
 def get_rec_song_uri(res):
     """Resolve each recommended song to a Spotify URI.

     Args:
         res: Iterable of mappings with ``"name"`` and ``"year"`` keys.

     Returns:
         List with one ``find_song_uri`` result per entry, in input order.
     """
     return [find_song_uri(song["name"], song["year"]) for song in res]
     sample = data.sample(n=1)
     return sample.name, sample.year
+def get_model_values(data_path, file_path, cluster_path):
+    data_path = data_path
+    file_path = file_path
+    cluster_path = cluster_path
     # Load the pipeline from the pickle file
     with open(file_path, 'rb') as file:
         loaded_pipeline = pickle.load(file)
     data = pd.read_csv(data_path)
     labels = pd.read_csv(cluster_path)
     data["cluster_label"] = labels["cluster_label"]
+    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
+                   'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
     return loaded_pipeline, data, number_cols
 def control():
     """Run one hard-coded recommendation end to end and print the results.

     Loads the saved model artefacts from ``data/``, requests a recommendation
     array for a fixed song and review, then prints the recommended songs and
     their Spotify URIs.
     """
     pipeline, songs, feature_cols = get_model_values(
         "data/data.csv", "data/pipeline.pkl", "data/cluster_labels.csv")
     recommendation_array = get_recommendation_array(
         "Poem of a Killer", 2022, feature_cols, "it was dull and very loud")
     res = recommend_gpt(recommendation_array, songs, pipeline)
     print(res)
     print(get_rec_song_uri(res))