Spaces:

alanahmet
/

nlp_based_song_recommender

Sleeping

App Files Files Community

alanahmet committed on May 31, 2023

Commit

1454959

1 Parent(s): a6458f8

init

Browse files

Files changed (6) hide show

app.py +177 -0
cluster_labels.csv +0 -0
data/data.csv +0 -0
pipeline.pkl +3 -0
requirements.txt +8 -0
spotify_music_recommender.py +244 -0

app.py ADDED Viewed

	@@ -0,0 +1,177 @@

+import streamlit as st
+from streamlit_option_menu import option_menu
+import streamlit.components.v1 as components
+import spotify_music_recommender as smr
+def song_page(name, year):
+    song_uri = smr.find_song_uri(name, year)
+    formatted_song_uri = song_uri.split(':')[-1]
+    uri_link = f'https://open.spotify.com/embed/track/{formatted_song_uri}?utm_source=generator'
+    components.iframe(uri_link, height=100)
+def spr_sidebar():
+    menu = option_menu(
+        menu_title=None,
+        options=['Home', 'Results', 'About'],
+        icons=['house', 'book', 'info-square', 'gear'],  # terminal
+        menu_icon='cast',
+        default_index=0,
+        orientation='horizontal'
+    )
+    if menu == 'Home':
+        st.session_state.app_mode = 'Home'
+    elif menu == 'Results':
+        st.session_state.app_mode = 'Results'
+    elif menu == 'About':
+        st.session_state.app_mode = 'About'
+    # elif menu == 'How It Works':
+    #     st.session_state.app_mode = 'How It Works'
+def home_page():
+    # App layout
+    st.title("Spotify Music Recommender")
+    # Song input section
+    #st.subheader("")
+    col1, col2 = st.columns(2)
+    song_input = col1.text_input("Enter a song:")
+    year_input = col2.text_input("Enter the year:")
+    # Button section
+    #st.subheader("")
+    col3, col4 = st.columns(2)
+    find_song_button = col3.button("Find Song")
+    find_random_song_button = col4.button("Random Song")
+    # Critic input section
+    st.subheader("Song Review")
+    critic_input = st.text_input("")
+    # Prediction button
+    predict_button = st.button("Start Prediction")
+    st.markdown("<br>", unsafe_allow_html=True)
+    # Url = st.text_input(label="Song Url",key='song_url',on_change=update_song_url)
+    if find_song_button:
+        song_page(song_input, year_input)
+    elif find_random_song_button:
+        find_random_song()
+    elif song_input == "" and year_input == "":
+        find_random_song()
+    if predict_button:
+        with st.spinner('Getting Recommendations...'):
+            try:
+                song_cluster_pipeline, data, number_cols = smr.get_model_values()
+                user_critic_text = critic_input
+                rec_splitted = smr.get_recommendation_array(
+                    song_input, year_input, number_cols, user_critic_text)
+                res = smr.recommend_gpt(
+                    rec_splitted, data, song_cluster_pipeline)
+                st.session_state.song_uris = smr.get_rec_song_uri(res)
+                st.write("You can access recommended song at result page")
+            except:
+                st.write("An error occured please try again")
+# def text_field(label, columns=None, **input_params):
+#     c1, c2 = st.columns(columns or [1, 4])
+#     # Display field name with some alignment
+#     c1.markdown("##")
+#     c1.markdown(label)
+#     # Sets a default key parameter to avoid duplicate key errors
+#     input_params.setdefault("key", label)
+#     # Forward text input parameters
+#     return c2.text_input("", **input_params)
+def find_random_song():
+    try:
+        song_input, year_input = smr.get_random_song()
+        song_page(song_input, year_input)
+    except:
+        song_input, year_input = "Heat Waves", "2020"
+        song_page(song_input, year_input)
+def result_page():
+    try:
+        i = 0
+        for uri in st.session_state.song_uris:
+            uri = uri.split(":")[-1]
+            uri_link = "https://open.spotify.com/embed/track/" + \
+                uri + "?utm_source=generator&theme=0"
+            components.iframe(uri_link, height=80)
+            i += 1
+            if i % 5 == 0:
+                time.sleep(1)
+    except:
+        st.subheader("Please enter song informations and review then click start prediction")
+def examples_page():
+    st.subheader("Will added")
+def About_page():
+    # Renders the About page: a 'Development' header with the project background,
+    # an 'Audio Features Explanation' table and a 'Credit' section.
+    # NOTE(review): the bare triple-quoted strings below are expression statements,
+    # presumably displayed through Streamlit's "magic" feature — confirm before
+    # moving or reformatting them. Their text is user-facing output.
+    st.header('Development')
+    # Project motivation, usage summary and contact links (markdown).
+    """
+    Have you ever listened to a song and liked it overall, but felt that certain features could be improved? Maybe the chorus was too loud, the energy level wasn't quite right, or there were either too many or too few words. I've had those experiences too, and that's what inspired me to create a song recommender based on user reviews.
+    The process is straightforward: simply type in the name of a song or choose a random one, and then enter your review. The recommender will analyze your review using ChatGPT and utilize the Spotify API to generate personalized song recommendations. It's an exciting way to enhance your music discovery and tailor the recommendations to your specific preferences.
+    Although it's important to note that the dataset used for training the model was relatively small (170k), the recommender still aims to provide valuable suggestions. While it may not reach its full potential due to the limited data, it serves as a starting point for exploring new songs that align with your individual tastes.
+    So, if you're looking to fine-tune your music experience and discover songs that better match your preferences, give this song recommender a try. Enter your review, and let the algorithm work its magic to recommend songs that you're more likely to enjoy.\n
+    Github : [alanahmet](https://github.com/alanahmet) \n
+    Mail : [email protected]
+    """
+    st.subheader('Audio Features Explanation')
+    # Markdown table describing each audio feature, with a link to the Spotify reference.
+    """
+    | Variable | Description |
+    | :----: | :---: |
+    | Acousticness | A confidence measure from 0.0 to 1.0 of whether the track is acoustic. 1.0 represents high confidence the track is acoustic. |
+    | Danceability | Danceability describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity. A value of 0.0 is least danceable and 1.0 is most danceable. |
+    | Energy | Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy. |
+    | Instrumentalness | Predicts whether a track contains no vocals. "Ooh" and "aah" sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly "vocal". The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks, but confidence is higher as the value approaches 1.0. |
+    | Key | The key the track is in. Integers map to pitches using standard Pitch Class notation. E.g. 0 = C, 1 = C♯/D♭, 2 = D, and so on. If no key was detected, the value is -1. |
+    | Liveness | Detects the presence of an audience in the recording. Higher liveness values represent an increased probability that the track was performed live. A value above 0.8 provides strong likelihood that the track is live. |
+    | Loudness | The overall loudness of a track in decibels (dB). Loudness values are averaged across the entire track and are useful for comparing relative loudness of tracks. Loudness is the quality of a sound that is the primary psychological correlate of physical strength (amplitude). Values typically range between -60 and 0 db. |
+    | Mode | Mode indicates the modality (major or minor) of a track, the type of scale from which its melodic content is derived. Major is represented by 1 and minor is 0. |
+    | Speechiness | Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g. talk show, audio book, poetry), the closer to 1.0 the attribute value. Values above 0.66 describe tracks that are probably made entirely of spoken words. Values between 0.33 and 0.66 describe tracks that may contain both music and speech, either in sections or layered, including such cases as rap music. Values below 0.33 most likely represent music and other non-speech-like tracks. |
+    | Tempo | The overall estimated tempo of a track in beats per minute (BPM). In musical terminology, tempo is the speed or pace of a given piece and derives directly from the average beat duration. |
+    | Time Signature | An estimated time signature. The time signature (meter) is a notational convention to specify how many beats are in each bar (or measure). The time signature ranges from 3 to 7 indicating time signatures of "3/4", to "7/4". |
+    | Valence | A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry). |
+    Information about features: [here](https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features)
+    """
+    st.subheader('Credit')
+    # Attribution for the base Streamlit application and the dataset.
+    """
+    Thanks for base of streamlit application to [abdelrhmanelruby](https://github.com/abdelrhmanelruby/Spotify-Recommendation-System) and dataset can be found [here](https://www.kaggle.com/datasets/vatsalmavani/spotify-dataset)
+    """
+def main():
+    spr_sidebar()
+    if st.session_state.app_mode == 'Home':
+        home_page()
+    if st.session_state.app_mode == 'Results':
+        result_page()
+    if st.session_state.app_mode == 'About':
+        About_page()
+    # if st.session_state.app_mode == 'How It Works':
+    #     examples_page()
+# Run main()
+if __name__ == '__main__':
+    main()

cluster_labels.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

pipeline.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38e51de0983dff547c840fdf71c2483d8f4c66dbe2c7aa6c7c384712c6f9f2e5
+size 686621

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit-option-menu
+streamlit
+scikit-learn
+openai
+numpy
+pandas
+spotipy
+scipy

spotify_music_recommender.py ADDED Viewed

	@@ -0,0 +1,244 @@

+#!/usr/bin/env python
+# coding: utf-8
+# # **Import Libraries**
+# In[22]:
+#import os
+#import difflib
+import numpy as np
+import pandas as pd
+import openai
+import spotipy
+import pickle
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import Pipeline
+from sklearn.manifold import TSNE
+from sklearn.decomposition import PCA
+from sklearn.metrics import euclidean_distances
+from scipy.spatial.distance import cdist
+from spotipy.oauth2 import SpotifyClientCredentials
+from collections import defaultdict
+import warnings
+warnings.filterwarnings("ignore")
+# In[23]:
+def get_pipeline_data_number_cols():
+    data = pd.read_csv("data/data.csv")
+    # genre_data = pd.read_csv('data/data_by_genres.csv')
+    # year_data = pd.read_csv('data/data_by_year.csv')
+    song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
+                                      ('kmeans', KMeans(n_clusters=20,
+                                       verbose=False))
+                                     ], verbose=False)
+    X = data.select_dtypes(np.number)
+    number_cols = list(X.columns)
+    song_cluster_pipeline.fit(X)
+    song_cluster_labels = song_cluster_pipeline.predict(X)
+    data['cluster_label'] = song_cluster_labels
+    return song_cluster_pipeline, data, number_cols
+# In[24]:
+def find_song(name, year):
+    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
+    song_data = defaultdict()
+    results = sp.search(q= 'track: {} year: {}'.format(name,year), limit=1)
+    if results['tracks']['items'] == []:
+        return None
+    results = results['tracks']['items'][0]
+    track_id = results['id']
+    audio_features = sp.audio_features(track_id)[0]
+    song_data['name'] = [name]
+    song_data['year'] = [year]
+    song_data['explicit'] = [int(results['explicit'])]
+    song_data['duration_ms'] = [results['duration_ms']]
+    song_data['popularity'] = [results['popularity']]
+    for key, value in audio_features.items():
+        song_data[key] = value
+    return pd.DataFrame(song_data)
+def find_song_uri(name, year):
+    # Create a Spotify client object.
+    client = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="e941ee9577244e08a1741a6b8183b346", client_secret="d5990a6f11e442fe8e897da07b3f6277"))
+    # Get the name of the song you want to get the ID for.
+    song_name = name
+    # Call the `search` method with the song name.
+    results = client.search(q= 'track: {} year: {}'.format(name,year), limit=1)
+    # Get the first result.
+    track = results['tracks']['items'][0]
+    # The Spotify ID of the song will be in the `id` property.
+    song_id = track['uri']
+    return song_id
+def format_song(song_data, number_cols):
+    list_song_data = song_data[number_cols].values.tolist()[0]
+    list_song_data = '[' + ', '.join([str(num) for num in list_song_data]) + ']'
+    return list_song_data
+def get_response(text):
+  openai.api_key = "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"
+  response = openai.Completion.create(
+    model="text-davinci-003",
+    prompt=text,
+    temperature=0.7,
+    max_tokens=128,
+    top_p=1,
+    frequency_penalty=0,
+    presence_penalty=0
+  )
+  return response.choices[0].get("text")
+# In[25]:
+def get_text(user_critic, list_song_data):
+    """Build the full prompt text sent to the completion model.
+    Args:
+        user_critic: the user's review text, already prefixed by the caller.
+        list_song_data: the song's feature values rendered as a '[...]' string.
+    Returns:
+        The concatenation init_text + user_last + user_critic +
+        "future_columns=" + list_song_data + user_critic_last.
+    """
+    # Role instructions plus a describe()-style statistics table of the feature columns.
+    init_text = "I want you to act as a song recommender. I will provide you songs data with following format f future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>] \
+     values and user critic about the given song. And you will provide an array based on user critic.You must change at least 3 features. Do not write any explanations or other words, just return an array that include changes in future_columns\
+    and here is the describe values of future_columns  \n\
+    valence	year	acousticness	danceability	duration_ms	energy	explicit	instrumentalness	key	liveness	loudness	mode	popularity	speechiness	tempo \n \
+    count	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653	170653 \n \
+    mean	0.528587211	1976.787241	0.502114764	0.537395535	230948.3107	0.482388835	0.084575132	0.167009581	5.199844128	0.205838655	-11.46799004	0.706902311	31.43179434	0.098393262	116.8615896 \n \
+    std	0.263171464	25.91785256	0.376031725	0.176137736	126118.4147	0.267645705	0.278249228	0.313474674	3.515093906	0.174804661	5.697942912	0.455184191	21.82661514	0.162740072	30.70853304 \n \
+    min	0	1921	0	0	5108	0	0	0	0	0	-60	0	0	0	0 \n \
+    25%	0.317	1956	0.102	0.415	169827	0.255	0	0	2	0.0988	-14.615	0	11	0.0349	93.421 \n \
+    50%	0.54	1977	0.516	0.548	207467	0.471	0	0.000216	5	0.136	-10.58	1	33	0.045	114.729 \n \
+    75%	0.747	1999	0.893	0.668	262400	0.703	0	0.102	8	0.261	-7.183	1	48	0.0756	135.537 \n \
+    max	1	2020	0.996	0.988	5403500	1	1	1	11	1	3.855	1	100	0.97	243.507"
+    # init_last is assigned but not part of the returned prompt (only the
+    # commented-out init_input below referenced it).
+    init_last = "\n\n start with only typing random  future_columns values in given range as a array"
+    #user_critic_ex = "\n \"user_critic=it was too old and loud but i like the energy\" "
+    # Trailing reminder of the expected output format.
+    user_critic_last = "your output will be future_columns=[ <valence>, <published_year>, <acousticness>, <danceability>, <duration_ms>, <energy>, <explicit>,<instrumentalness>, <key>, <liveness>, <loudness>, <mode>, <popularity>, <speechiness>, <tempo>]  format"
+    # Instruction inserted between the statistics table and the user's review.
+    user_last = "\n\n start with the adjust following future_columns based on user_critic. "
+    #example_features = "future_columns=[0.68, 1976, 0.78, 0.62, 230948.3, 0.44, 0.22, 0.43, 5.2, 0.27, -9.67, 1, 31, 0.19, 118.86]"
+    #feature_col_starter = "future_columns="
+    # The song's actual feature values, labelled like the format shown in the prompt.
+    real_features = "future_columns=" + list_song_data
+    #init_input = init_text + init_last
+    #test_input = init_text + user_last + user_critic + example_features + user_critic_last
+    # No separator is added between the pieces; they are joined exactly as written.
+    real_input = init_text + user_last + user_critic + real_features + user_critic_last
+    return real_input
+# In[26]:
+def format_gpt_output(rec_splitted):
+    formatted = rec_splitted[3:-1].split(",")
+    list_song_data = [float(i) for i in formatted]
+    return list_song_data
+# In[27]:
+def recommend_gpt( song_list, spotify_data,song_cluster_pipeline, n_songs=15):
+    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
+ 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
+    metadata_cols = ['name', 'year', 'artists']
+    song_center = np.array(song_list)
+    scaler = song_cluster_pipeline.steps[0][1]
+    scaled_data = scaler.transform(spotify_data[number_cols])
+    scaled_song_center = scaler.transform(song_center.reshape(1, -1))
+    distances = cdist(scaled_song_center, scaled_data, 'cosine')
+    index = list(np.argsort(distances)[:, :n_songs][0])
+    rec_songs = spotify_data.iloc[index]
+    #rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
+    return rec_songs[metadata_cols].to_dict(orient='records')
+# In[28]:
+def get_rec_song_uri(res):
+    song_spotipy_info = []
+    for song in res:
+        song_spotipy_info.append(find_song_uri(song["name"], song["year"]))
+    return song_spotipy_info
+# In[30]:
+def get_recommendation_array(song_name, song_year, number_cols, user_critic_text):
+    song_data = find_song(song_name, song_year)
+    list_song_data = format_song(song_data, number_cols)
+    user_critic = "\n \"user_critic=" + user_critic_text
+    recommendation = get_response(get_text(user_critic, list_song_data))
+    rec_splitted = format_gpt_output(recommendation)
+    return rec_splitted
+# In[34]:
+def get_random_song():
+    data = pd.read_csv("data/data.csv")
+    sample = data.sample(n=1)
+    return sample.name, sample.year
+def get_model_values():
+    data_path = "data/data.csv"
+    file_path = "pipeline.pkl"
+    cluster_path = "cluster_labels.csv"
+    # Load the pipeline from the pickle file
+    with open(file_path, 'rb') as file:
+        loaded_pipeline = pickle.load(file)
+    data = pd.read_csv(data_path)
+    labels = pd.read_csv(cluster_path)
+    data["cluster_label"] = labels["cluster_label"]
+    number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
+    return loaded_pipeline, data, number_cols
+def control():
+    #song_cluster_pipeline, data, number_cols = get_pipeline_data_number_cols()
+    song_cluster_pipeline, data, number_cols = get_model_values()
+    user_critic_text = "it was dull and very loud"
+    song_name = "Poem of a Killer"
+    song_year = 2022
+    rec_splitted = get_recommendation_array(song_name, song_year, number_cols, user_critic_text)
+    res = recommend_gpt(rec_splitted, data, song_cluster_pipeline)
+    print(res)
+    print(get_rec_song_uri(res))
+# In[35]:
+# In[ ]: