Spaces:

nandovallec
/

spotify-recommender

Running

App Files Files Community

nandovallec committed on Jan 6, 2023

Commit

51245ea

1 Parent(s): c9bd358

Initial

Browse files

Files changed (4) hide show

fetchPlaylistTrackUris.py +58 -0
model/dict_tid2uri.pkl +3 -0
model/dict_uri2tid.pkl +3 -0
recommender.py +81 -0

fetchPlaylistTrackUris.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import requests
+import base64
+import json
+import os
+import sys
+# Spotify API credentials, read from the environment at import time.
+# A missing CLIENT_ID or CLIENT_SECRET raises KeyError as soon as this module is imported.
+client_id = os.environ["CLIENT_ID"]
+client_secret= os.environ["CLIENT_SECRET"]
+def get_playlist_track_uris(playlist_id):
+    access_token = get_access_token(client_id, client_secret)
+    playlist_data = get_playlist_data(access_token, playlist_id)
+    # Output the playlist data to a file
+    # with open('playlist-tracks.json', 'w') as outfile:
+        # json.dump(json.loads(playlist_response.text), outfile)
+    track_uris = [item['track']['uri'] for item in playlist_data['tracks']['items']]
+    print(track_uris)
+    # Output the track uris into a file
+    # with open('track-uris-new.txt', 'w') as output_file:
+    #     output_file.write('\n'.join(track_uris))
+    return track_uris
+def get_access_token(client_id, client_secret) -> str:
+    base64_string = base64.b64encode((client_id + ':' + client_secret).encode('ascii')).decode('ascii')
+    auth_headers = {
+            'Authorization': 'Basic ' + base64_string,
+            'Content-type': 'application/x-www-form-urlencoded'
+    }
+    auth_data = {'grant_type': 'client_credentials'}
+    auth_response = requests.post('https://accounts.spotify.com/api/token', headers=auth_headers, json=True, data=auth_data)
+    access_token = json.loads(auth_response.text)['access_token']
+    return access_token
+def get_playlist_data(access_token, playlist_id):
+    get_playlist_headers = {
+        'Authorization': 'Bearer ' + access_token,
+        'Content-Type': 'application/json',
+    }
+    playlist_response = requests.get('https://api.spotify.com/v1/playlists/' + playlist_id, headers=get_playlist_headers)
+    playlist_data = json.loads(playlist_response.text)
+    return playlist_data
+if __name__ == "__main__":
+    playlist_id = sys.argv[1]
+    get_playlist_track_uris(playlist_id)

model/dict_tid2uri.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b52797435b4c60789b15afd28f846064645898376cfd3e4aabc36609770477cb
+size 30017867

model/dict_uri2tid.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85fe3ebd1c087df637f92f561c48f8de71f3edee0dc357a42e60fb906f3c88cf
+size 30017867

recommender.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import pickle
+import sklearn.preprocessing as pp
+from scipy.sparse import csr_matrix
+import numpy as np
+import pandas as pd
+def inference_row(list_tid, ps_matrix):
+    ps_matrix_norm = pp.normalize(ps_matrix, axis=1)
+    length_tid = len(list_tid)
+    n_songs = ps_matrix.shape[1]
+    sparse_row = csr_matrix((np.ones(length_tid), (np.zeros(length_tid), list_tid)), shape=(1, n_songs))
+    sparse_row_norm = pp.normalize(sparse_row, axis=1)
+    return sparse_row_norm * ps_matrix_norm.T, sparse_row
+def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
+    df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
+    sim_vector, sparse_row = inference_row(current_list, ps_matrix_row)
+    sim_vector = sim_vector.toarray()[0].tolist()
+    # Enumerate index and rating
+    counter_list = list(enumerate(sim_vector, 0))
+    # Sort by rating
+    sortedList = sorted(counter_list, key=lambda x: x[1], reverse=True)
+    topK_pid = [i for i, _ in sortedList[1:K + 1]]
+    n = 0
+    while (1):
+        top_pid = topK_pid[n]
+        add_tid_list = df_ps_train.loc[top_pid].tid
+        # Form new list
+        new_tid_list = current_list + add_tid_list
+        new_tid_list = list(dict.fromkeys(new_tid_list))
+        # Check number of songs and Add to data for prediction
+        total_song = len(new_tid_list)
+        #            print("n: {}\t total_song: {}".format(n,total_song))
+        if (total_song > MAX_tid):
+            new_tid_list = new_tid_list[:MAX_tid]
+            # Add
+            current_list = new_tid_list
+            break
+        else:
+            current_list = new_tid_list
+        n += 1
+        if (n == K):
+            break
+    return current_list
+def inference_from_tid(list_tid, K=50, MAX_tid=10):
+    pickle_path = 'model/giantMatrix_new.pickle'
+    # pickle_path = 'data/giantMatrix_truth_new.pickle'
+    with open(pickle_path, 'rb') as f:
+        ps_matrix = pickle.load(f)
+    ps_matrix_row = ps_matrix.tocsr()
+    return get_best_tid(list_tid, ps_matrix.tocsr(), K, MAX_tid)
+def inference_from_uri(list_uri, K=50, MAX_tid=10):
+    with open('model/dict_uri2tid.pkl', 'rb') as f:
+        dict_uri2tid = pickle.load(f)
+    list_tid = [dict_uri2tid[x] for x in list_uri if x in dict_uri2tid]
+    best_tid = inference_from_tid(list_tid, K, MAX_tid)
+    with open('model/dict_tid2uri.pkl', 'rb') as f:
+        dict_tid2uri = pickle.load(f)
+    best_uri = [dict_tid2uri[x] for x in best_tid]
+    return best_uri