Spaces:
Runtime error
Runtime error
File size: 2,543 Bytes
51245ea 47eae45 51245ea 47eae45 51245ea 47eae45 51245ea 47eae45 51245ea 47eae45 51245ea 47eae45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import pickle
import sklearn.preprocessing as pp
from scipy.sparse import csr_matrix
import numpy as np
import pandas as pd
def inference_row(list_tid, ps_matrix):
    """Score a set of track ids against every row of ``ps_matrix``.

    A one-row sparse query is built with a 1 at each column listed in
    ``list_tid``. Both the query and ``ps_matrix`` are row-normalized with
    ``sklearn.preprocessing.normalize`` before being multiplied.

    Args:
        list_tid: Column indices (track ids) to switch on in the query row.
            Repeated ids accumulate, because the sparse constructor sums
            duplicate coordinates.
        ps_matrix: 2-D matrix whose second dimension is the number of songs.

    Returns:
        A ``(similarities, query_row)`` tuple: ``similarities`` is the
        normalized query multiplied by the transpose of the normalized
        matrix (one column per row of ``ps_matrix``), and ``query_row`` is
        the un-normalized ``1 x n_songs`` sparse query.
    """
    normalized_matrix = pp.normalize(ps_matrix, axis=1)

    track_count = len(list_tid)
    song_count = ps_matrix.shape[1]

    # COO-style triplets: every entry sits in row 0, at the given columns.
    values = np.ones(track_count)
    row_index = np.zeros(track_count)
    query_row = csr_matrix(
        (values, (row_index, list_tid)),
        shape=(1, song_count),
    )
    normalized_query = pp.normalize(query_row, axis=1)

    # On scipy sparse matrices `*` is matrix multiplication, not elementwise.
    similarities = normalized_query * normalized_matrix.T
    return similarities, query_row
def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
    """Pick new track ids from the rows most similar to ``current_list``.

    The rows of ``ps_matrix_row`` are ranked by the similarity returned from
    ``inference_row``. The tracks of the best-ranked rows are then merged,
    in rank order, until more than enough new tracks have been collected.

    Args:
        current_list: Track ids already present; never returned.
        ps_matrix_row: Matrix handed to ``inference_row`` (rows are scored
            against the query built from ``current_list``).
        K: Maximum number of ranked rows to draw tracks from.
        MAX_tid: Maximum number of track ids to return.

    Returns:
        A de-duplicated list of at most ``MAX_tid`` track ids, ordered by the
        rank of the row that contributed them. May be shorter (or empty)
        when the ``K`` candidate rows do not hold enough new tracks, or when
        fewer than ``K`` candidate rows exist.
    """
    df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
    sim_vector, _ = inference_row(current_list, ps_matrix_row)
    sim_vector = sim_vector.toarray()[0].tolist()

    # Rank row indices by similarity, highest first. sorted() is stable, so
    # rows with equal scores stay in ascending index order.
    ranked = sorted(enumerate(sim_vector), key=lambda pair: pair[1], reverse=True)
    # The single best-ranked row is skipped, as in the original code.
    # NOTE(review): presumably because the query is expected to match its own
    # source row -- confirm this is intended for unseen queries.
    topK_pid = [pid for pid, _ in ranked[1:K + 1]]

    # Build the membership set once instead of scanning the list per track.
    already_present = set(current_list)
    new_list = []
    # Iterating the candidates directly (instead of indexing until n == K)
    # avoids an IndexError when fewer than K candidate rows are available.
    for top_pid in topK_pid:
        # NOTE(review): assumes the frame's index labels coincide with matrix
        # row positions and that `.tid` holds a list of track ids -- confirm.
        add_tid_list = df_ps_train.loc[top_pid].tid
        merged = new_list + add_tid_list
        # Drop tracks the caller already has, then de-duplicate in order.
        new_list = list(
            dict.fromkeys(x for x in merged if x not in already_present)
        )
        if len(new_list) >= MAX_tid:
            new_list = new_list[:MAX_tid]
            break
    return new_list
def inference_from_tid(list_tid, K=50, MAX_tid=10):
    """Load the pickled matrix and recommend track ids for ``list_tid``.

    Args:
        list_tid: Track ids to base the recommendation on.
        K: Forwarded to ``get_best_tid``.
        MAX_tid: Forwarded to ``get_best_tid``.

    Returns:
        Whatever ``get_best_tid`` returns for the CSR form of the matrix.
    """
    pickle_path = 'model/giantMatrix_new.pickle'
    # pickle_path = 'data/giantMatrix_truth_new.pickle'
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(pickle_path, 'rb') as f:
        ps_matrix = pickle.load(f)
    # Convert to CSR once; the original converted twice and discarded the
    # first result.
    return get_best_tid(list_tid, ps_matrix.tocsr(), K, MAX_tid)
def inference_from_uri(list_uri, K=50, MAX_tid=10):
    """Recommend track URIs for a list of track URIs.

    URIs are mapped to track ids through the pickled ``dict_uri2tid``
    table, passed to ``inference_from_tid``, and the resulting ids are
    mapped back to URIs through the pickled ``dict_tid2uri`` table.

    Args:
        list_uri: Input track URIs. URIs missing from the uri-to-tid table
            are ignored.
        K: Forwarded to ``inference_from_tid``.
        MAX_tid: Forwarded to ``inference_from_tid``.

    Returns:
        A list with the URI of every recommended track id, in the order
        returned by ``inference_from_tid``.

    Raises:
        KeyError: If a recommended track id is missing from the tid-to-uri
            table.
    """
    with open('model/dict_uri2tid.pkl', 'rb') as uri2tid_file:
        uri_to_tid = pickle.load(uri2tid_file)

    # Keep only the URIs the lookup table knows about.
    known_tids = []
    for uri in list_uri:
        if uri in uri_to_tid:
            known_tids.append(uri_to_tid[uri])

    recommended_tids = inference_from_tid(known_tids, K, MAX_tid)

    # The reverse table is opened only after inference has finished.
    with open('model/dict_tid2uri.pkl', 'rb') as tid2uri_file:
        tid_to_uri = pickle.load(tid2uri_file)

    return [tid_to_uri[tid] for tid in recommended_tids]
|