Spaces:
Runtime error
Runtime error
File size: 3,316 Bytes
51245ea 4da8e4d fa7f40e 9cb5f62 d457c9f 51245ea fa7f40e d457c9f 51245ea 47eae45 51245ea 47eae45 51245ea 47eae45 51245ea 47eae45 51245ea d457c9f 1ab13ba d457c9f 51245ea 9cb5f62 d457c9f 1ab13ba d457c9f fa7f40e d457c9f 1ab13ba d457c9f 51245ea 47eae45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import pickle
import sklearn.preprocessing as pp
from scipy.sparse import csr_matrix
import numpy as np
import pandas as pd
from scipy.sparse import vstack
import global_var
def add_row_train(df, list_tid):
    """Append one playlist row to ``df`` in place and return ``df``.

    The new row is labelled with the last index label plus one. Its
    ``'tid'`` cell holds ``list_tid`` and its ``'pos'`` cell holds the
    positions ``0 .. len(list_tid) - 1``.

    Args:
        df: DataFrame with ``'tid'`` and ``'pos'`` columns and at least one
            existing row (the next label is derived from the last row).
        list_tid: Track ids of the playlist to append.

    Returns:
        The same ``df`` object, now containing the extra row.
    """
    next_pid = df.index[-1] + 1
    positions = [pos for pos, _ in enumerate(list_tid)]
    # Assigning to a label that does not exist yet enlarges the frame.
    df.loc[next_pid] = {'tid': list_tid, 'pos': positions}
    return df
def inference_row(list_tid, ps_matrix):
    """Score every row of ``ps_matrix`` against a playlist given as track ids.

    A one-row sparse vector is built with a 1 in each column listed in
    ``list_tid``. Both that vector and the rows of ``ps_matrix`` are
    normalized with ``pp.normalize`` (L2 by default), so their product is
    the cosine similarity between the query and each matrix row.

    Args:
        list_tid: Column indices (track ids) contained in the query playlist.
        ps_matrix: Sparse matrix whose columns are tracks; its column count
            fixes the width of the query vector.

    Returns:
        A tuple ``(similarities, sparse_row)``: the 1 x n_rows product of the
        normalized query with the transposed normalized matrix, and the
        un-normalized one-row query vector.
    """
    matrix_unit_rows = pp.normalize(ps_matrix, axis=1)

    entry_count = len(list_tid)
    track_count = ps_matrix.shape[1]
    values = np.ones(entry_count)
    row_indices = np.zeros(entry_count)  # every entry lives in row 0
    sparse_row = csr_matrix(
        (values, (row_indices, list_tid)),
        shape=(1, track_count),
    )

    query_unit_row = pp.normalize(sparse_row, axis=1)
    similarities = query_unit_row * matrix_unit_rows.T
    return similarities, sparse_row
def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
    """Recommend new track ids for a playlist and record the playlist on disk.

    The rows of ``ps_matrix_row`` are ranked by similarity to
    ``current_list`` (see ``inference_row``). The track lists of the
    best-ranked playlists are merged in rank order, skipping tracks already
    in ``current_list`` and duplicates, until more than ``MAX_tid`` tracks
    have been gathered or the candidates run out.

    Side effects: reads ``data_train/df_ps_train_extra.hdf``, appends
    ``current_list`` to it as a new row and writes it back under key
    ``'abc'``.

    Args:
        current_list: Track ids of the query playlist.
        ps_matrix_row: Sparse playlist-by-track matrix; row positions are
            used as labels into the concatenated training table.
        K: Maximum number of similar playlists to draw tracks from.
        MAX_tid: Maximum number of track ids to return.

    Returns:
        A tuple ``(new_list, sparse_row)``: the recommended track ids (at
        most ``MAX_tid`` once the cap is hit) and the un-normalized one-row
        sparse vector built for ``current_list``.
    """
    df_ps_train_extra = pd.read_hdf('data_train/df_ps_train_extra.hdf')
    df_ps_train = pd.concat([global_var.df_ps_train_ori, df_ps_train_extra])

    sim_vector, sparse_row = inference_row(current_list, ps_matrix_row)
    scores = sim_vector.toarray()[0].tolist()

    # Rank row positions by descending similarity; the sort is stable, so
    # ties keep their original row order.
    ranked_pids = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    # NOTE(review): the top-ranked row is skipped, presumably because it is
    # expected to be the query playlist itself - confirm against callers.
    topK_pid = ranked_pids[1:K + 1]

    already_in_playlist = set(current_list)
    seen = set()
    new_list = []
    # Iterate over the candidates that actually exist. The previous
    # index-based loop raised IndexError when fewer than K were available.
    for top_pid in topK_pid:
        for tid in df_ps_train.loc[top_pid].tid:
            if tid in already_in_playlist or tid in seen:
                continue
            seen.add(tid)
            new_list.append(tid)
        if len(new_list) > MAX_tid:
            new_list = new_list[:MAX_tid]
            break

    # Persist the query playlist so later calls can use it as training data.
    df_ps_train_extra = add_row_train(df_ps_train_extra, current_list)
    df_ps_train_extra.to_hdf('data_train/df_ps_train_extra.hdf', key='abc')
    return new_list, sparse_row
def inference_from_tid(list_tid, K=50, MAX_tid=10):
    """Recommend track ids for ``list_tid`` and persist the query row.

    Loads the pickled extra playlist matrix, stacks it under
    ``global_var.ps_matrix_ori``, asks ``get_best_tid`` for recommendations,
    then appends the query's sparse row to the extra matrix and re-pickles it.

    Args:
        list_tid: Track ids of the query playlist.
        K: Forwarded to ``get_best_tid``.
        MAX_tid: Forwarded to ``get_best_tid``.

    Returns:
        The list of recommended track ids produced by ``get_best_tid``.
    """
    extra_matrix_path = "data_mat/giantMatrix_extra.pickle"

    with open(extra_matrix_path, 'rb') as src:
        extra_rows = pickle.load(src)

    combined = vstack((global_var.ps_matrix_ori, extra_rows)).tocsr()
    recommended, query_row = get_best_tid(list_tid, combined, K, MAX_tid)

    # Build the enlarged matrix before opening the file for writing, so a
    # failure while stacking cannot leave a truncated pickle behind.
    enlarged = vstack((extra_rows, query_row.todok()))
    with open(extra_matrix_path, 'wb') as dst:
        pickle.dump(enlarged, dst)

    return recommended
def inference_from_uri(list_uri, K=50, MAX_tid=10):
    """Recommend track URIs for a playlist given as track URIs.

    URIs are translated to track ids with ``model/dict_uri2tid.pkl`` (URIs
    missing from that mapping are dropped), passed to
    ``inference_from_tid``, and the resulting ids are translated back with
    ``model/dict_tid2uri.pkl``.

    Args:
        list_uri: Track URIs of the query playlist.
        K: Forwarded to ``inference_from_tid``.
        MAX_tid: Forwarded to ``inference_from_tid``.

    Returns:
        The list of recommended track URIs.

    Raises:
        KeyError: If a recommended track id is missing from the
            id-to-URI mapping.
    """
    with open('model/dict_uri2tid.pkl', 'rb') as src:
        uri_to_tid = pickle.load(src)

    known_tids = []
    for uri in list_uri:
        if uri in uri_to_tid:
            known_tids.append(uri_to_tid[uri])

    recommended_tids = inference_from_tid(known_tids, K, MAX_tid)

    with open('model/dict_tid2uri.pkl', 'rb') as src:
        tid_to_uri = pickle.load(src)

    return [tid_to_uri[tid] for tid in recommended_tids]
|