File size: 3,608 Bytes
9c58361 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import sys
import time
import yaml
import pickle
import multiprocessing as mp
import numpy as np
from project.evaluation.metrics import get_metrics
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity
def write_rec(pwd, sessions):
    """Pickle *sessions* to the file at path *pwd*.

    Overwrites any existing file. Uses the highest pickle protocol for
    speed and compactness.

    :param pwd: destination file path (str).
    :param sessions: any picklable object (here, a list of per-session
        recommendation dicts built by ``recs``).
    """
    # 'with' guarantees the handle is closed even if pickling raises,
    # fixing the handle leak of the original open/dump/close sequence.
    with open(pwd, 'wb') as f:
        pickle.dump(sessions, f, protocol=pickle.HIGHEST_PROTOCOL)
def recs(session, original, mtn_rec, smtn_rec, csmtn_rec, csmuk_rec):
    """Bundle one session's recommendations into a plain, picklable dict.

    :param session: 1-based session index within the user.
    :param original: the held-out (ground-truth) songs for the session.
    :param mtn_rec: M-TN top-N recommendations (numpy array).
    :param smtn_rec: SM-TN top-N recommendations (numpy array).
    :param csmtn_rec: CSM-TN top-N recommendations (numpy array).
    :param csmuk_rec: CSM-UK top-N recommendations (numpy array).
    :returns: dict with the arrays converted to plain lists via ``tolist()``.
    """
    return {
        'session': session,
        'original': original,
        'mtn_rec': mtn_rec.tolist(),
        'smtn_rec': smtn_rec.tolist(),
        'csmtn_rec': csmtn_rec.tolist(),
        # BUG FIX: original wrote csmtn_rec.tolist() here, so the CSM-UK
        # slot silently duplicated CSM-TN and csmuk_rec was never used.
        'csmuk_rec': csmuk_rec.tolist(),
    }
def execute_algo(train, test, songs, topN, k_sim, data, pwd):
    """Run the four recommenders (M-TN, SM-TN, CSM-TN, CSM-UK) over every
    test user, persist per-user recommendations under *pwd*, and return
    the mean evaluation metrics of each algorithm.

    :param train: training split (unused in this body — kept for the
        caller's signature; presumably consumed via *data*).
    :param test: iterable of test user ids (strings).
    :param songs: song table; only ``songs.index.values`` is read here —
        presumably one row per song (pandas-like; verify against caller).
    :param topN: number of recommendations to keep per algorithm.
    :param k_sim: number of most-similar users for the user-kNN component.
    :param data: project data-access object (us_matrix, uu_matrix,
        u_pref, c_pref, m2v_songs, sm2v_songs, song_ix, ix_user,
        user_sessions, get_n_largest) — semantics opaque from this file.
    :param pwd: output directory; one pickle file is written per user.
    :returns: tuple of four lists — the column-wise means of the metric
        rows collected for M-TN, SM-TN, CSM-TN and CSM-UK.
    """
    # Per-algorithm accumulators; one get_metrics() row is appended per
    # evaluated session, across all users.
    m2vTN = []
    sm2vTN = []
    csm2vTN = []
    csm2vUK = []
    u_songs = data.us_matrix()   # user x song matrix; assumed binary "listened" flags — TODO confirm
    users = data.uu_matrix()     # user x user similarity matrix — presumably; verify against data impl
    def report_users(num_users):
        # Closure that formats a one-line progress message for a fixed user count.
        def f_aux(ix_user, user_id, algo):
            return '[{}/{}] Running algorithm {} for user {}!'.format(ix_user, num_users,algo, user_id)
        return f_aux
    num_users = len(test)
    rep = report_users(num_users)
    u = 1  # 1-based progress counter over test users
    def pref(u, k_similar, song):
        """User-kNN preference score: mean similarity of the k neighbours
        of user *u* (by matrix index) that listened to *song*."""
        # (neighbour index, did-neighbour-listen) pairs for the k similar users.
        listened_to = [(k, u_songs[k, data.song_ix(song)] == 1) for k in k_similar]
        sum_sims = 0
        for u_k, listen in listened_to:
            if listen == True:
                # Dividing each term by the listener count makes the sum an
                # average. NOTE(review): the count is recomputed on every
                # iteration (O(k^2)); hoisting it would not change results.
                sum_sims += users[u][u_k] / [v[1] for v in listened_to].count(True)
        return sum_sims
    for user in test:
        # Pre-create the per-user output file with an empty dict; it is
        # overwritten with the real recommendations by write_rec below.
        # '/' in user ids is sanitized so the id can be used as a filename.
        f = open(pwd + '/' + user.replace('/', '_'), 'wb')
        pickle.dump({}, f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()
        # end='\r' keeps progress on one console line (NOTE(review):
        # flush=False means the message may not appear immediately).
        print(rep(u, user, 'M-TN'), flush=False, end='\r')
        # M-TN: cosine similarity of the user's taste vector vs. all
        # music2vec song vectors; keep the topN most similar songs.
        user_cos = cosine_similarity(data.u_pref(user).reshape(1, -1), data.m2v_songs)[0]
        user_tn = data.get_n_largest(user_cos, topN)
        # Indices of the k_sim most similar users (argpartition avoids a full sort).
        sim_ix = np.argpartition(users[data.ix_user(user)], -k_sim)[-k_sim:]
        # User-kNN score for every song in the catalogue, for this user.
        song_sim = np.array([pref(data.ix_user(user), sim_ix, s) for s in songs.index.values])
        to_write = []
        s = 1  # 1-based session counter for this user
        sessions = data.user_sessions(user)
        # Each session is a (context songs, held-out songs) pair — presumably
        # a train/test split of the session; verify against data.user_sessions.
        for (train_songs, test_songs) in sessions:
            if len(train_songs) > 0:
                # M-TN is context-independent, so the same user_tn is
                # scored against each session's held-out songs.
                m2vTN.append(get_metrics(user_tn, test_songs))
                c_pref = data.c_pref(train_songs)
                print(rep(u, user, 'SM-TN'), flush=False, end='\r')
                # SM-TN: session-context vector vs. session-music2vec songs.
                con_cos = cosine_similarity(c_pref.reshape(1, -1), data.sm2v_songs)[0]
                cos_tn = data.get_n_largest(con_cos, topN)
                sm2vTN.append(get_metrics(cos_tn, test_songs))
                print(rep(u, user, 'CSM-TN'), flush=False, end='\r')
                # CSM-TN: element-wise sum of user and context similarities.
                f_cos = np.sum([user_cos, con_cos], axis=0)
                both_tn = data.get_n_largest(f_cos, topN)
                csm2vTN.append(get_metrics(both_tn, test_songs))
                print(rep(u, user, 'CSM-UK'), flush=False, end='\r')
                # CSM-UK: user-kNN scores combined with context similarities.
                UK_cos = np.sum([song_sim, con_cos], axis=0)
                uk_tn = data.get_n_largest(UK_cos, topN)
                csm2vUK.append(get_metrics(uk_tn, test_songs))
                to_write.append(recs(s, test_songs, user_tn, cos_tn, both_tn, uk_tn))
            s+=1
        # Persist all of this user's session recommendations in one file.
        write_rec(pwd + '/' + user.replace('/', '_'), to_write)
        u+=1
    # Column-wise mean over all collected sessions, per algorithm.
    m_m2vTN = np.mean(m2vTN, axis=0).tolist()
    m_sm2vTN = np.mean(sm2vTN, axis=0).tolist()
    m_csm2vTN = np.mean(csm2vTN, axis=0).tolist()
    m_csm2vUK = np.mean(csm2vUK, axis=0).tolist()
    return (m_m2vTN, m_sm2vTN, m_csm2vTN, m_csm2vUK)
|