|
import os |
|
import sys |
|
import time |
|
import yaml |
|
import pickle |
|
import multiprocessing as mp |
|
import numpy as np |
|
from project.evaluation.metrics import get_metrics |
|
from datetime import datetime |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
|
|
def write_rec(pwd, sessions):
    """Pickle ``sessions`` into the file located at ``pwd``.

    Args:
        pwd: Path of the output file. It is opened in binary write mode,
            so any existing content is replaced.
        sessions: Picklable object to store; it is serialized with
            ``pickle.HIGHEST_PROTOCOL``.
    """
    # The context manager closes the handle even when pickling raises,
    # which the previous manual open()/close() pair did not guarantee.
    with open(pwd, 'wb') as f:
        pickle.dump(sessions, f, protocol=pickle.HIGHEST_PROTOCOL)
|
|
|
def recs(session, original, mtn_rec, smtn_rec, csmtn_rec, csmuk_rec):
    """Bundle the recommendations produced for one session into a dict.

    Args:
        session: Identifier of the session; stored unchanged.
        original: The songs the recommendations are compared against;
            stored unchanged.
        mtn_rec, smtn_rec, csmtn_rec, csmuk_rec: Recommendation containers
            exposing ``.tolist()`` (e.g. NumPy arrays), one per algorithm.

    Returns:
        A dict with the keys ``session``, ``original``, ``mtn_rec``,
        ``smtn_rec``, ``csmtn_rec`` and ``csmuk_rec``; the four ``*_rec``
        values are converted to plain lists.
    """
    return {
        'session': session,
        'original': original,
        'mtn_rec': mtn_rec.tolist(),
        'smtn_rec': smtn_rec.tolist(),
        'csmtn_rec': csmtn_rec.tolist(),
        # Previously this key was filled from csmtn_rec, so the CSM-UK
        # recommendations were silently replaced by the CSM-TN ones.
        'csmuk_rec': csmuk_rec.tolist(),
    }
|
|
|
def execute_algo(train, test, songs, topN, k_sim, data, pwd):
    """Run the four recommenders for every test user and average their metrics.

    For each user in ``test`` and each of the user's sessions that has a
    non-empty training part, four score vectors are ranked with
    ``data.get_n_largest(scores, topN)`` and evaluated with ``get_metrics``
    against the session's test songs:

    * ``M-TN``: cosine similarity between ``data.u_pref(user)`` and
      ``data.m2v_songs`` (computed once per user).
    * ``SM-TN``: cosine similarity between ``data.c_pref(train_songs)`` and
      ``data.sm2v_songs``.
    * ``CSM-TN``: element-wise sum of the M-TN and SM-TN scores.
    * ``CSM-UK``: element-wise sum of the SM-TN scores and a per-song
      neighbour score derived from the ``k_sim`` largest entries of the
      user's row in ``data.uu_matrix()``.

    The recommendations of every evaluated session are pickled to
    ``pwd + '/' + user.replace('/', '_')``.

    Args:
        train: Unused; kept so existing callers keep working.
        test: Collection of user ids to evaluate (iterated and ``len()``-ed).
        songs: Object whose ``index.values`` lists the songs to score
            (presumably a pandas DataFrame; confirm against the caller).
        topN: Number of items requested from ``data.get_n_largest``.
        k_sim: Number of neighbour users used for the CSM-UK score.
        data: Project data-access object providing the matrices, index
            lookups, preferences and sessions used below.
        pwd: Directory in which the per-user pickle files are written.

    Returns:
        A 4-tuple ``(M-TN, SM-TN, CSM-TN, CSM-UK)``; each entry is
        ``np.mean(collected_metrics, axis=0).tolist()``. When no session was
        evaluated the collected lists are empty and NumPy yields ``nan``.
    """
    m2vTN = []
    sm2vTN = []
    csm2vTN = []
    csm2vUK = []

    u_songs = data.us_matrix()
    users = data.uu_matrix()
    num_users = len(test)

    def progress(ix_user, user_id, algo):
        # Same message the nested report_users/f_aux closures used to build.
        return '[{}/{}] Running algorithm {} for user {}!'.format(
            ix_user, num_users, algo, user_id)

    def pref(u, k_similar, song):
        """Average similarity between user ``u`` and the neighbours that
        have a ``1`` for ``song`` in the user-song matrix (0 if none do)."""
        song_col = data.song_ix(song)
        listeners = [k for k in k_similar if u_songs[k, song_col] == 1]
        # Count the listeners once instead of rebuilding and recounting a
        # list for every neighbour inside the accumulation loop.
        n_listeners = len(listeners)
        sum_sims = 0
        for u_k in listeners:
            # Per-term division is kept so floating-point results match the
            # previous implementation exactly.
            sum_sims += users[u][u_k] / n_listeners
        return sum_sims

    for u, user in enumerate(test, start=1):
        rec_path = pwd + '/' + user.replace('/', '_')
        # Write an empty placeholder first; it is overwritten with the real
        # recommendations once all sessions of the user are processed.
        write_rec(rec_path, {})

        # '\r' returns the cursor to the start of the console line, so each
        # progress message overwrites the previous one.
        print(progress(u, user, 'M-TN'), flush=False, end='\r')
        user_cos = cosine_similarity(data.u_pref(user).reshape(1, -1), data.m2v_songs)[0]
        user_tn = data.get_n_largest(user_cos, topN)

        # Resolve the user's matrix index once rather than once per song.
        user_ix = data.ix_user(user)
        # Indices of the k_sim largest entries in the user's similarity row.
        sim_ix = np.argpartition(users[user_ix], -k_sim)[-k_sim:]
        song_sim = np.array([pref(user_ix, sim_ix, song) for song in songs.index.values])
        to_write = []

        # NOTE(review): session numbers advance for every session, including
        # the skipped ones. The reviewed source had lost its indentation, so
        # confirm the counter was not meant to advance only for evaluated
        # sessions.
        for s, (train_songs, test_songs) in enumerate(data.user_sessions(user), start=1):
            if len(train_songs) == 0:
                # Nothing to build a session context from; skip evaluation.
                continue

            m2vTN.append(get_metrics(user_tn, test_songs))
            c_pref = data.c_pref(train_songs)

            print(progress(u, user, 'SM-TN'), flush=False, end='\r')
            con_cos = cosine_similarity(c_pref.reshape(1, -1), data.sm2v_songs)[0]
            cos_tn = data.get_n_largest(con_cos, topN)
            sm2vTN.append(get_metrics(cos_tn, test_songs))

            print(progress(u, user, 'CSM-TN'), flush=False, end='\r')
            f_cos = np.sum([user_cos, con_cos], axis=0)
            both_tn = data.get_n_largest(f_cos, topN)
            csm2vTN.append(get_metrics(both_tn, test_songs))

            print(progress(u, user, 'CSM-UK'), flush=False, end='\r')
            UK_cos = np.sum([song_sim, con_cos], axis=0)
            uk_tn = data.get_n_largest(UK_cos, topN)
            csm2vUK.append(get_metrics(uk_tn, test_songs))

            to_write.append(recs(s, test_songs, user_tn, cos_tn, both_tn, uk_tn))

        write_rec(rec_path, to_write)

    m_m2vTN = np.mean(m2vTN, axis=0).tolist()
    m_sm2vTN = np.mean(sm2vTN, axis=0).tolist()
    m_csm2vTN = np.mean(csm2vTN, axis=0).tolist()
    m_csm2vUK = np.mean(csm2vUK, axis=0).tolist()
    return (m_m2vTN, m_sm2vTN, m_csm2vTN, m_csm2vUK)
|
|