import sys |
import csv |
import os |
import yaml |
import pickle |
import numpy as np |
import pandas as pd |
import project.evaluation.metrics as m |
from os.path import exists |
from project.data.preparation import prepare_data, get_embeddings |
from project.recsys.helper import Helper |
from datetime import datetime |
from project.recsys.algorithms import execute_algo |
from project.evaluation.ResultReport import Results |
from keras.models import model_from_yaml |
def get_rnn():
    """Rebuild the trained Keras model from its saved architecture and weights.

    The architecture is read from ``training_model.yaml`` and the weights from
    ``training_weights.h5``; both paths are relative to the current working
    directory.

    Returns:
        The model built by ``model_from_yaml`` with the stored weights loaded.
    """
    # Read the YAML inside a context manager so the file handle is closed
    # deterministically, and hand the parser the text it is documented to take.
    # NOTE(review): YAML model deserialisation should only ever be used on
    # trusted files -- confirm these artefacts are produced by this project.
    with open('training_model.yaml', 'r') as yaml_file:
        model = model_from_yaml(yaml_file.read())
    model.load_weights('training_weights.h5')
    return model
def skip_all(executed, params, k):
    """Tell whether every fold of a setup has already been recorded.

    Args:
        executed: DataFrame of previously stored results; it must provide the
            ``params`` and ``folds`` columns.
        params: Value identifying the setup in the ``params`` column.
        k: Total number of folds of the cross-validation.

    Returns:
        bool: True when the highest fold recorded for ``params`` is at least
        ``k``; False otherwise, including when nothing is recorded for it.
    """
    folds = executed.loc[executed['params'] == params, 'folds']
    if folds.empty:
        return False
    # ">=" (rather than "==") keeps this check consistent with skip_fold:
    # results holding more than k folds mean every fold would be skipped
    # anyway, so the whole setup can be skipped up front.
    return bool(folds.max() >= k)
def skip_fold(executed, params, fold):
    """Tell whether a given fold of a setup can be skipped.

    Args:
        executed: DataFrame of previously stored results; it must provide the
            ``params`` and ``folds`` columns.
        params: Value identifying the setup in the ``params`` column.
        fold: Fold number about to be run.

    Returns:
        The result of comparing the highest fold recorded for ``params``
        with ``fold`` (truthy when it is greater than or equal to ``fold``).
    """
    same_setup = executed['params'] == params
    highest_recorded = executed.loc[same_setup, 'folds'].max()
    # With no recorded rows the maximum is NaN, which compares as False.
    return highest_recorded >= fold
def cross_validation(df, conf, setups):
    """Run the recommender algorithms on every fold of every setup.

    Results are appended to a tab-separated file after each fold, and
    (setup, fold) pairs already present in that file are skipped, so an
    interrupted run can be resumed.

    Args:
        df: DataFrame holding at least a ``song`` column; it is also handed
            to ``prepare_data`` to build the folds.
        conf: Configuration mapping. Reads ``conf['evaluation']`` (keys
            ``dataset``, ``topN``, ``k``) and ``conf['results']['full']``
            (path of the results file).
        setups: Iterable of 3-item sequences ``(ignored, params, path)``;
            ``params`` names the setup (and its output sub-directory) and
            ``path`` is passed to ``get_embeddings``.
    """
    eval_conf = conf['evaluation']
    result_paths = conf['results']
    # Materialise the folds: they are iterated once per setup, which would
    # silently yield nothing from the second setup on if prepare_data
    # returned a one-shot iterator.
    kfold = list(prepare_data(df, conf))
    dataset = eval_conf['dataset']
    topN = int(eval_conf['topN'])
    k = int(eval_conf['k'])
    results = Results(setups, k)
    exec_path = result_paths['full']

    # makedirs also creates missing parents (e.g. 'tmp/<dataset>'), which a
    # plain mkdir would fail on.
    pwd_rec = 'tmp/{}/rec/'.format(dataset)
    os.makedirs(pwd_rec, exist_ok=True)

    # Start the results file with just its header row when it does not exist.
    if not exists(exec_path):
        header = pd.DataFrame(columns=['params', 'algo', 'folds', 'prec', 'rec', 'f1', 'map', 'ndcg@5', 'p@5'])
        header.to_csv(exec_path, index=False, sep='\t')
    # Snapshot of what earlier runs already completed.
    executed = pd.read_csv(exec_path, sep='\t')

    # The list of distinct songs does not depend on the setup.
    song_ids = df['song'].unique().tolist()

    for _, setup_params, path in setups:
        setup_dir = pwd_rec + setup_params
        os.makedirs(setup_dir, exist_ok=True)
        if skip_all(executed, setup_params, k):
            continue

        # One row per song, holding its two embeddings for this setup.
        m2v, sm2v = get_embeddings(path, song_ids)
        songs = pd.DataFrame(
            {
                'm2v': [m2v[song] for song in song_ids],
                'sm2v': [sm2v[song] for song in song_ids],
            },
            index=song_ids,
            columns=['m2v', 'sm2v'],
        )

        for fold, (train, test) in enumerate(kfold, start=1):
            if skip_fold(executed, setup_params, fold):
                continue
            timestamp = datetime.now().strftime('%d/%m/%Y %H:%M')
            print('%s | fold-%d | Running recsys w/ k-fold with the following params: %s' % (timestamp, fold, setup_params))
            helper = Helper(train, test, songs, dataset)
            m2vTN, sm2vTN, csm2vTN, csm2vUK = execute_algo(train.index, test.index, songs, topN, k, helper, setup_dir)
            res = results.fold_results(setup_params, m2vTN, sm2vTN, csm2vTN, csm2vUK, fold)
            # Append right away so finished folds survive an interruption.
            res.to_csv(exec_path, sep='\t', mode='a', index=False, header=False)