File size: 2,530 Bytes
9c58361 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import sys
import csv
import os
import yaml
import pickle
import numpy as np
import pandas as pd
import project.evaluation.metrics as m
from os.path import exists
from project.data.preparation import prepare_data, get_embeddings
from project.recsys.helper import Helper
from datetime import datetime
from project.recsys.algorithms import execute_algo
from project.evaluation.ResultReport import Results
from keras.models import model_from_yaml
def get_rnn():
    """Load the trained Keras model from disk.

    The architecture is read from ``training_model.yaml`` and the weights
    from ``training_weights.h5``; both paths are relative to the current
    working directory.

    Returns:
        The model built by ``model_from_yaml`` with its weights loaded.
    """
    # Read the YAML inside a context manager so the file handle is closed
    # deterministically, and pass the text itself to model_from_yaml instead
    # of an open file object.
    with open('training_model.yaml', 'r') as yaml_file:
        model = model_from_yaml(yaml_file.read())
    model.load_weights('training_weights.h5')
    return model
def skip_all(executed, params, k):
    """Tell whether every fold of a setup has already been recorded.

    Args:
        executed: DataFrame of stored results with ``params`` and ``folds``
            columns.
        params: Value identifying the setup in the ``params`` column.
        k: Number of folds of the cross-validation.

    Returns:
        Truthy when the largest fold recorded for ``params`` equals ``k``;
        falsy otherwise, including when no row matches ``params``.
    """
    matches = executed['params'] == params
    highest_fold = executed.loc[matches, 'folds'].max()
    return highest_fold == k
def skip_fold(executed, params, fold):
    """Tell whether a given fold of a setup has already been recorded.

    Args:
        executed: DataFrame of stored results with ``params`` and ``folds``
            columns.
        params: Value identifying the setup in the ``params`` column.
        fold: Fold number about to be run.

    Returns:
        Truthy when the largest fold recorded for ``params`` is at least
        ``fold``; falsy otherwise, including when no row matches ``params``.
    """
    recorded = executed.loc[executed['params'] == params, 'folds']
    return recorded.max() >= fold
def cross_validation(df, conf, setups):
    """Run the k-fold recommender evaluation for every embedding setup.

    Per-fold results are appended to the tab-separated file at
    ``conf['results']['full']``. That file is read once at the start; setups
    and folds it already contains (see ``skip_all`` / ``skip_fold``) are not
    run again.

    Args:
        df: DataFrame of the dataset; its ``song`` column supplies the songs
            whose embeddings are looked up.
        conf: Configuration mapping. ``conf['evaluation']`` must provide
            ``dataset``, ``topN`` and ``k``; ``conf['results']`` must provide
            ``full``.
        setups: Iterable of 3-item setups ``(_, params, path)``; the first
            item is ignored, ``params`` names the setup and ``path`` is
            handed to ``get_embeddings``.
    """
    eval_conf = conf['evaluation']
    r_paths = conf['results']
    # NOTE(review): kfold is iterated once per setup below. If prepare_data
    # returns a one-shot iterator, only the first setup that is not skipped
    # would see any folds -- confirm it returns a reusable sequence.
    kfold = prepare_data(df, conf)
    dataset = eval_conf['dataset']
    topN = int(eval_conf['topN'])
    k = int(eval_conf['k'])
    results = Results(setups, k)
    exec_path = r_paths['full']
    pwd_rec = 'tmp/{}/rec/'.format(dataset)
    # makedirs rather than mkdir: the path is nested, and mkdir raises when
    # a parent directory (e.g. 'tmp/') does not exist yet.
    if not exists(pwd_rec):
        os.makedirs(pwd_rec)
    if not exists(exec_path):
        # Start the results file with nothing but its header row.
        header = ['params', 'algo', 'folds', 'prec', 'rec', 'f1', 'map', 'ndcg@5', 'p@5']
        pd.DataFrame({}, columns=header).to_csv(exec_path, index=None, sep='\t')
    executed = pd.read_csv(exec_path, sep='\t')
    # The setup name gets its own variable instead of reusing the name of
    # the evaluation-config mapping read above.
    for _, setup_params, path in setups:
        setup_dir = pwd_rec + setup_params
        if not exists(setup_dir):
            os.makedirs(setup_dir)
        if skip_all(executed, setup_params, k):
            continue
        song_ids = df['song'].unique().tolist()
        m2v, sm2v = get_embeddings(path, song_ids)
        # One row per song, holding its entry from each embedding lookup.
        songs = pd.DataFrame(
            {
                'm2v': [m2v[x] for x in song_ids],
                'sm2v': [sm2v[x] for x in song_ids]
            },
            index=song_ids,
            columns=['m2v', 'sm2v']
        )
        # Folds are numbered from 1 to match the 'folds' column of the
        # results file.
        for fold, (train, test) in enumerate(kfold, start=1):
            if skip_fold(executed, setup_params, fold):
                continue
            timestamp = datetime.now().strftime('%d/%m/%Y %H:%M')
            print('%s | fold-%d | Running recsys w/ k-fold with the following params: %s' % (timestamp, fold, setup_params))
            helper = Helper(train, test, songs, dataset)
            m2vTN, sm2vTN, csm2vTN, csm2vUK = execute_algo(train.index, test.index, songs, topN, k, helper, setup_dir)
            res = results.fold_results(setup_params, m2vTN, sm2vTN, csm2vTN, csm2vUK, fold)
            # Append without header/index so rows line up under the header
            # written when the file was created.
            res.to_csv(exec_path, sep='\t', mode='a', index=None, header=None)
|