"""Metrics for comparing model predictions against measured target values."""

import numpy as np
from scipy.stats import spearmanr
from sklearn.metrics import r2_score, roc_auc_score, ndcg_score

# Fractions of the data (selected by highest true value) on which
# get_spearman_fractions evaluates the rank correlation: 0.1, 0.2, ..., 1.0.
SPEARMAN_FRACTIONS = np.linspace(0.1, 1.0, 10)


def _topk_indices(y_pred, topk):
    """Return the positions of the ``topk`` largest entries of ``y_pred``.

    Positions are ordered by ascending prediction. If ``topk`` exceeds the
    number of predictions, every position is returned.

    Raises:
        ValueError: if ``topk`` is smaller than 1.
    """
    if topk < 1:
        # ``arr[-0:]`` is the *whole* array and a negative ``topk`` slices from
        # the wrong end, so reject both rather than silently selecting the
        # wrong subset.
        raise ValueError("topk must be a positive integer, got %r" % (topk,))
    return np.argsort(np.asarray(y_pred))[-topk:]


def spearman(y_pred, y_true):
    """Return the Spearman rank correlation between ``y_pred`` and ``y_true``.

    Returns 0.0 when the variance of either input is below 1e-6, because the
    rank correlation is undefined for (near-)constant data.
    """
    if np.var(y_pred) < 1e-6 or np.var(y_true) < 1e-6:
        return 0.0
    return spearmanr(y_pred, y_true).correlation


def spearman_scoring_fn(sklearn_estimator, X, y):
    """Score an estimator by the Spearman correlation of its predictions.

    Calls ``sklearn_estimator.predict(X)`` and compares the result with ``y``
    using :func:`spearman`.
    """
    return spearman(sklearn_estimator.predict(X), y)


def ndcg(y_pred, y_true):
    """Return the NDCG of the ranking given by ``y_pred``.

    ``y_true`` is standardized (mean subtracted, divided by its standard
    deviation) and used as the relevance; both arrays are passed to
    ``ndcg_score`` as a single query of shape ``(1, n)``.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    # NOTE(review): standardizing produces negative relevance values, which
    # scikit-learn's ndcg_score deprecates/rejects in recent releases, and a
    # constant y_true divides by zero here -- confirm the intended
    # normalization against the scikit-learn version in use.
    y_true_normalized = (y_true - y_true.mean()) / y_true.std()
    return ndcg_score(y_true_normalized.reshape(1, -1), y_pred.reshape(1, -1))


def topk_mean(y_pred, y_true, topk=96):
    """Return the mean true value of the ``topk`` highest-predicted items.

    Raises:
        ValueError: if ``topk`` is smaller than 1.
    """
    return np.mean(np.asarray(y_true)[_topk_indices(y_pred, topk)])


def r2(y_pred, y_true):
    """Return the coefficient of determination of ``y_pred`` w.r.t. ``y_true``.

    Note the argument order: this module takes predictions first, while
    ``r2_score`` takes the true values first.
    """
    return r2_score(y_true, y_pred)


def hit_rate(y_pred, y_true, y_ref=0.0, topk=96):
    """Return the fraction of the top-``topk`` predictions that beat ``y_ref``.

    An item counts as a hit when its true value is strictly greater than
    ``y_ref``. The count is divided by ``topk`` itself, even when fewer than
    ``topk`` items are available.

    Raises:
        ValueError: if ``topk`` is smaller than 1.
    """
    selected = np.asarray(y_true)[_topk_indices(y_pred, topk)]
    n_above = np.sum(selected > y_ref)
    return float(n_above) / float(topk)


def aucroc(y_pred, y_true, y_cutoff):
    """Return the ROC AUC of ``y_pred`` for the labels ``y_true >= y_cutoff``."""
    y_true_bin = np.asarray(y_true) >= y_cutoff
    return roc_auc_score(y_true_bin, y_pred, average='micro')


def get_spearman_fractions(y_pred, y_true):
    """Return Spearman correlations restricted to the top fractions of ``y_true``.

    For each fraction ``f`` in ``SPEARMAN_FRACTIONS`` the ``int(f * n)`` items
    with the highest true values are selected and the Spearman correlation
    between predictions and true values is computed on that subset.

    Returns:
        Array with one entry per fraction. An entry is NaN when the subset
        holds fewer than two items, since a rank correlation is undefined
        there.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    # The ordering does not depend on the fraction, so sort only once.
    order = np.argsort(y_true)
    results = np.full(len(SPEARMAN_FRACTIONS), np.nan)
    for i, f in enumerate(SPEARMAN_FRACTIONS):
        k = int(f * len(y_true))
        if k < 2:
            # Guard against k == 0: ``order[-0:]`` would select *all* items
            # and report the full-data correlation for a small fraction.
            continue
        idx = order[-k:]
        results[i] = spearmanr(y_pred[idx], y_true[idx]).correlation
    return results


def wt_improvement_metric(y_pred, y_true, y_wt, topk=96):
    """Return the top-``topk`` hit rate relative to the overall hit rate.

    The hit rate of the ``topk`` highest predictions (true value strictly
    above ``y_wt``) is divided by the fraction of all items whose true value
    is above ``y_wt``.

    Returns:
        The ratio, or NaN when no item exceeds ``y_wt`` (including empty
        input), because the baseline is zero and the ratio is undefined.

    Raises:
        ValueError: if ``topk`` is smaller than 1.
    """
    y_true = np.asarray(y_true)
    hr = hit_rate(y_pred, y_true, y_wt, topk)
    n_better = np.sum(y_true > y_wt)
    if n_better == 0:
        # Zero baseline (or empty input): avoid ZeroDivisionError.
        return float("nan")
    baseline = float(n_better) / len(y_true)
    return hr / baseline


def topk_median(y_pred, y_true, topk=96):
    """Return the median true value of the ``topk`` highest-predicted items.

    Raises:
        ValueError: if ``topk`` is smaller than 1.
    """
    return np.median(np.asarray(y_true)[_topk_indices(y_pred, topk)])