gzhong's picture
Upload folder using huggingface_hub
7718235 verified
import numpy as np
from scipy.stats import spearmanr
from sklearn.metrics import r2_score, roc_auc_score, ndcg_score
# Ten evenly spaced fractions (0.1, 0.2, ..., 1.0) of the data set; each one
# is multiplied by the number of samples to decide how many of the
# highest-valued items a Spearman correlation is computed over.
SPEARMAN_FRACTIONS = np.linspace(start=0.1, stop=1.0, num=10)
def spearman(y_pred, y_true):
    """Return the Spearman rank correlation between predictions and targets.

    If either input is (nearly) constant -- its variance is below 1e-6 --
    0.0 is returned instead of calling ``spearmanr``.
    """
    # Inputs are checked in argument order; the scan stops at the first
    # one found to be flat.
    is_degenerate = any(np.var(values) < 1e-6 for values in (y_pred, y_true))
    if is_degenerate:
        return 0.0
    result = spearmanr(y_pred, y_true)
    return result.correlation
def spearman_scoring_fn(sklearn_estimator, X, y):
    """Score an estimator by the Spearman correlation of its predictions.

    Calls ``sklearn_estimator.predict(X)`` and compares the result with
    ``y`` using ``spearman``.
    """
    predictions = sklearn_estimator.predict(X)
    return spearman(predictions, y)
def ndcg(y_pred, y_true):
    """Return the NDCG of ranking by ``y_pred`` against standardised targets.

    ``y_true`` is shifted to zero mean and divided by its standard
    deviation, and that standardised array is used as the relevance. Both
    arrays are reshaped to a single row before being handed to
    ``ndcg_score``.

    NOTE(review): the standardised relevance contains negative values and
    a constant ``y_true`` has zero standard deviation -- confirm that the
    installed scikit-learn ``ndcg_score`` accepts such input.
    """
    centered = y_true - y_true.mean()
    relevance = centered / y_true.std()
    return ndcg_score(relevance.reshape(1, -1), y_pred.reshape(1, -1))
def topk_mean(y_pred, y_true, topk=96):
    """Return the mean true value of the ``topk`` highest-predicted items."""
    ranking = np.argsort(y_pred)  # ascending, so the best predictions come last
    selected = ranking[-topk:]
    return np.mean(y_true[selected])
def r2(y_pred, y_true):
    """Return the R^2 score of ``y_pred`` with respect to ``y_true``.

    This function takes the prediction first; the arguments are forwarded
    to ``r2_score`` in its own (y_true, y_pred) order.
    """
    return r2_score(y_true=y_true, y_pred=y_pred)
def hit_rate(y_pred, y_true, y_ref=0.0, topk=96):
    """Return the share of the ``topk`` highest-predicted items above ``y_ref``.

    The count of selected items whose true value is strictly greater than
    ``y_ref`` is divided by ``topk`` itself, even when fewer than ``topk``
    items are available.
    """
    ranking = np.argsort(y_pred)  # ascending, so the best predictions come last
    selected_truth = y_true[ranking[-topk:]]
    hits = np.count_nonzero(selected_truth > y_ref)
    return float(hits) / float(topk)
def aucroc(y_pred, y_true, y_cutoff):
    """Return the ROC AUC of ``y_pred`` for the task ``y_true >= y_cutoff``.

    The true values are binarised at ``y_cutoff`` (inclusive) and the raw
    predictions are used as scores for ``roc_auc_score``.
    """
    is_positive = y_true >= y_cutoff
    return roc_auc_score(y_true=is_positive, y_score=y_pred, average='micro')
def get_spearman_fractions(y_pred, y_true):
    """Return Spearman correlations restricted to the top fractions of ``y_true``.

    For every fraction ``f`` in ``SPEARMAN_FRACTIONS`` the ``int(f * n)``
    items with the largest true values are selected and the Spearman
    correlation between ``y_pred`` and ``y_true`` is computed on that
    subset only.

    Returns:
        An array with one correlation per entry of ``SPEARMAN_FRACTIONS``.
        An entry is NaN when the fraction selects fewer than two items,
        because a rank correlation is undefined there.
    """
    n_total = len(y_true)
    # The ordering does not depend on the fraction, so sort only once.
    order = np.argsort(y_true)
    results = np.zeros(len(SPEARMAN_FRACTIONS))
    for i, fraction in enumerate(SPEARMAN_FRACTIONS):
        k = int(fraction * n_total)
        if k < 2:
            # ``order[-0:]`` would select *every* item rather than none,
            # silently reporting the full-data correlation for a small
            # fraction; with a single item the correlation is undefined too.
            results[i] = np.nan
            continue
        idx = order[-k:]
        results[i] = spearmanr(y_pred[idx], y_true[idx]).correlation
    return results
def wt_improvement_metric(y_pred, y_true, y_wt, topk=96):
    """Return the hit rate above ``y_wt`` relative to the overall base rate.

    The numerator is ``hit_rate`` of the ``topk`` highest-predicted items
    with ``y_wt`` as the reference value; the denominator is the fraction
    of all items in ``y_true`` that are strictly greater than ``y_wt``.

    Returns:
        The ratio hit rate / base rate, or NaN when no item in ``y_true``
        exceeds ``y_wt`` (including an empty ``y_true``), where the ratio
        is undefined.
    """
    hr = hit_rate(y_pred, y_true, y_wt, topk)
    n_better = np.sum(y_true > y_wt)
    if n_better == 0:
        # The base rate would be zero (or 0/0 for empty input); report the
        # ratio as undefined instead of raising ZeroDivisionError.
        return float('nan')
    baseline = float(n_better) / len(y_true)
    return hr / baseline
def topk_median(y_pred, y_true, topk=96):
    """Return the median true value of the ``topk`` highest-predicted items."""
    ranking = np.argsort(y_pred)  # ascending, so the best predictions come last
    selected = ranking[-topk:]
    return np.median(y_true[selected])