|
from glob import glob |
|
from sklearn.metrics import accuracy_score, recall_score, f1_score |
|
import os |
|
import pandas as pd |
|
|
|
def get_merged_df(results_path, skip_samples = True):
    """Load and concatenate every JSON-lines result file in *results_path*.

    Parameters
    ----------
    results_path : str
        Directory containing ``*.json`` result files (one JSON object per line).
    skip_samples : bool, optional
        When True (default), files whose path contains ``'sample_result'``
        are skipped.

    Returns
    -------
    pd.DataFrame
        Row-wise concatenation of all clean result files, with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If no usable result file is found. (Previously this surfaced as an
        opaque ``pd.concat`` error on an empty list.)
    """
    results = glob(os.path.join(results_path, "*.json"))

    dfs = []
    for r in results:
        if skip_samples and 'sample_result' in r:
            continue
        df = pd.read_json(r, lines = True)
        # 'algorithm' is legitimately absent for real samples, so it is
        # excluded from the missing-value check; files with any other NaN
        # are reported and dropped.
        if df.drop(['algorithm'], axis = 1).isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)

    if not dfs:
        raise ValueError(f"No valid result files found in {results_path!r}")

    # ignore_index avoids duplicated row labels coming from the per-file frames.
    full_df = pd.concat(dfs, ignore_index = True)
    return full_df
|
|
|
def map_df(full_df):
    """Attach binary 'gnd_truth' and 'pred' columns to *full_df*.

    Ground truth comes from the 'label' column (any flavour of fake maps
    to the positive class 1, real maps to 0); the prediction comes from
    the 'type' column. The frame is modified in place and also returned.
    """
    label_to_truth = {
        'full fake': 1,
        'half fake': 1,
        'mostly fake': 1,
        'real': 0,
    }
    type_to_pred = {'fake': 1, 'real': 0}

    full_df['gnd_truth'] = full_df['label'].map(label_to_truth)
    full_df['pred'] = full_df['type'].map(type_to_pred)
    return full_df
|
|
|
def get_duration_scores(df):
    """Accuracy on real samples ('gnd_truth' == 0), bucketed by clip duration.

    Parameters
    ----------
    df : pd.DataFrame
        Must carry 'gnd_truth', 'pred' and 'duration' columns
        (duration presumably in seconds — matches the bucket names).

    Returns
    -------
    pd.DataFrame
        One row per duration bucket plus an 'Overall' row, with the number
        of samples tested and the rounded accuracy. Empty buckets report
        NaN accuracy instead of raising.
    """
    columns = ['Under 26 s', '55 s', '125 s', 'Overall']
    samples_tested = []
    acc_scores = []

    for c in columns:
        # Every bucket is restricted to real samples; duration bounds are
        # half-open: [0, 26), [26, 56), [56, 126).
        if c == 'Overall':
            mask = df.gnd_truth == 0
        elif c == 'Under 26 s':
            mask = (df.gnd_truth == 0) & (df.duration < 26)
        elif c == '55 s':
            mask = (df.gnd_truth == 0) & (df.duration >= 26) & (df.duration < 56)
        elif c == '125 s':
            mask = (df.gnd_truth == 0) & (df.duration >= 56) & (df.duration < 126)
        else:
            raise ValueError(f"Unknown duration bucket: {c!r}")
        sel_df = df[mask]

        samples_tested.append(len(sel_df))
        # accuracy_score raises on empty input; an empty bucket is reported
        # as NaN so one missing duration range doesn't kill the whole board.
        if sel_df.empty:
            acc_scores.append(float('nan'))
        else:
            acc_scores.append(round(accuracy_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))

    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Accuracy": acc_scores})
    return lb
|
|
|
def get_algorithm_scores_v1(df):
    """Per-generation-algorithm recall on the fake class.

    Parameters
    ----------
    df : pd.DataFrame
        Mapped results carrying 'algorithm', 'label', 'gnd_truth' and 'pred'.

    Returns
    -------
    pd.DataFrame
        One row per distinct algorithm seen among fake samples, with the
        number of fake samples tested and the rounded recall.
    """
    columns = list(df[df.label != 'real'].algorithm.unique())
    samples_tested = []
    rec_scores = []

    for c in columns:
        # Real samples are kept in the selection but cannot affect recall
        # (recall only looks at positive ground-truth rows), so the metric
        # is the per-algorithm detection rate of fakes.
        mask = (df.algorithm == c) | (df.label == 'real')
        sel_df = df[mask]

        # Only the fake rows count as "samples tested" for this algorithm.
        samples_tested.append(len(sel_df[sel_df.label != 'real']))
        rec_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))

    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Recall": rec_scores})
    return lb
|
|
|
def get_algorithm_scores_v2(df):
    """Leaderboard with per-algorithm and per-label TPR plus overall TPR/TNR/F1.

    Parameters
    ----------
    df : pd.DataFrame
        Mapped results carrying 'algorithm', 'label', 'gnd_truth' and 'pred'.

    Returns
    -------
    pd.DataFrame
        One row per fake-generation algorithm, one row per fake label
        ('full fake', 'half fake', ...), and a final 'overall' row.
        TNR and F1 are only defined on the 'overall' row; every other row
        holds NaN in those columns.
    """
    def _tpr(sub):
        # Recall of the fake (positive, label 1) class on the given subset.
        return round(recall_score(sub.gnd_truth.values, sub.pred.values,
                                  pos_label=1, average="binary"), 3)

    columns = list(df[df.label != 'real'].algorithm.unique())
    columns2 = list(df[df.label != 'real'].label.unique())
    samples_tested = []
    tpr_scores = []
    # TNR/F1 are reported only on the trailing 'overall' row; pre-pad with NaN.
    tnr_scores = [float('nan')] * (len(columns) + len(columns2))
    f1_scores = [float('nan')] * (len(columns) + len(columns2))

    # Per-algorithm TPR (each algorithm subset contains only fake samples).
    for c in columns:
        sel_df = df[df.algorithm == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(_tpr(sel_df))

    # Per-fake-label TPR.
    for c in columns2:
        sel_df = df[df.label == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(_tpr(sel_df))

    # Overall TPR across every fake sample.
    tpr_scores.append(_tpr(df[df.label != "real"]))

    # Overall TNR: recall of the real (negative, label 0) class on real samples.
    sel_df = df[df.label == "real"]
    tnr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values,
                                         pos_label=0, average="binary"), 3))

    # Overall F1 on the full dataset (no copy needed — df is only read).
    samples_tested.append(len(df))
    f1_scores.append(round(f1_score(df.gnd_truth.values, df.pred.values,
                                    average="binary"), 3))

    lb = pd.DataFrame({"Sample": columns + columns2 + ["overall"], "Num Samples": samples_tested,
                       "TPR": tpr_scores, "TNR": tnr_scores, "F1": f1_scores})
    return lb
|
|
|
def build_leaderboard(results_path = 'results'):
    """Assemble the v2 leaderboard from every result file under *results_path*."""
    merged = get_merged_df(results_path)
    return get_algorithm_scores_v2(map_df(merged))
|
|