import os
from glob import glob

import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, recall_score

def get_merged_df(results_path, skip_samples=True):
    """Merge every JSON-lines result file under ``results_path`` into one DataFrame.

    Files whose name contains 'sample_result' are skipped by default; files
    with missing values (outside the 'algorithm' column) are reported and excluded.
    """
    results = glob(os.path.join(results_path, "*.json"))
    dfs = []
    for r in results:
        if skip_samples and "sample_result" in r:
            continue
        df = pd.read_json(r, lines=True)
        # 'algorithm' may legitimately be NaN (real samples carry no spoofing
        # algorithm), so it is excluded from the missing-value check.
        if df.drop(["algorithm"], axis=1).isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)
    return pd.concat(dfs, ignore_index=True)
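
# A sketch of one record each result file is assumed to contain, inferred from
# the columns the functions below read ('label', 'type', 'algorithm',
# 'duration'); the exact schema and the example values are assumptions:
#
#   {"label": "full fake", "type": "fake", "algorithm": "tts_a", "duration": 42.0}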


def map_df(full_df):
    """Derive binary columns: 'gnd_truth' from the dataset label, 'pred' from the model output."""
    gnd_truth_mapping = {
        "full fake": 1,
        "half fake": 1,
        "mostly fake": 1,
        "real": 0,
    }
    pred_mapping = {"fake": 1, "real": 0}
    full_df["gnd_truth"] = full_df["label"].map(gnd_truth_mapping)
    full_df["pred"] = full_df["type"].map(pred_mapping)
    return full_df


def get_duration_scores(df):
    """Accuracy on real samples, bucketed by clip duration in seconds.

    Every selected sample has gnd_truth == 0, so 'Accuracy' here is the
    true-negative rate within each bucket.
    """
    columns = ["Under 26 s", "55 s", "125 s", "Overall"]
    samples_tested = []
    acc_scores = []
    for c in columns:
        if c == "Overall":
            mask = df.gnd_truth == 0
        elif c == "Under 26 s":
            mask = (df.gnd_truth == 0) & (df.duration < 26)
        elif c == "55 s":  # 26 s up to 55 s
            mask = (df.gnd_truth == 0) & (df.duration >= 26) & (df.duration < 56)
        elif c == "125 s":  # 56 s up to 125 s
            mask = (df.gnd_truth == 0) & (df.duration >= 56) & (df.duration < 126)
        else:
            raise ValueError(f"Unknown duration bucket: {c}")
        sel_df = df[mask]
        samples_tested.append(len(sel_df))
        acc_scores.append(round(accuracy_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))
    return pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Accuracy": acc_scores})


def get_algorithm_scores_v1(df):
    """Recall (detection rate) per spoofing algorithm on the fake samples."""
    columns = list(df[df.label != "real"].algorithm.unique())
    samples_tested = []
    rec_scores = []
    for c in columns:
        # Real samples are included in the selection but cannot change recall
        # (pos_label defaults to 1); the sample count reports fakes only.
        mask = (df.algorithm == c) | (df.label == "real")
        sel_df = df[mask]
        samples_tested.append(len(sel_df[sel_df.label != "real"]))
        rec_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))
    return pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Recall": rec_scores})


def get_algorithm_scores_v2(df):
    """Leaderboard: TPR per algorithm and per fake-label type, plus one overall row.

    TNR and macro F1 are only computed for the overall row, so the per-group
    slots are padded with NaN.
    """
    columns = list(df[df.label != "real"].algorithm.unique())
    columns2 = list(df[df.label != "real"].label.unique())
    samples_tested = []
    tpr_scores = []
    tnr_scores = [float("nan")] * (len(columns) + len(columns2))
    f1_scores = [float("nan")] * (len(columns) + len(columns2))
    # TPR per spoofing algorithm.
    for c in columns:
        sel_df = df[df.algorithm == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    # TPR per fake-label type ('full fake', 'half fake', 'mostly fake').
    for c in columns2:
        sel_df = df[df.label == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    # Overall row: TPR over all fakes, TNR over all reals, macro F1 over everything.
    sel_df = df[df.label != "real"]
    tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    sel_df = df[df.label == "real"]
    tnr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=0), 3))
    samples_tested.append(len(df))
    f1_scores.append(round(f1_score(df.gnd_truth.values, df.pred.values, average="macro"), 3))
    return pd.DataFrame({
        "Sample": columns + columns2 + ["overall (real + fake)"],
        "Num Samples": samples_tested,
        "TPR": tpr_scores,
        "TNR": tnr_scores,
        "F1": f1_scores,
    })


def build_leaderboard(results_path="results"):
    """End-to-end pipeline: merge result files, map labels to binary, score them."""
    full_df = get_merged_df(results_path)
    full_df_mapped = map_df(full_df)
    return get_algorithm_scores_v2(full_df_mapped)
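

# Minimal usage sketch, assuming a local 'results' directory (the default above)
# containing the JSON-lines result files described near the top of this file.
if __name__ == "__main__":
    print(build_leaderboard(results_path="results"))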