import os
from glob import glob

import pandas as pd
from sklearn.metrics import accuracy_score, recall_score, f1_score

def get_merged_df(results_path, skip_samples=True):
    """Load every JSON-lines result file in `results_path` into one DataFrame."""
    results = glob(os.path.join(results_path, "*.json"))
    dfs = []
    for r in results:
        # Optionally skip the small sample-result files.
        if skip_samples and 'sample_result' in r:
            continue
        df = pd.read_json(r, lines=True)
        # Reject files with missing values anywhere outside the 'algorithm' column.
        if df.drop(['algorithm'], axis=1).isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)
    full_df = pd.concat(dfs)
    return full_df
def map_df(full_df):
    """Map the string labels and predictions to binary columns."""
    gnd_truth_mapping = {'full fake': 1,
                         'half fake': 1,
                         'mostly fake': 1,
                         'real': 0}
    pred_mapping = {'fake': 1, 'real': 0}
    full_df['gnd_truth'] = full_df['label'].map(gnd_truth_mapping)
    full_df['pred'] = full_df['type'].map(pred_mapping)
    return full_df
def get_duration_scores(df):
    """Accuracy on real samples, bucketed by clip duration in seconds."""
    columns = ['Under 26 s', '55 s', '125 s', 'Overall']
    samples_tested = []
    acc_scores = []
    for c in columns:
        # Every bucket is restricted to real samples (gnd_truth == 0), so each
        # accuracy value is effectively a true-negative rate for that duration range.
        if c == 'Overall':
            mask = df.gnd_truth == 0
        elif c == 'Under 26 s':
            mask = (df.gnd_truth == 0) & (df.duration < 26)
        elif c == '55 s':
            mask = (df.gnd_truth == 0) & (df.duration >= 26) & (df.duration < 56)
        elif c == '125 s':
            mask = (df.gnd_truth == 0) & (df.duration >= 56) & (df.duration < 126)
        else:
            raise ValueError(f"Unknown duration bucket: {c}")
        sel_df = df[mask]
        samples_tested.append(len(sel_df))
        acc_scores.append(round(accuracy_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))
    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Accuracy": acc_scores})
    return lb
def get_algorithm_scores_v1(df):
    """Recall on the fake class, broken down by generation algorithm."""
    columns = list(df[df.label != 'real'].algorithm.unique())
    samples_tested = []
    rec_scores = []
    for c in columns:
        # Pool each algorithm's fakes with all real samples; real samples do not
        # affect recall of the positive (fake) class.
        mask = (df.algorithm == c) | (df.label == 'real')
        sel_df = df[mask]
        samples_tested.append(len(sel_df[sel_df.label != 'real']))
        rec_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))
    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Recall": rec_scores})
    return lb
def get_algorithm_scores_v2(df):
    """Leaderboard with per-algorithm and per-label TPR plus an overall TPR/TNR/F1 row."""
    columns = list(df[df.label != 'real'].algorithm.unique())
    columns2 = list(df[df.label != 'real'].label.unique())
    samples_tested = []
    tpr_scores = []
    # TNR and F1 are only reported on the final "overall" row, so the
    # per-algorithm and per-label slots are padded with NaN.
    tnr_scores = [float('nan')] * (len(columns) + len(columns2))
    f1_scores = [float('nan')] * (len(columns) + len(columns2))
    # TPR (recall on the fake class) per generation algorithm.
    for c in columns:
        sel_df = df[df.algorithm == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    # TPR per fake label ('full fake', 'half fake', 'mostly fake').
    for c in columns2:
        sel_df = df[df.label == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    # Overall row: TPR over all fake samples, TNR over all real samples,
    # and macro-averaged F1 over the full dataset.
    sel_df = df[df.label != "real"]
    tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))
    sel_df = df[df.label == "real"]
    tnr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=0), 3))
    samples_tested.append(len(df))
    f1_scores.append(round(f1_score(df.gnd_truth.values, df.pred.values, average="macro"), 3))
    lb = pd.DataFrame({"Sample": columns + columns2 + ["overall"], "Num Samples": samples_tested,
                       "TPR": tpr_scores, "TNR": tnr_scores, "F1": f1_scores})
    return lb
def build_leaderboard(results_path='results'):
    """Merge all result files, binarize the labels, and compute the leaderboard table."""
    full_df = get_merged_df(results_path)
    full_df_mapped = map_df(full_df)
    leaderboard = get_algorithm_scores_v2(full_df_mapped)
    return leaderboard
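
# Minimal usage sketch (illustrative, not part of the original pipeline): assumes a
# 'results' directory of JSON-lines files carrying the 'label', 'type', 'algorithm',
# and 'duration' columns consumed by the functions above.
if __name__ == "__main__":
    full_df = map_df(get_merged_df("results"))
    # Duration-bucketed accuracy on real samples (not invoked by build_leaderboard).
    print(get_duration_scores(full_df).to_string(index=False))
    # Per-algorithm / per-label leaderboard, as build_leaderboard() produces.
    print(get_algorithm_scores_v2(full_df).to_string(index=False))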