from glob import glob
from sklearn.metrics import accuracy_score, recall_score
import os
import pandas as pd
def get_merged_df(results_path, skip_samples = True):
    """Load and merge all JSON-lines result files found under *results_path*.

    Parameters
    ----------
    results_path : str
        Directory containing ``*.json`` result files (one JSON object per line).
    skip_samples : bool, optional
        When True (default), files whose *file name* contains
        ``sample_result`` are ignored.

    Returns
    -------
    pd.DataFrame
        Concatenation of every complete result file. Files with missing
        values (outside the ``algorithm`` column) are reported via ``print``
        and excluded.

    Raises
    ------
    ValueError
        If no usable result file is found.
    """
    results = glob(os.path.join(results_path, "*.json"))
    dfs = []
    for r in results:
        # Match on the file name only: a parent directory whose path happens
        # to contain 'sample_result' must not cause every file to be skipped.
        if skip_samples and 'sample_result' in os.path.basename(r):
            continue
        df = pd.read_json(r, lines = True)
        # 'algorithm' is legitimately NaN for real (non-fake) samples, so it
        # is excluded from the completeness check.
        if df.drop(['algorithm'], axis = 1).isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)
    if not dfs:
        # pd.concat on an empty list raises an opaque error; fail with a
        # clearer message (same exception type as before).
        raise ValueError(f"No usable result files found in {results_path!r}")
    full_df = pd.concat(dfs)
    return full_df
def map_df(full_df):
    """Derive binary ``gnd_truth`` and ``pred`` columns from the textual
    ``label`` and ``type`` columns (mutates *full_df* in place and returns it).
    """
    # Any kind of fake ('full fake' or 'half fake') counts as the positive class.
    label_to_truth = {'full fake': 1, 'half fake': 1, 'real': 0}
    type_to_pred = {'fake': 1, 'real': 0}
    full_df['gnd_truth'] = full_df['label'].map(label_to_truth)
    full_df['pred'] = full_df['type'].map(type_to_pred)
    return full_df
def get_duration_scores(df):
    """Accuracy on real samples, bucketed by clip duration.

    NOTE(review): every bucket is restricted to gnd_truth == 0 (real samples
    only), so each score is effectively the true-negative rate for that
    duration range rather than a mixed-class accuracy.
    """
    real_only = df.gnd_truth == 0
    # Bucket name -> boolean row mask over *df*.
    bucket_masks = {
        'Under 26 s': real_only & (df.duration < 26),
        '55 s': real_only & (df.duration >= 26) & (df.duration < 56),
        '125 s': real_only & (df.duration >= 56) & (df.duration < 126),
        'Overall': real_only,
    }
    columns = ['Under 26 s', '55 s', '125 s', 'Overall']
    samples_tested = []
    acc_scores = []
    for name in columns:
        bucket = df[bucket_masks[name]]
        samples_tested.append(len(bucket))
        acc_scores.append(round(accuracy_score(bucket.gnd_truth.values,
                                               bucket.pred.values), 3))
    return pd.DataFrame({"Sample": columns,
                         "Num Samples": samples_tested,
                         "Accuracy": acc_scores})
def get_algorithm_scores(df):
    """Per-algorithm recall over fake samples.

    For each spoofing algorithm present in the fake rows, recall is computed
    on the union of that algorithm's fake samples and all real samples;
    ``Num Samples`` counts only the algorithm's fake samples.

    Parameters
    ----------
    df : pd.DataFrame
        Must carry ``algorithm``, ``label``, ``gnd_truth`` and ``pred``
        columns (the latter two as produced by ``map_df``).

    Returns
    -------
    pd.DataFrame
        Columns ``Sample`` (algorithm name), ``Num Samples``, ``Recall``.
    """
    columns = list(df[df.label != 'real'].algorithm.unique())
    samples_tested = []
    rec_scores = []
    # (removed an unused `acc_scores` accumulator that was never populated)
    for c in columns:
        # This algorithm's fakes plus every real sample; real rows contribute
        # only negatives, which do not affect recall.
        mask = (df.algorithm == c) | (df.label == 'real')
        sel_df = df[mask]
        # Count only the fake samples attributed to this algorithm.
        samples_tested.append(len(sel_df[sel_df.label != 'real']))
        rec_scores.append(round(recall_score(sel_df.gnd_truth.values,
                                             sel_df.pred.values), 3))
    lb = pd.DataFrame({"Sample": columns,
                       "Num Samples": samples_tested,
                       "Recall": rec_scores})
    return lb
def build_leaderboard(results_path = 'results'):
    """End-to-end pipeline: load all result files from *results_path*,
    derive binary ground-truth/prediction columns, and return the
    per-algorithm recall leaderboard."""
    merged = get_merged_df(results_path)
    return get_algorithm_scores(map_df(merged))