from glob import glob
import os

import pandas as pd
from sklearn.metrics import accuracy_score


def get_merged_df(results_path, skip_samples=False):
    """Load every line-delimited JSON result file under `results_path` into one DataFrame.

    Files containing missing values are reported and skipped; files whose name
    contains 'sample_result' are also skipped when `skip_samples` is True.
    """
    results = glob(os.path.join(results_path, "*.json"))
    dfs = []
    for r in results:
        if skip_samples and 'sample_result' in r:
            continue
        df = pd.read_json(r, lines=True)
        if df.isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)
    return pd.concat(dfs)


def map_df(full_df):
    """Map string labels to binary columns: 0 = fake, 1 = real."""
    gnd_truth_mapping = {'full fake': 0, 'half fake': 0, 'real': 1}
    pred_mapping = {'fake': 0, 'real': 1}
    full_df['label'] = full_df['ground_truth'].map(gnd_truth_mapping)
    full_df['pred'] = full_df['type'].map(pred_mapping)
    return full_df


def get_scores(df):
    """Compute accuracy on the fake samples (label == 0), bucketed by clip duration."""
    columns = ['Under 25s', '26s - 55s', '56s - 125s', 'Overall']
    samples_tested = []
    acc_scores = []
    for c in columns:
        # Every bucket is restricted to fake samples, so "Accuracy" here is
        # the detection rate on fakes within each duration range.
        if c == 'Overall':
            mask = df.label == 0
        elif c == 'Under 25s':
            mask = (df.label == 0) & (df.duration < 26)
        elif c == '26s - 55s':
            mask = (df.label == 0) & (df.duration >= 26) & (df.duration < 56)
        elif c == '56s - 125s':
            mask = (df.label == 0) & (df.duration >= 56) & (df.duration < 126)
        else:
            raise ValueError(f"Unknown duration bucket: {c}")
        sel_df = df[mask]
        samples_tested.append(len(sel_df))
        acc_scores.append(round(accuracy_score(sel_df.label.values, sel_df.pred.values), 3))
    return pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Accuracy": acc_scores})


def build_leaderboard(results_path='results'):
    """Merge all result files, map labels, and return the per-bucket accuracy table."""
    full_df = get_merged_df(results_path)
    full_df_mapped = map_df(full_df)
    return get_scores(full_df_mapped)
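

# Minimal usage sketch (an assumption, not part of the original module): it
# presumes a `results/` directory of line-delimited JSON files with
# `ground_truth`, `type`, and `duration` fields, as the functions above expect.
# Run this file as a script to print the leaderboard table.
if __name__ == "__main__":
    leaderboard = build_leaderboard(results_path='results')
    print(leaderboard.to_string(index=False))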