Spaces:
Build error
Build error
from glob import glob | |
from sklearn.metrics import accuracy_score | |
import os | |
import pandas as pd | |
def get_merged_df(results_path, skip_samples=False):
    """Load every ``*.json`` (JSON Lines) file in ``results_path`` into one frame.

    Args:
        results_path: Directory searched (non-recursively) for ``*.json`` files.
        skip_samples: When true, files whose *file name* contains
            ``'sample_result'`` are ignored.

    Returns:
        The row-wise concatenation of every loaded file, in sorted path order.
        Each file keeps its own index, so the combined index may repeat.

    Raises:
        ValueError: If no file could be loaded (no matches, all skipped, or
            all rejected for missing values).
    """
    # Sort so the row order of the result does not depend on filesystem order.
    result_files = sorted(glob(os.path.join(results_path, "*.json")))
    dfs = []
    for path in result_files:
        # Test the file name only: matching against the full path would skip
        # everything when the directory name itself contains 'sample_result'.
        if skip_samples and 'sample_result' in os.path.basename(path):
            continue
        df = pd.read_json(path, lines=True)
        if df.isna().values.any():
            # Files with any missing value are reported and left out entirely.
            print(f"Missing values in {path}")
            continue
        dfs.append(df)
    if not dfs:
        # pd.concat([]) would raise a ValueError that does not say which
        # directory was empty; keep the exception type, improve the message.
        raise ValueError(f"No usable result files found in {results_path!r}")
    return pd.concat(dfs)
def map_df(full_df):
    """Add numeric ``label`` and ``pred`` columns to ``full_df`` and return it.

    ``label`` is derived from the ``ground_truth`` column and ``pred`` from the
    ``type`` column. In both, ``'real'`` becomes 1 and every fake variant
    becomes 0. Values not listed in a mapping become NaN (the behavior of
    ``Series.map`` with a dict).

    The frame is modified in place; the same object is returned.
    """
    fake, real = 0, 1
    # (new column, source column, string -> code mapping)
    encodings = (
        ('label', 'ground_truth', {'full fake': fake, 'half fake': fake, 'real': real}),
        ('pred', 'type', {'fake': fake, 'real': real}),
    )
    for target, source, codes in encodings:
        full_df[target] = full_df[source].map(codes)
    return full_df
def get_scores(df):
    """Build the leaderboard table of accuracy per duration bucket.

    Only rows with ``label == 0`` are scored (presumably the fake class;
    confirm against the label mapping used upstream). Those rows are split by
    ``duration`` (presumably seconds, going by the bucket names) into three
    buckets, plus an ``'Overall'`` row covering all ``label == 0`` rows
    regardless of duration.

    Args:
        df: Frame with ``label``, ``pred`` and ``duration`` columns.

    Returns:
        A DataFrame with columns ``Sample`` (bucket name), ``Num Samples``
        (rows in the bucket) and ``Accuracy`` (rounded to 3 decimals).
    """
    # (bucket name, inclusive lower bound, exclusive upper bound); None = open.
    buckets = (
        ('Under 25s', None, 26),
        ('26s - 55s', 26, 56),
        ('56s - 125s', 56, 126),
        ('Overall', None, None),
    )
    zero_label = df.label == 0

    names, counts, accuracies = [], [], []
    for name, lower, upper in buckets:
        selector = zero_label
        if lower is not None:
            selector = selector & (df.duration >= lower)
        if upper is not None:
            selector = selector & (df.duration < upper)
        subset = df[selector]
        score = accuracy_score(subset.label.values, subset.pred.values)
        names.append(name)
        counts.append(len(subset))
        accuracies.append(round(score, 3))

    return pd.DataFrame({"Sample": names, "Num Samples": counts, "Accuracy": accuracies})
def build_leaderboard(results_path = 'results'):
    """Produce the leaderboard table from the result files in ``results_path``.

    Runs the full pipeline: load and merge the result files, add the numeric
    ``label``/``pred`` columns, then score them.
    """
    merged = get_merged_df(results_path)
    return get_scores(map_df(merged))