import collections

import pandas as pd

# Attack names whose rows also carry the model-level columns ("model" plus
# ``first_cols``). Presumably these mark the un-attacked baseline run of a
# model -- TODO confirm with the benchmark producer.
_BASELINE_ATTACK_NAMES = ("none", "identity", "Identity")


def _mean_keep_nan(series: pd.Series):
    """Return the mean of *series*, yielding NaN if any value is missing."""
    return series.mean(skipna=False)


def _group_mean(
    frame: pd.DataFrame, keys: list[str], attack_scores: list[str]
) -> pd.DataFrame:
    """Average ``attack_scores`` per ``keys`` group; keys come back as columns."""
    return (
        frame[keys + attack_scores]
        .groupby(keys)
        .agg(_mean_keep_nan)
        .reset_index(drop=False)
    )


def get_leaderboard_filters(
    df, categories
) -> tuple[dict[str, set[str]], set[str]]:
    """Group the columns of *df* into leaderboard filter groups.

    Args:
        df: DataFrame whose (string) column names are to be grouped.
        categories: Mapping from a column-name prefix to the name of the
            category that columns with this prefix belong to.

    Returns:
        A ``(groups, default_selection)`` tuple. ``groups`` is an ordered
        mapping with an ``"Overall"`` entry followed by one entry per
        category; each value is a set of column names. ``"Overall"`` (and
        ``default_selection``, which holds the same names) contains every
        column whose name starts with a category name. Each category entry
        contains the columns whose name starts with one of the prefixes
        mapped to that category.
    """
    columns = list(df.columns)

    groups = collections.OrderedDict({"Overall": set()})
    for category in categories.values():
        groups[category] = set()

    # Columns prefixed with a category name go to "Overall" and are selected
    # by default. Each distinct category only needs to be scanned once.
    default_selection = set()
    for category in dict.fromkeys(categories.values()):
        matching = [col for col in columns if col.startswith(category)]
        groups["Overall"].update(matching)
        default_selection.update(matching)

    # Every column is assigned to the category of the first matching prefix
    # (in the iteration order of ``categories``).
    for col in columns:
        for prefix, category in categories.items():
            if col.startswith(prefix):
                groups[category].add(col)
                break

    return groups, default_selection


def add_avg_as_columns(
    benchmark_df: pd.DataFrame, attack_scores: list[str]
) -> pd.DataFrame:
    """Compute per-model average scores as ``avg_<score>`` columns.

    Scores are first averaged over the variants of each attack and only then
    over the attacks, so that attacks with many variants are not
    over-weighted. A missing score makes the corresponding average NaN.

    Args:
        benchmark_df: Frame with ``model``, ``attack_name`` and the
            ``attack_scores`` columns.
        attack_scores: Names of the score columns to average.

    Returns:
        One row per model (in sorted model order, as produced by ``groupby``)
        with the columns ``model`` and ``avg_<score>`` for every score.
    """
    per_attack = _group_mean(benchmark_df, ["model", "attack_name"], attack_scores)
    per_model = _group_mean(per_attack, ["model"], attack_scores)
    return per_model.rename(columns={s: f"avg_{s}" for s in attack_scores})


def add_attack_variants_as_columns(
    df: pd.DataFrame, first_cols: list[str], attack_scores: list[str]
) -> pd.DataFrame:
    """Pivot the per-attack rows of every model into a single wide row.

    For each row of a model the score columns are renamed to
    ``<attack_name>_<score>`` when the variant is ``"default"`` and to
    ``<attack_name>_<attack_variant>_<score>`` otherwise. Rows whose attack
    name is one of ``_BASELINE_ATTACK_NAMES`` additionally contribute the
    ``model`` and ``first_cols`` columns.

    Args:
        df: Frame with ``model``, ``attack_name``, ``attack_variant``,
            ``first_cols`` and ``attack_scores`` columns.
        first_cols: Model-level columns to place right after ``model``.
        attack_scores: Names of the score columns.

    Returns:
        One block of rows per model, in order of first appearance in *df*.
        Columns are ``model``, ``first_cols`` and then the score columns in
        order of first appearance (deterministic across runs).

    Raises:
        KeyError: If no row contributes the ``model``/``first_cols`` columns.
        ValueError: If *df* has no rows (nothing to concatenate).
    """
    leading_cols = ["model"] + first_cols

    model_frames = []
    for model in df["model"].unique():
        model_view = df[df["model"] == model]

        variant_frames = []
        for _, row in model_view.iterrows():
            attack_name = row["attack_name"]
            attack_variant = row["attack_variant"]
            variant_rows = model_view[
                (model_view["attack_name"] == attack_name)
                & (model_view["attack_variant"] == attack_variant)
            ]

            if attack_name in _BASELINE_ATTACK_NAMES:
                variant_rows = variant_rows[leading_cols + attack_scores]
            else:
                variant_rows = variant_rows[attack_scores]

            if attack_variant == "default":
                prefix = attack_name
            else:
                prefix = f"{attack_name}_{attack_variant}"
            variant_rows = variant_rows.rename(
                columns={s: f"{prefix}_{s}" for s in attack_scores}
            )
            # Drop the original index so the side-by-side concat lines up.
            variant_frames.append(variant_rows.reset_index(drop=True))

        model_frames.append(pd.concat(variant_frames, axis=1))

    final_df = pd.concat(model_frames, axis=0, ignore_index=True)

    # Keep the remaining columns in order of appearance. A plain set
    # difference would make the column order vary between interpreter runs.
    leading = set(leading_cols)
    remaining_cols = list(
        dict.fromkeys(col for col in final_df.columns if col not in leading)
    )
    return final_df[leading_cols + remaining_cols]


def add_attack_categories_as_columns(
    benchmark_df: pd.DataFrame, attack_scores: list[str]
) -> pd.DataFrame:
    """Compute per-model, per-category average scores as wide columns.

    Scores are first averaged over the variants of each attack and then over
    the attacks of each category. The category ``"None"`` is skipped.

    Args:
        benchmark_df: Frame with ``model``, ``attack_name``, ``cat`` and the
            ``attack_scores`` columns.
        attack_scores: Names of the score columns to average.

    Returns:
        One row per model (in sorted model order, as produced by ``groupby``;
        the frame has no ``model`` column) with a ``<cat>_<score>`` column
        for every category and score.

    Raises:
        ValueError: If there is no model or no category other than ``"None"``
            (nothing to concatenate).
    """
    per_attack = _group_mean(
        benchmark_df, ["model", "attack_name", "cat"], attack_scores
    )
    per_category = _group_mean(per_attack, ["model", "cat"], attack_scores)

    # Use item access: "model" and "cat" are plain column names here.
    models = per_category["model"].unique()
    cats = [cat for cat in per_category["cat"].unique() if cat != "None"]

    model_frames = []
    for model in models:
        cat_frames = []
        for cat in cats:
            selected = per_category[
                (per_category["model"] == model) & (per_category["cat"] == cat)
            ]
            selected = selected[attack_scores].rename(
                columns={s: f"{cat}_{s}" for s in attack_scores}
            )
            cat_frames.append(selected.reset_index(drop=True))
        model_frames.append(pd.concat(cat_frames, axis=1))

    return pd.concat(model_frames, axis=0, ignore_index=True)


def get_old_format_dataframe(
    benchmark_df: pd.DataFrame, first_cols: list[str], attack_scores: list[str]
) -> pd.DataFrame:
    """Convert the long benchmark frame into the old one-row-per-model layout.

    Args:
        benchmark_df: Long-format frame with ``model``, ``attack_name``,
            ``attack_variant``, ``cat``, ``first_cols`` and ``attack_scores``
            columns.
        first_cols: Model-level columns to place right after ``model``.
        attack_scores: Names of the score columns.

    Returns:
        A frame with one row per model, sorted by model, containing the
        per-variant score columns, the per-category averages and the overall
        ``avg_<score>`` columns. Duplicated column names are dropped, keeping
        the first occurrence.
    """
    # Missing values in every column are replaced by the string "None";
    # ``add_attack_categories_as_columns`` skips the category of that name.
    # NOTE(review): this also applies to score columns -- confirm that scores
    # are never missing at this point.
    #
    # The three partial frames below are joined by row position. The variants
    # frame lists models in order of first appearance, whereas the groupby
    # based frames list them in sorted order. Sorting by model first (stable,
    # so the row order within a model is kept) makes both orders agree;
    # without it, averages end up on the wrong model for unsorted input.
    benchmark_df = (
        benchmark_df.fillna("None")
        .sort_values("model", kind="mergesort")
        .reset_index(drop=True)
    )

    avg_df = add_avg_as_columns(benchmark_df, attack_scores)
    attack_variants_df = add_attack_variants_as_columns(
        benchmark_df, first_cols, attack_scores
    )
    categories_df = add_attack_categories_as_columns(benchmark_df, attack_scores)

    final_df = pd.concat([attack_variants_df, categories_df, avg_df], axis=1)
    # ``avg_df`` brings a second "model" column; keep only the first one.
    return final_df.loc[:, ~final_df.columns.duplicated()].copy()