Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
fix: fix the bug in selecting reranking models
Browse files- src/utils.py +11 -6
src/utils.py
CHANGED
@@ -13,9 +13,17 @@ from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC,
|
|
13 |
from src.envs import API, SEARCH_RESULTS_REPO
|
14 |
from src.read_evals import FullEvalResult, get_leaderboard_df, calculate_mean
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
18 |
-
return df.loc[df[
|
19 |
|
20 |
|
21 |
def filter_queries(query: str, df: pd.DataFrame) -> pd.DataFrame:
|
@@ -99,7 +107,6 @@ def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, t
|
|
99 |
selected_cols.append(c)
|
100 |
# We use COLS to maintain sorting
|
101 |
filtered_df = df[FIXED_COLS + selected_cols]
|
102 |
-
filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].mean(axis=1, numeric_only=True).round(decimals=2)
|
103 |
filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].apply(calculate_mean, axis=1).round(decimals=2)
|
104 |
filtered_df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
|
105 |
filtered_df.reset_index(inplace=True, drop=True)
|
@@ -116,14 +123,12 @@ def update_table(
|
|
116 |
query: str,
|
117 |
show_anonymous: bool
|
118 |
):
|
119 |
-
filtered_df = hidden_df
|
120 |
if not show_anonymous:
|
121 |
-
filtered_df = hidden_df.copy()
|
122 |
filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
|
123 |
filtered_df = filter_models(filtered_df, reranking_query)
|
124 |
filtered_df = filter_queries(query, filtered_df)
|
125 |
-
|
126 |
-
return df
|
127 |
|
128 |
|
129 |
def update_table_long_doc(
|
|
|
13 |
from src.envs import API, SEARCH_RESULTS_REPO
|
14 |
from src.read_evals import FullEvalResult, get_leaderboard_df, calculate_mean
|
15 |
|
16 |
+
import re
|
17 |
+
|
18 |
+
|
19 |
+
def remove_html(input_str):
    """Return ``input_str`` with every ``<...>`` tag removed.

    A tag is everything from a ``<`` up to the next ``>``; the text between
    tags is kept untouched.

    Args:
        input_str: The text to clean. Values that are not ``str`` (for
            example ``None`` or a float NaN coming from an empty DataFrame
            cell) are returned unchanged instead of raising ``TypeError``.

    Returns:
        The text without tags, or the original object when it is not a
        string.
    """
    if not isinstance(input_str, str):
        return input_str
    # ``[^>]*`` stops at the first ``>`` just like the lazy ``.*?`` did, but
    # it also crosses newlines, so a tag wrapped over several lines is removed.
    return re.sub(r'<[^>]*>', '', input_str)
|
23 |
+
|
24 |
|
25 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
    """Select the rows whose reranking model is listed in ``reranking_query``.

    Each value of the ``COL_NAME_RERANKING_MODEL`` column is passed through
    ``remove_html`` before it is compared, so the entries of
    ``reranking_query`` are matched against the cleaned text.

    Args:
        df: Table containing the ``COL_NAME_RERANKING_MODEL`` column.
        reranking_query: Values to keep, compared after tag removal.

    Returns:
        The rows of ``df`` whose cleaned value is in ``reranking_query``.
    """
    cleaned_names = df[COL_NAME_RERANKING_MODEL].apply(remove_html)
    keep_mask = cleaned_names.isin(reranking_query)
    return df.loc[keep_mask]
|
27 |
|
28 |
|
29 |
def filter_queries(query: str, df: pd.DataFrame) -> pd.DataFrame:
|
|
|
107 |
selected_cols.append(c)
|
108 |
# We use COLS to maintain sorting
|
109 |
filtered_df = df[FIXED_COLS + selected_cols]
|
|
|
110 |
filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].apply(calculate_mean, axis=1).round(decimals=2)
|
111 |
filtered_df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
|
112 |
filtered_df.reset_index(inplace=True, drop=True)
|
|
|
123 |
query: str,
|
124 |
show_anonymous: bool
|
125 |
):
|
126 |
+
filtered_df = hidden_df.copy()
|
127 |
if not show_anonymous:
|
|
|
128 |
filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
|
129 |
filtered_df = filter_models(filtered_df, reranking_query)
|
130 |
filtered_df = filter_queries(query, filtered_df)
|
131 |
+
return select_columns(filtered_df, domains, langs, task='qa')
|
|
|
132 |
|
133 |
|
134 |
def update_table_long_doc(
|