123
Browse files
app.py
CHANGED
@@ -64,11 +64,20 @@ def format_table(df):
|
|
64 |
# Function to calculate top 3 models based on combined score (average of numeric columns)
|
65 |
def get_top_3_models(robustness_df, context_grounding_df):
|
66 |
# Combine numeric columns from both datasets
|
67 |
-
numeric_cols_robustness = ["Baseline", "Robustness (Δ)"] #
|
68 |
numeric_cols_context = ["Irrelevant Ctx", "No Ctx", "Ctx Grounding QA", "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance"] # From context grounding
|
69 |
|
70 |
-
# Extract numeric values for
|
71 |
-
robustness_scores =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
context_scores = context_grounding_df[numeric_cols_context].astype(float)
|
73 |
|
74 |
# Combine scores by averaging
|
|
|
64 |
# Function to calculate top 3 models based on combined score (average of numeric columns)
|
65 |
def get_top_3_models(robustness_df, context_grounding_df):
|
66 |
# Combine numeric columns from both datasets
|
67 |
+
numeric_cols_robustness = ["Baseline", "Robustness (Δ)"] # Columns with numeric or string-numeric data
|
68 |
numeric_cols_context = ["Irrelevant Ctx", "No Ctx", "Ctx Grounding QA", "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance"] # From context grounding
|
69 |
|
70 |
+
# Extract numeric values for each column in robustness_df
|
71 |
+
robustness_scores = pd.DataFrame()
|
72 |
+
for col in numeric_cols_robustness:
|
73 |
+
if any(" (" in str(x) for x in robustness_df[col]):
|
74 |
+
# Handle string values with deltas (e.g., "0.95 (0.0)")
|
75 |
+
robustness_scores[col] = robustness_df[col].apply(lambda x: float(str(x).split(" (")[0]) if " (" in str(x) else float(x))
|
76 |
+
else:
|
77 |
+
# Handle direct float values
|
78 |
+
robustness_scores[col] = robustness_df[col].astype(float)
|
79 |
+
|
80 |
+
# Extract numeric values for context_grounding_df (all are already float values)
|
81 |
context_scores = context_grounding_df[numeric_cols_context].astype(float)
|
82 |
|
83 |
# Combine scores by averaging
|