Spaces:
Running
Running
handling_the_model_alias_field
#6
by
fzoll
- opened
- app/backend/data_engine.py +22 -2
app/backend/data_engine.py
CHANGED
|
@@ -20,7 +20,8 @@ COLUMNS_TYPES = ["markdown",
|
|
| 20 |
|
| 21 |
]
|
| 22 |
|
| 23 |
-
|
|
|
|
| 24 |
DATASET_URL = f"{GIT_URL}datasets.json"
|
| 25 |
MODEL_URL = f"{GIT_URL}models.json"
|
| 26 |
RESULT_URL = f"{GIT_URL}results.json"
|
|
@@ -104,6 +105,25 @@ class DataEngine:
|
|
| 104 |
|
| 105 |
df_model = pd.DataFrame(models_list)
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
|
| 108 |
|
| 109 |
# set dataset default value to 0
|
|
@@ -132,7 +152,7 @@ class DataEngine:
|
|
| 132 |
|
| 133 |
pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10").fillna(0)
|
| 134 |
|
| 135 |
-
df = pd.merge(
|
| 136 |
df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
|
| 137 |
|
| 138 |
if df.empty:
|
|
|
|
| 20 |
|
| 21 |
]
|
| 22 |
|
| 23 |
+
BRANCH = 'main'
|
| 24 |
+
GIT_URL = f"https://raw.githubusercontent.com/embedding-benchmark/rteb/refs/heads/{BRANCH}/results/"
|
| 25 |
DATASET_URL = f"{GIT_URL}datasets.json"
|
| 26 |
MODEL_URL = f"{GIT_URL}models.json"
|
| 27 |
RESULT_URL = f"{GIT_URL}results.json"
|
|
|
|
| 105 |
|
| 106 |
df_model = pd.DataFrame(models_list)
|
| 107 |
|
| 108 |
+
# Create mapping for model names/aliases
|
| 109 |
+
if 'alias' in df_model.columns:
|
| 110 |
+
# Create a lookup table for alias to model_name mapping
|
| 111 |
+
alias_mapping = df_model[df_model['alias'].notna()].set_index('alias')['model_name'].to_dict()
|
| 112 |
+
|
| 113 |
+
# Add rows for aliases to enable joining
|
| 114 |
+
alias_rows = []
|
| 115 |
+
for _, row in df_model[df_model['alias'].notna()].iterrows():
|
| 116 |
+
alias_row = row.copy()
|
| 117 |
+
alias_row['model_name'] = row['alias']
|
| 118 |
+
alias_rows.append(alias_row)
|
| 119 |
+
|
| 120 |
+
if alias_rows:
|
| 121 |
+
df_model_extended = pd.concat([df_model, pd.DataFrame(alias_rows)], ignore_index=True)
|
| 122 |
+
else:
|
| 123 |
+
df_model_extended = df_model
|
| 124 |
+
else:
|
| 125 |
+
df_model_extended = df_model
|
| 126 |
+
|
| 127 |
df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
|
| 128 |
|
| 129 |
# set dataset default value to 0
|
|
|
|
| 152 |
|
| 153 |
pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10").fillna(0)
|
| 154 |
|
| 155 |
+
df = pd.merge(df_model_extended, pivot, on=["model_name", "embd_dim", "embd_dtype"])
|
| 156 |
df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
|
| 157 |
|
| 158 |
if df.empty:
|