from pathlib import Path

from utils import load_json_results
from leaderboard_tab import search_leaderboard, update_columns_to_show, create_leaderboard_tab

# Constants
RERANKER_ABOUT_SECTION = """
## About Reranking Evaluation

The reranking evaluation assesses a model's ability to improve search quality by reordering an initial set of retrieved results. Models are evaluated across multiple unseen Arabic datasets to ensure robust performance.

### Evaluation Metrics

- **MRR@10 (Mean Reciprocal Rank at 10)**: Measures ranking quality based on the position of the first relevant result in the top 10
- **NDCG@10 (Normalized Discounted Cumulative Gain at 10)**: Evaluates the ranking quality of all relevant results in the top 10
- **MAP (Mean Average Precision)**: Measures overall precision across all relevant documents

All metrics are averaged across the evaluation datasets to provide a comprehensive assessment of model performance.
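For illustration, MRR@10 can be computed as in this minimal sketch (not the leaderboard's actual evaluation code):

```python
def mrr_at_10(ranked_doc_ids, relevant_ids):
    """Reciprocal rank of the first relevant document within the top 10."""
    for rank, doc_id in enumerate(ranked_doc_ids[:10], start=1):
        if doc_id in relevant_ids:
            return 1.0 / rank
    return 0.0
```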
### Model Requirements

- Must accept query-document pairs as input
- Must output a relevance score for each pair (e.g., via cross-attention or a similar query-document matching mechanism)
- Must support Arabic text processing
### Evaluation Process

1. Models are tested on multiple unseen Arabic datasets
2. For each dataset:
   - Initial candidate documents are provided
   - The model reranks the candidates (see the sketch below)
   - MRR@10, NDCG@10, and MAP are calculated
3. Final scores are averaged across all datasets
4. Models are ranked by overall performance
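For concreteness, the reranking step might look like the following with a `sentence-transformers` cross-encoder (a minimal sketch, not the actual evaluation harness):

```python
from sentence_transformers import CrossEncoder

def rerank(model: CrossEncoder, query: str, candidates: list[str]) -> list[str]:
    """Score each (query, candidate) pair and return candidates sorted by score."""
    scores = model.predict([(query, doc) for doc in candidates])
    ranked = sorted(zip(scores, candidates), key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in ranked]
```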
### How to Prepare Your Model

- The model must be public on the HuggingFace Hub (private models are not supported yet)
- Make sure it loads and runs with the `sentence-transformers` library, as in the check below
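A quick compatibility check (the model ID is a placeholder; replace it with your own):

```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("your-org/your-reranker")  # placeholder model ID
scores = model.predict([
    ("ما هي عاصمة فرنسا؟", "باريس هي عاصمة فرنسا."),
    ("ما هي عاصمة فرنسا؟", "القاهرة هي عاصمة مصر."),
])
print(scores)  # one relevance score per (query, document) pair
```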
""" | |
# Global variables | |
reranking_df = None | |

def load_reranking_results(prepare_for_display=False, sort_col=None, drop_cols=None):
    """Load the reranking results JSON bundled with the app."""
    dataframe_path = Path(__file__).parent / "results" / "reranking_results.json"
    return load_json_results(
        dataframe_path,
        prepare_for_display=prepare_for_display,
        sort_col=sort_col,
        drop_cols=drop_cols,
    )

def load_reranking_leaderboard():
    """Load and prepare the reranking leaderboard data."""
    global reranking_df

    # Prepare the reranking dataframe, sorted by average score
    reranking_df = load_reranking_results(
        prepare_for_display=True,
        sort_col="Average Score",
        drop_cols=["Revision", "Precision", "Task"],
    )
    reranking_df.insert(0, "Rank", range(1, 1 + len(reranking_df)))
    return reranking_df

def reranking_search_leaderboard(model_name, columns_to_show):
    """Search function for the reranking leaderboard."""
    return search_leaderboard(reranking_df, model_name, columns_to_show)


def update_reranker_columns_to_show(columns_to_show):
    """Update the displayed columns for the reranking leaderboard."""
    return update_columns_to_show(reranking_df, columns_to_show)

def create_reranking_tab():
    """Create the complete reranking leaderboard tab."""
    global reranking_df

    # Load data if not already loaded
    if reranking_df is None:
        reranking_df = load_reranking_leaderboard()

    # Define the default columns to show
    default_columns = [
        "Rank", "Model", "Average Score", "Model Size (MB)", "Context Length",
        "Embedding Dimension", "Namaa Global Knowledge", "Navid General Knowledge",
    ]

    # Create and return the tab
    return create_leaderboard_tab(
        df=reranking_df,
        initial_columns_to_show=default_columns,
        search_function=reranking_search_leaderboard,
        update_function=update_reranker_columns_to_show,
        about_section=RERANKER_ABOUT_SECTION,
        task_type="Reranker",
    )
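
# Example usage (a sketch, not part of the app): mounting this tab in a
# Gradio Blocks layout. Assumes `create_leaderboard_tab` builds its
# components inside the active Blocks context; the real entry point
# (e.g., app.py) may wire things differently.
if __name__ == "__main__":
    import gradio as gr

    with gr.Blocks() as demo:
        with gr.Tab("Reranker"):
            create_reranking_tab()
    demo.launch()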