# The-Arabic-Rag-Leaderboard / reranking_leaderboard.py
from pathlib import Path
from utils import load_json_results
from leaderboard_tab import search_leaderboard, update_columns_to_show, create_leaderboard_tab
# Constants
RERANKER_ABOUT_SECTION = """
## About Reranking Evaluation
The reranking evaluation assesses a model's ability to improve search quality by reordering initially retrieved results. Models are evaluated across multiple unseen Arabic datasets to ensure robust performance.
### Evaluation Metrics
- **MRR@10 (Mean Reciprocal Rank at 10)**: Measures ranking quality based on the position of the first relevant result within the top 10
- **NDCG@10 (Normalized Discounted Cumulative Gain at 10)**: Evaluates the ranking quality of all relevant results within the top 10, weighting results near the top more heavily
- **MAP (Mean Average Precision)**: Measures overall precision across all relevant documents, averaged over queries
All metrics are averaged across multiple evaluation datasets to provide a comprehensive assessment of model performance.
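For intuition, here is a minimal sketch of how these metrics can be computed for a single query. It is illustrative only, not the leaderboard's evaluation code, and assumes binary relevance labels with all relevant documents present among the candidates:

```python
import math

def mrr_at_10(relevance):
    # Reciprocal rank of the first relevant document within the top 10 (0 if none).
    for rank, rel in enumerate(relevance[:10], start=1):
        if rel:
            return 1.0 / rank
    return 0.0

def ndcg_at_10(relevance):
    # Binary-relevance NDCG@10: DCG of the given order divided by the ideal DCG.
    dcg = sum(rel / math.log2(rank + 1) for rank, rel in enumerate(relevance[:10], start=1))
    ideal = sorted(relevance, reverse=True)[:10]
    idcg = sum(rel / math.log2(rank + 1) for rank, rel in enumerate(ideal, start=1))
    return dcg / idcg if idcg > 0 else 0.0

def average_precision(relevance):
    # Mean of precision@k over the ranks k at which relevant documents appear.
    hits, precisions = 0, []
    for rank, rel in enumerate(relevance, start=1):
        if rel:
            hits += 1
            precisions.append(hits / rank)
    return sum(precisions) / max(hits, 1)

# relevance[i] = 1 if the i-th document in the reranked order is relevant, else 0.
relevance = [0, 1, 0, 0, 1, 0, 0, 0, 0, 0]
print(mrr_at_10(relevance), ndcg_at_10(relevance), average_precision(relevance))
```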
### Model Requirements
- Must accept query-document pairs as input
- Should output a relevance score for each query-document pair, e.g., via cross-attention or a similar query-document matching mechanism (see the sketch below)
- Must support Arabic text processing
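A minimal sketch of this interface with `sentence-transformers` (the model ID and texts are placeholders, and this is not the evaluation harness itself):

```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("your-username/your-arabic-reranker")  # placeholder model ID

query = "ما هي عاصمة مصر؟"
candidates = ["القاهرة هي عاصمة مصر.", "تقع باريس في فرنسا."]

# One relevance score per (query, document) pair; higher means more relevant.
scores = model.predict([(query, doc) for doc in candidates])
reranked = [doc for _, doc in sorted(zip(scores, candidates), key=lambda pair: pair[0], reverse=True)]
```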
### Evaluation Process
1. Models are tested on multiple unseen Arabic datasets
2. For each dataset:
- Initial candidate documents are provided
- Model reranks the candidates
- MRR@10, NDCG@10, and MAP are calculated
3. Final scores are averaged across all datasets
4. Models are ranked based on overall performance
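Put together, the four steps above look roughly like the sketch below. It reuses the metric helpers sketched earlier and a hypothetical `load_unseen_arabic_datasets()` loader; neither is part of this repository's actual pipeline.

```python
from statistics import mean

from sentence_transformers import CrossEncoder

model = CrossEncoder("your-username/your-arabic-reranker")  # placeholder model ID
METRICS = ("MRR@10", "NDCG@10", "MAP")

def evaluate_dataset(examples):
    # examples: iterable of (query, candidate_docs, relevance_labels) triples.
    per_query = []
    for query, docs, labels in examples:
        scores = model.predict([(query, doc) for doc in docs])
        order = sorted(range(len(docs)), key=lambda i: scores[i], reverse=True)
        reranked_labels = [labels[i] for i in order]
        per_query.append({
            "MRR@10": mrr_at_10(reranked_labels),        # metric helpers as sketched above
            "NDCG@10": ndcg_at_10(reranked_labels),
            "MAP": average_precision(reranked_labels),
        })
    # Per-dataset score = mean over that dataset's queries.
    return {m: mean(q[m] for q in per_query) for m in METRICS}

# Final score = mean of each metric over all evaluation datasets.
per_dataset = [evaluate_dataset(ds) for ds in load_unseen_arabic_datasets()]  # hypothetical loader
final_scores = {m: mean(r[m] for r in per_dataset) for m in METRICS}
```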
### How to Prepare Your Model
- Model should be public on the Hugging Face Hub (private models are not supported yet)
- Make sure it loads and scores pairs correctly with the `sentence-transformers` library (e.g., as a `CrossEncoder`, as in the sketches above)
"""
# Global variables
reranking_df = None
def load_reranking_results(prepare_for_display=False, sort_col=None, drop_cols=None):
dataframe_path = Path(__file__).parent / "results" / "reranking_results.json"
return load_json_results(
dataframe_path,
prepare_for_display=prepare_for_display,
sort_col=sort_col,
drop_cols=drop_cols
)
def load_reranking_leaderboard():
"""Load and prepare the reranking leaderboard data"""
global reranking_df
# Prepare reranking dataframe
    reranking_df = load_reranking_results(prepare_for_display=True, sort_col="Average Score", drop_cols=["Revision", "Precision", "Task"])
reranking_df.insert(0, "Rank", range(1, 1 + len(reranking_df)))
return reranking_df
def reranking_search_leaderboard(model_name, columns_to_show):
"""Search function for reranking leaderboard"""
return search_leaderboard(reranking_df, model_name, columns_to_show)
def update_reranker_columns_to_show(columns_to_show):
"""Update displayed columns for reranking leaderboard"""
return update_columns_to_show(reranking_df, columns_to_show)
def create_reranking_tab():
"""Create the complete reranking leaderboard tab"""
global reranking_df
# Load data if not already loaded
    if reranking_df is None:
reranking_df = load_reranking_leaderboard()
# Define default columns to show
default_columns = ["Rank", "Model", "Average Score", "Model Size (MB)", "Context Length",
"Embedding Dimension", "Namaa Global Knowledge", "Navid General Knowledge"]
# Create and return the tab
return create_leaderboard_tab(
df=reranking_df,
initial_columns_to_show=default_columns,
search_function=reranking_search_leaderboard,
update_function=update_reranker_columns_to_show,
about_section=RERANKER_ABOUT_SECTION,
task_type="Reranker"
)
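# Example wiring (hypothetical sketch, not taken from this repository; the actual app
# entry point may differ):
#
#   import gradio as gr
#   from reranking_leaderboard import create_reranking_tab
#
#   with gr.Blocks() as demo:
#       create_reranking_tab()  # assumed to build its tab inside the active Blocks context
#
#   demo.launch()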