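# Gradio application serving the ViDoRe leaderboard: two benchmark tabs
# (ViDoRe V1 and V2) plus instructions for submitting new models.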
import gradio as gr

from app.utils import add_rank_and_format, filter_models, get_refresh_function
from data.model_handler import ModelHandler

# Retrieval metrics selectable in the leaderboard dropdowns.
METRICS = [
    "ndcg_at_1",
    "ndcg_at_5",
    "ndcg_at_10",
    "ndcg_at_100",
    "recall_at_1",
    "recall_at_5",
    "recall_at_10",
    "recall_at_100",
]
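

# main() builds the full UI; benchmark data is loaded once at startup and then
# refreshed on demand by the event handlers wired below.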
def main():
    model_handler = ModelHandler()

    initial_metric = "ndcg_at_5"
    model_handler.get_vidore_data(initial_metric)

    data_benchmark_1 = model_handler.compute_averages(initial_metric, benchmark_version=1)
    data_benchmark_1 = add_rank_and_format(data_benchmark_1, benchmark_version=1)
    data_benchmark_2 = model_handler.compute_averages(initial_metric, benchmark_version=2)
    data_benchmark_2 = add_rank_and_format(data_benchmark_2, benchmark_version=2)
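
    # The formatted tables start with three fixed columns (Rank, Model, Average);
    # all remaining columns hold per-dataset scores, hence the "- 3" below.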
    NUM_DATASETS_1 = len(data_benchmark_1.columns) - 3
    NUM_SCORES_1 = len(data_benchmark_1) * NUM_DATASETS_1
    NUM_MODELS_1 = len(data_benchmark_1)

    NUM_DATASETS_2 = len(data_benchmark_2.columns) - 3
    NUM_SCORES_2 = len(data_benchmark_2) * NUM_DATASETS_2
    NUM_MODELS_2 = len(data_benchmark_2)
css = """ | |
table > thead { | |
white-space: normal | |
} | |
table { | |
--cell-width-1: 250px | |
} | |
table > tbody > tr > td:nth-child(2) > div { | |
overflow-x: auto | |
} | |
.filter-checkbox-group { | |
max-width: max-content; | |
} | |
#markdown size | |
.markdown { | |
font-size: 1rem; | |
} | |
""" | |
    with gr.Blocks(css=css) as block:
        with gr.Tabs():
            with gr.TabItem("ViDoRe V1"):
                gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark 1")
                gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models")
                gr.Markdown(
                    """
                    Visual Document Retrieval Benchmark 1 leaderboard. To submit results, refer to the corresponding tab.
                    Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks, and models.
                    """
                )

                datasets_columns_1 = list(data_benchmark_1.columns[3:])
                with gr.Row():
                    metric_dropdown_1 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
                    research_textbox_1 = gr.Textbox(
                        placeholder="Search Models... [press enter]",
                        label="Filter Models by Name",
                    )
                    column_checkboxes_1 = gr.CheckboxGroup(
                        choices=datasets_columns_1, value=datasets_columns_1, label="Select Columns to Display"
                    )

                with gr.Row():
                    # Rank is a number, Model is a markdown link, and the Average
                    # plus per-dataset columns are all numbers.
                    datatype_1 = ["number", "markdown"] + ["number"] * (NUM_DATASETS_1 + 1)
                    dataframe_1 = gr.Dataframe(data_benchmark_1, datatype=datatype_1, type="pandas")

                def update_data_1(metric, search_term, selected_columns):
                    model_handler.get_vidore_data(metric)
                    data = model_handler.compute_averages(metric, benchmark_version=1)
                    data = add_rank_and_format(data, benchmark_version=1)
                    data = filter_models(data, search_term)
                    if selected_columns:
                        data = data[["Rank", "Model", "Average"] + selected_columns]
                    return data
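
                # update_data_1 is reused by both the search box and the column
                # checkboxes below, so a single callback handles name filtering
                # and column selection.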
                with gr.Row():
                    refresh_button_1 = gr.Button("Refresh")
                    refresh_button_1.click(
                        get_refresh_function(model_handler, benchmark_version=1),
                        inputs=[metric_dropdown_1],
                        outputs=dataframe_1,
                        concurrency_limit=20,
                    )

                # Automatically refresh the dataframe when the dropdown value changes
                metric_dropdown_1.change(
                    get_refresh_function(model_handler, benchmark_version=1),
                    inputs=[metric_dropdown_1],
                    outputs=dataframe_1,
                )
                research_textbox_1.submit(
                    update_data_1,
                    inputs=[metric_dropdown_1, research_textbox_1, column_checkboxes_1],
                    outputs=dataframe_1,
                )
                column_checkboxes_1.change(
                    update_data_1,
                    inputs=[metric_dropdown_1, research_textbox_1, column_checkboxes_1],
                    outputs=dataframe_1,
                )

                gr.Markdown(
                    f"""
                    - **Total Datasets**: {NUM_DATASETS_1}
                    - **Total Scores**: {NUM_SCORES_1}
                    - **Total Models**: {NUM_MODELS_1}
                    """
                    + r"""
                    Please consider citing:

                    ```bibtex
                    @misc{faysse2024colpaliefficientdocumentretrieval,
                        title={ColPali: Efficient Document Retrieval with Vision Language Models},
                        author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
                        year={2024},
                        eprint={2407.01449},
                        archivePrefix={arXiv},
                        primaryClass={cs.IR},
                        url={https://arxiv.org/abs/2407.01449},
                    }
                    ```
                    """
                )

            with gr.TabItem("ViDoRe V2"):
                gr.Markdown("# ViDoRe V2: A New Visual Document Retrieval Benchmark")
                gr.Markdown("### A harder benchmark for visual document retrieval")
                gr.Markdown(
                    """
                    Visual Document Retrieval Benchmark 2 leaderboard. To submit results, refer to the corresponding tab.
                    Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics and models.
                    """
                )

                datasets_columns_2 = list(data_benchmark_2.columns[3:])
                with gr.Row():
                    metric_dropdown_2 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
                    research_textbox_2 = gr.Textbox(
                        placeholder="Search Models... [press enter]",
                        label="Filter Models by Name",
                    )
                    column_checkboxes_2 = gr.CheckboxGroup(
                        choices=datasets_columns_2, value=datasets_columns_2, label="Select Columns to Display"
                    )

                with gr.Row():
                    datatype_2 = ["number", "markdown"] + ["number"] * (NUM_DATASETS_2 + 1)
                    dataframe_2 = gr.Dataframe(data_benchmark_2, datatype=datatype_2, type="pandas")

                def update_data_2(metric, search_term, selected_columns):
                    model_handler.get_vidore_data(metric)
                    data = model_handler.compute_averages(metric, benchmark_version=2)
                    data = add_rank_and_format(data, benchmark_version=2)
                    data = filter_models(data, search_term)
                    if selected_columns:
                        data = data[["Rank", "Model", "Average"] + selected_columns]
                    return data
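
                # The V2 controls below are wired exactly like their V1
                # counterparts above.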
                with gr.Row():
                    refresh_button_2 = gr.Button("Refresh")
                    refresh_button_2.click(
                        get_refresh_function(model_handler, benchmark_version=2),
                        inputs=[metric_dropdown_2],
                        outputs=dataframe_2,
                        concurrency_limit=20,
                    )

                with gr.Row():
                    gr.Markdown(
                        """
                        **Note**: For now, all models were evaluated on our side using the `vidore-benchmark` package and
                        custom retrievers; the scores were not provided by the organisations that released the models.
                        """
                    )

                # Automatically refresh the dataframe when the dropdown value changes
                metric_dropdown_2.change(
                    get_refresh_function(model_handler, benchmark_version=2),
                    inputs=[metric_dropdown_2],
                    outputs=dataframe_2,
                )
                research_textbox_2.submit(
                    update_data_2,
                    inputs=[metric_dropdown_2, research_textbox_2, column_checkboxes_2],
                    outputs=dataframe_2,
                )
                column_checkboxes_2.change(
                    update_data_2,
                    inputs=[metric_dropdown_2, research_textbox_2, column_checkboxes_2],
                    outputs=dataframe_2,
                )

                gr.Markdown(
                    f"""
                    - **Total Datasets**: {NUM_DATASETS_2}
                    - **Total Scores**: {NUM_SCORES_2}
                    - **Total Models**: {NUM_MODELS_2}
                    """
                    + r"""
                    Please consider citing:

                    ```bibtex
                    @misc{faysse2024colpaliefficientdocumentretrieval,
                        title={ColPali: Efficient Document Retrieval with Vision Language Models},
                        author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
                        year={2024},
                        eprint={2407.01449},
                        archivePrefix={arXiv},
                        primaryClass={cs.IR},
                        url={https://arxiv.org/abs/2407.01449},
                    }
                    ```
                    """
                )

            with gr.TabItem("Submit your model"):
                gr.Markdown("# How to Submit a New Model to the Leaderboard")
                gr.Markdown(
                    """
                    To submit a new model to the ViDoRe leaderboard, follow these steps:

                    1. **Evaluate your model**:
                        - Follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/illuin-tech/vidore-benchmark/).

                    2. **Format your submission file**:
                        - The submission file is generated automatically and named `results.json`, with the
                        following structure:
                        ```json
                        {
                            "dataset_name_1": {
                                "metric_1": score_1,
                                "metric_2": score_2,
                                ...
                            },
                            "dataset_name_2": {
                                "metric_1": score_1,
                                "metric_2": score_2,
                                ...
                            },
                        }
                        ```
                        - The dataset names should match the ViDoRe and ViDoRe 2 dataset names listed in the following
                        collections: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d) and [ViDoRe Benchmark 2](https://huggingface.co/collections/vidore/vidore-benchmark-v2-dev-67ae03e3924e85b36e7f53b0).

                    3. **Submit your model**:
                        - Create a public HuggingFace model repository with your model.
                        - Add the tag `vidore` to the metadata of the model card and place the
                        `results.json` file at the root of the repository.

                    And you're done! Your model will appear on the leaderboard when you click refresh. Once the space
                    is rebooted, it will appear on startup.

                    **Note**: For proper hyperlink redirection, please ensure that your model repository name is in
                    kebab-case, e.g. `my-model-name`.
                    """
                )

    block.queue(max_size=10).launch(debug=True)
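

# A minimal, hypothetical sketch (not used by the app) of writing a
# `results.json` in the format described in the submission tab above. The
# dataset names, metrics, and scores are placeholders, not real benchmark
# output; the evaluation script in the vidore-benchmark repository generates
# the real file.
def write_example_results(path: str = "results.json") -> None:
    import json

    results = {
        "example_dataset_1": {"ndcg_at_5": 0.0, "recall_at_5": 0.0},
        "example_dataset_2": {"ndcg_at_5": 0.0, "recall_at_5": 0.0},
    }
    with open(path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)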


if __name__ == "__main__":
    main()