import json
import os
from typing import Dict

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, get_collection, hf_hub_download
from huggingface_hub.repocard import metadata_load


def get_datasets_nickname() -> Dict:
    """Map each dataset id in the ViDoRe collection (plus its OCR-chunk and
    captioning variants) to the human-readable nickname shown in the table."""
    datasets_nickname = {}

    collection = get_collection("vidore/vidore-benchmark-667173f98e70a1c0fa4db00d")

    # Keyword to look for in the dataset id -> leaderboard nickname.
    # Checked in insertion order, mirroring the original if/elif chain.
    nicknames = {
        'arxivqa': 'ArxivQA',
        'docvqa': 'DocVQA',
        'infovqa': 'InfoVQA',
        'tabfquad': 'TabFQuad',
        'tatdqa': 'TATDQA',
        'shiftproject': 'ShiftProject',
        'artificial_intelligence': 'Artificial Intelligence',
        'energy': 'Energy',
        'government_reports': 'Government Reports',
        'healthcare': 'Healthcare',
    }

    for item in collection.items:
        dataset_name = item.item_id
        for keyword, nickname in nicknames.items():
            if keyword in dataset_name:
                # Register the base dataset and its OCR / captioning variants.
                datasets_nickname[dataset_name] = nickname
                datasets_nickname[dataset_name + '_ocr_chunk'] = nickname
                datasets_nickname[dataset_name + '_captioning'] = nickname
                break

    return datasets_nickname


def make_clickable_model(model_name, link=None):
    """Render the model name as an HTML link to its Hugging Face page."""
    if link is None:
        desanitized_model_name = model_name.replace("_", "/")

        if '/captioning' in desanitized_model_name:
            desanitized_model_name = desanitized_model_name.replace('/captioning', '')
        if '/ocr' in desanitized_model_name:
            desanitized_model_name = desanitized_model_name.replace('/ocr', '')

        link = "https://huggingface.co/" + desanitized_model_name

    # Remove user from model name
    # return (
    #     f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name.split("/")[-1]}</a>'
    # )
    return f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name}</a>'


def add_rank(df):
    cols_to_rank = [
        col
        for col in df.columns
        if col
        not in [
            "Model",
            "Model Size (Million Parameters)",
            "Memory Usage (GB, fp32)",
            "Embedding Dimensions",
            "Max Tokens",
        ]
    ]
    if len(cols_to_rank) == 1:
        df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
    else:
        # skipna=False: a model missing any score gets a NaN average.
        df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
        df.sort_values("Average", ascending=False, inplace=True)
    df.insert(0, "Rank", list(range(1, len(df) + 1)))
    df = df.round(2)
    # Fill NaN after averaging so missing scores render as empty cells.
    df.fillna("", inplace=True)
    return df


def get_vidore_data():
    api = HfApi()

    # Local cache path
    model_infos_path = "model_infos.json"
    metric = "ndcg_at_5"

    MODEL_INFOS = {}
    if os.path.exists(model_infos_path):
        with open(model_infos_path) as f:
            MODEL_INFOS = json.load(f)

    models = api.list_models(filter="vidore")
    repositories = [model.modelId for model in models]

    datasets_nickname = get_datasets_nickname()

    for repo_id in repositories:
        files = [f for f in api.list_repo_files(repo_id) if f.endswith('_metrics.json')]
        if len(files) == 0:
            continue

        for file in files:
            model_name = file.split('_metrics.json')[0]
            if model_name not in MODEL_INFOS:
                readme_path = hf_hub_download(repo_id, filename="README.md")
                meta = metadata_load(readme_path)
                try:
                    result_path = hf_hub_download(repo_id, filename=file)
                    with open(result_path) as f:
                        results = json.load(f)
                    # Keep only the entries for the target metric on each dataset.
                    for dataset in results:
                        results[dataset] = {
                            key: value for key, value in results[dataset].items() if metric in key
                        }
                    MODEL_INFOS[model_name] = {"meta": meta, "results": results}
                except Exception as e:
                    print(f"Error loading {model_name} - {e}")
                    continue

    model_res = {}
    df = None
    if len(MODEL_INFOS) > 0:
        for model in MODEL_INFOS.keys():
            res = MODEL_INFOS[model]["results"]
            dataset_res = {}
            for dataset in res.keys():
                if dataset == "validation_set":
                    continue
                dataset_res[datasets_nickname[dataset]] = res[dataset][metric]
            model_res[model] = dataset_res

        df = pd.DataFrame(model_res).T

    # Save model infos to the local cache so later runs skip re-downloading.
    with open(model_infos_path, "w") as f:
        json.dump(MODEL_INFOS, f)

    return df


def add_rank_and_format(df):
    df = df.reset_index()
    df = df.rename(columns={"index": "Model"})
    df = add_rank(df)
    df["Model"] = df["Model"].apply(make_clickable_model)
    return df


# 1. Force headers to wrap
# 2. Force model column (maximum) width
# 3. Prevent model column from overflowing, scroll instead
# 4. Prevent checkbox groups from taking up too much space
css = """
table > thead {
    white-space: normal
}

table {
    --cell-width-1: 250px
}

table > tbody > tr > td:nth-child(2) > div {
    overflow-x: auto
}

.filter-checkbox-group {
    max-width: max-content;
}
"""


def get_refresh_function():
    def _refresh():
        data_task_category = get_vidore_data()
        return add_rank_and_format(data_task_category)

    return _refresh


data = get_vidore_data()
data = add_rank_and_format(data)

# Subtract the Rank, Model, and Average columns to count only dataset columns.
NUM_DATASETS = len(data.columns) - 3
NUM_SCORES = len(data) * NUM_DATASETS
NUM_MODELS = len(data)

with gr.Blocks(css=css) as block:
    gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark 📚🔍")
    gr.Markdown("## From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")

    gr.Markdown(
        """
        Visual Document Retrieval Benchmark leaderboard. To submit, refer to the ViDoRe GitHub repository.
        Refer to the [ColPali paper](https://arxiv.org/abs/XXXX.XXXXX) for details on metrics, tasks and models.
        """
    )

    with gr.Row():
        # Rank is a number, Model is a markdown link, Average + datasets are numbers.
        datatype = ["number", "markdown"] + ["number"] * (NUM_DATASETS + 1)
        dataframe = gr.Dataframe(data, datatype=datatype, type="pandas", height=500)

    with gr.Row():
        refresh_button = gr.Button("Refresh")
        refresh_button.click(get_refresh_function(), inputs=None, outputs=dataframe, concurrency_limit=20)

    gr.Markdown(
        f"""
        - **Total Datasets**: {NUM_DATASETS}
        - **Total Scores**: {NUM_SCORES}
        - **Total Models**: {NUM_MODELS}
        """
        + r"""
        Please consider citing:

        ```bibtex
        INSERT LATER
        ```
        """
    )

if __name__ == "__main__":
    block.queue(max_size=10).launch(debug=True)