"""Gradio app for browsing LightEval task definitions, prompts and dataset samples."""

import ast
import json
import urllib.request  # `import urllib` alone does not guarantee the `request` submodule is loaded

import gradio as gr
import pandas as pd
from datasets import load_dataset
from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns, ColumnFilter

TASKS_TABLE_URL = "https://raw.githubusercontent.com/huggingface/lighteval/main/src/lighteval/tasks/tasks_table.jsonl"
TASKS_PROMPT_FORMATTING_URL = "https://raw.githubusercontent.com/huggingface/lighteval/main/src/lighteval/tasks/tasks_prompt_formatting.py"

# One row per (task, suite) pair: tasks listing several suites are exploded so
# that positional indices 0..len(df)-1 identify a single suite/task combination.
df = pd.read_json(TASKS_TABLE_URL, lines=True).explode("suite").reset_index(drop=True)

with urllib.request.urlopen(TASKS_PROMPT_FORMATTING_URL) as f:
    tasks_prompt_functions_raw = f.read().decode("utf-8")

tree = ast.parse(tasks_prompt_functions_raw)

# Map every function name found in the prompt-formatting module to its source code.
tasks_prompt_functions = {
    node.name: ast.get_source_segment(tasks_prompt_functions_raw, node)
    for node in ast.walk(tree)
    if isinstance(node, ast.FunctionDef)
}


def load_task_metadata(task_id):
    """Return the values shown in the inspector for the task at position *task_id*.

    Args:
        task_id: Positional index of the task row in ``df``.

    Returns:
        A 3-tuple matching the components wired to this callback:
        the task row as a dict, a Markdown header linking to the task's
        Hugging Face dataset repository, and the source code of the task's
        prompt function (``None`` when the function is not found).
    """
    task_row = df.iloc[task_id]
    dataset_header = f"""Examples from the HF repository ([{task_row['hf_repo']}](https://huggingface.co/datasets/{task_row['hf_repo']}))"""
    prompt_function_code = tasks_prompt_functions.get(task_row["prompt_function"])
    # Exactly one value per output component; the previous trailing "unknown"
    # value had no component to receive it.
    return task_row.to_dict(), dataset_header, prompt_function_code


def _to_display_cell(value):
    """Return *value* unchanged unless it is a dict or list, which is JSON-encoded."""
    if isinstance(value, (dict, list)):
        # Cells are only displayed, so values JSON cannot represent are
        # deliberately stringified instead of raising.
        return json.dumps(value, default=str)
    return value


def load_task_examples(task_id):
    """Return up to 20 examples of the task's dataset as a DataFrame.

    Args:
        task_id: Positional index of the task row in ``df``.

    Returns:
        A ``pandas.DataFrame`` with one row per example; dict and list values
        are JSON-encoded strings. The frame is empty when the dataset yields
        no batch.
    """
    task_row = df.iloc[task_id]
    # NOTE(review): several evaluation splits are joined as "a+b"; confirm that
    # `load_dataset` accepts combined split names in streaming mode.
    dataset = load_dataset(
        task_row["hf_repo"],
        task_row["hf_subset"],
        split="+".join(task_row["evaluation_splits"]),
        trust_remote_code=task_row["trust_dataset"],
        streaming=True,
    )
    sample_data = next(dataset.iter(20), None)
    if sample_data is None:
        return pd.DataFrame()

    # dictionary of lists to list of dictionaries
    column_names = list(sample_data)
    rows = [
        {name: _to_display_cell(value) for name, value in zip(column_names, row)}
        for row in zip(*sample_data.values())
    ]
    return pd.DataFrame(rows)


with gr.Blocks() as demo:
    gr.Markdown(""" # LightEval Tasks Explorer """)
    with gr.Tabs() as tabs:
        with gr.TabItem("🗃️ Tasks List"):
            Leaderboard(
                value=df,
                select_columns=SelectColumns(
                    default_selection=[
                        "name",
                        "suite",
                        "prompt_function",
                        "hf_repo",
                        "hf_subset",
                        "evaluation_splits",
                        "metric",
                    ],
                    cant_deselect=["name", "suite"],
                    label="Columns to display",
                ),
                search_columns=SearchColumns(
                    primary_column="name",
                    secondary_columns=["suite", "prompt_function", "hf_repo", "metric"],
                    placeholder="Search for a task by name, suite, prompt_function, hf_repo or "
                                "metric. To search by suite, for example, type 'suite:'. Separate queries by \";\"",
                    label="Search",
                ),
                filter_columns=[
                    ColumnFilter("suite", type="dropdown", label="Select suite"),
                    # ColumnFilter("prompt_function", type="dropdown", label="Select prompt_function"),
                    # ColumnFilter("metric", type="dropdown", label="Select metric")
                ],
                wrap=True,
            )
        with gr.TabItem("🔎 Task Inspector"):
            # Each choice is a (label, value) pair: "suite|name" is displayed and
            # the row position in `df` is passed to the callbacks.
            task_inspector_selector = gr.Dropdown(
                choices=sorted(zip((df['suite'] + '|' + df['name']).tolist(), range(len(df)))),
                label="Task",
                info="Select a task",
            )
            with gr.Row():
                with gr.Column():
                    task_metadata = gr.Json(label="Task definition")
                with gr.Column():
                    task_prompt_function = gr.Code(
                        label="Task prompt function",
                        language="python",
                        interactive=False,
                    )
            task_dataset_header = gr.Markdown("Examples from the HF repository")
            task_dataset = gr.Dataframe(wrap=True)

            # Metadata and examples are loaded by separate listeners on the
            # same dropdown change event.
            gr.on(
                triggers=[task_inspector_selector.change],
                inputs=[task_inspector_selector],
                outputs=[task_metadata, task_dataset_header, task_prompt_function],
                fn=load_task_metadata,
            )
            gr.on(
                triggers=[task_inspector_selector.change],
                inputs=[task_inspector_selector],
                outputs=[task_dataset],
                fn=load_task_examples,
            )

if __name__ == "__main__":
    demo.launch()