__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import json
import tempfile
import re

from constants import *
from src.auto_leaderboard.model_metadata_type import ModelType

import dask.dataframe as dd

global data_component, filter_component


def validate_model_size(s):
    # Accept strings like "7B" / "13B" (digits followed by "B") or the placeholder "-";
    # anything else is normalised to "-".
    pattern = r'^\d+B$|^-$'
    if re.match(pattern, s):
        return s
    else:
        return '-'
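
# Illustrative usage (not part of the original app): validate_model_size("7B") and
# validate_model_size("-") are returned unchanged, while validate_model_size("7b")
# or validate_model_size("unknown") fall back to "-".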


def upload_file(files):
    # Gradio upload callback: return the local paths of the uploaded files.
    file_paths = [file.name for file in files]
    return file_paths


def prediction_analyse(prediction_content):
    """Score a JSON-lines prediction file against the AV-Odyssey ground truth.

    Returns a dict mapping each question-type id (1-26) to its correct/total counts.
    """
    predictions = prediction_content.split("\n")

    # Ground truth is stored in a parquet file; column 0 is the question id and
    # column 6 holds the reference answer.
    df = dd.read_parquet("./file/av_odyssey.parquet")
    ground_truth = {row[0]: row[6] for row in df.itertuples(index=False, name=None)}

    results = {i: {"correct": 0, "total": 0} for i in range(1, 27)}

    for prediction in predictions:
        prediction = prediction.strip()
        if not prediction:
            continue
        try:
            prediction = json.loads(prediction)
        except json.JSONDecodeError:
            print(f"Warning: skipping invalid JSON data in line: {prediction}")
            continue
        question_id = prediction["question_id"]
        if question_id not in ground_truth:
            continue
        gt_item = ground_truth[question_id]
        # Question ids have the form "<type>_<index>"; the prefix selects the task bucket.
        question_type_id = int(question_id.split("_")[0])

        if prediction["prediction"] == gt_item:
            results[question_type_id]["correct"] += 1

        results[question_type_id]["total"] += 1

    return results
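
# Illustrative prediction line for prediction_analyse (field values are an assumption
# inferred from the parser above, not taken from the benchmark docs):
#   {"question_id": "3_0125", "prediction": "B"}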


def add_new_eval(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
):
    """Score an uploaded prediction file and write the result into the leaderboard CSV."""
    if input_file is None:
        return "Error! Empty file!"

    content = input_file.decode("utf-8")
    prediction = prediction_analyse(content)
    csv_data = pd.read_csv(CSV_DIR)

    # Per-task accuracy (in %) for each of the 26 question types.
    each_task_accuracy = {
        i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) for i in range(1, 27)
    }

    def subset_accuracy(task_range):
        # task_range is an inclusive (first_id, last_id) pair defined in constants.
        correct = sum(prediction[i]["correct"] for i in range(task_range[0], task_range[1] + 1))
        total = sum(prediction[i]["total"] for i in range(task_range[0], task_range[1] + 1))
        return round(correct / total * 100, 1)

    total_correct_timbre = subset_accuracy(timbre_task)
    total_correct_tone = subset_accuracy(tone_task)
    total_correct_melody = subset_accuracy(melody_task)
    total_correct_space = subset_accuracy(space_task)
    total_correct_time = subset_accuracy(time_task)
    total_correct_hallucination = subset_accuracy(hallucination_task)
    total_correct_intricay = subset_accuracy(intricay_task)
    all_average = subset_accuracy((1, 26))

    # Choose the row to write: append a new row for a new model, or overwrite the
    # existing row when a revision of an already-listed model is submitted.
    if revision_name_textbox == '':
        col = csv_data.shape[0]
        model_name = model_name_textbox
    else:
        model_name = revision_name_textbox
        model_name_list = csv_data['Model']
        # The Model column stores markdown links like "[name](url)"; recover the bare names.
        name_list = [name.split(']')[0][1:] for name in model_name_list]
        if revision_name_textbox not in name_list:
            col = csv_data.shape[0]
        else:
            col = name_list.index(revision_name_textbox)

    if model_link != '':
        model_name = '[' + model_name + '](' + model_link + ')'

    new_data = [
        model_name,
        all_average,
        total_correct_timbre,
        total_correct_tone,
        total_correct_melody,
        total_correct_space,
        total_correct_time,
        total_correct_hallucination,
        total_correct_intricay,
    ] + [each_task_accuracy[i] for i in range(1, 27)]

    csv_data.loc[col] = new_data
    csv_data.to_csv(CSV_DIR, index=False)
    return 0
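
# Note: the row layout produced by add_new_eval (model name, overall average, the seven
# category averages, then the 26 per-task accuracies) is assumed to match the column
# order of the leaderboard CSV at CSV_DIR (see COLUMN_NAMES in constants).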


def get_baseline_df():
    # Default leaderboard view: sort by overall accuracy and show only the columns
    # currently selected in the checkbox group (defined below inside the Blocks UI).
    df = pd.read_csv(CSV_DIR)
    df = df.sort_values(by="Avg. All", ascending=False)
    present_columns = MODEL_INFO + checkbox_group.value
    df = df[present_columns]
    return df


def get_all_df():
    df = pd.read_csv(CSV_DIR)
    df = df.sort_values(by="Avg. All", ascending=False)
    return df


def switch_version(version):
    return f"Current version: {version}"


block = gr.Blocks()


with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:

        with gr.TabItem("🏅 AV-Odyssey Benchmark", elem_id="av-odyssey-tab-table", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                    ).style(show_copy_button=True)
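                    # Note: .style(show_copy_button=True) is the Gradio 3.x styling API; on
                    # newer Gradio releases the copy button is usually requested directly via
                    # gr.Textbox(..., show_copy_button=True).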

            gr.Markdown(
                TABLE_INTRODUCTION
            )

            # Checkbox group controlling which score columns are shown in the table.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )

            baseline_value = get_baseline_df()
            baseline_header = MODEL_INFO + checkbox_group.value
            baseline_datatype = ['markdown'] * len(MODEL_INFO) + ['number'] * len(checkbox_group.value)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            def on_filter_model_size_method_change(selected_columns):
                updated_data = get_all_df()

                # Keep the selected columns in the canonical TASK_INFO order.
                selected_columns = [item for item in TASK_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns
                updated_data = updated_data[present_columns]
                updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)
                updated_headers = present_columns
                update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]

                filter_component = gr.components.Dataframe(
                    value=updated_data,
                    headers=updated_headers,
                    type="pandas",
                    datatype=update_datatype,
                    interactive=False,
                    visible=True,
                )

                return filter_component.value

            def on_average_type_change(average_type):
                return get_baseline_df()

            checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
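            # The change handler returns a plain DataFrame value; Gradio uses it to refresh
            # data_component in place, so the Dataframe built inside the handler is only a
            # convenient container for that value.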

        with gr.TabItem("📝 About", elem_id="av-odyssey-tab-table", id=2):
            gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit here! ", elem_id="av-odyssey-tab-table", id=3):
            gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation JSON file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="VideoLLaMA2"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="VideoLLaMA2"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/DAMO-NLP-SG/VideoLLaMA2.1-7B-16F"
                    )

                with gr.Column():
                    input_file = gr.inputs.File(label="Click to Upload a JSON File", file_count="single", type='binary')
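                    # Note: gr.inputs.File is the legacy Gradio input namespace; on newer
                    # Gradio releases the equivalent component is gr.File (accepted `type`
                    # values differ across versions).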
                    submit_button = gr.Button("Submit Eval")

                    submission_result = gr.Markdown()
                    # The click handler below does not declare submission_result as an output,
                    # so add_new_eval's return value is not displayed in the UI.
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            input_file,
                            model_name_textbox,
                            revision_name_textbox,
                            model_link,
                        ],
                    )

    def refresh_data():
        value1 = get_baseline_df()
        return value1

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(
            refresh_data, outputs=data_component
        )

block.launch()
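
# For quick external testing, Gradio also supports block.launch(share=True) to create a
# temporary public link; the default local launch is kept here.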