# Spaces: Runtime error
# (Page-status text captured together with this file; it is not part of the program.)
import abc
import sys
import types

import gradio as gr

from gen_table import *
from meta_data import *
# import pandas as pd | |
# pd.set_option('display.max_colwidth', 0) | |
# Markup passed as the `head` argument of gr.Blocks below. On viewports at
# least 1536px wide it forces `.gradio-container` to a minimum width of
# var(--size-full).
head_style = """
<style>
@media (min-width: 1536px)
{
.gradio-container {
min-width: var(--size-full) !important;
}
}
</style>
"""
# Benchmarks whose leaderboard rows are keyed by an 'Agent' column instead of
# the usual 'Model' column.
_AGENT_BENCHMARKS = ('SWE-bench-verified',)


def _entity_kind(dataset_name):
    """Return 'Agent' for agent-keyed benchmarks and 'Model' for all others."""
    return 'Agent' if dataset_name in _AGENT_BENCHMARKS else 'Model'


def _search_hint(dataset_name):
    """Return the hint text that pre-fills a benchmark's search box.

    The search box uses this text as its initial *value* (not as a
    placeholder), so the filter callback compares against it to detect
    "nothing typed yet".
    """
    return f'Input the {_entity_kind(dataset_name)} Name (fuzzy, case insensitive)'


def filter_df_l2(dataset_name, fields, model_name):
    """Rebuild one benchmark's table for the selected columns and search text.

    Args:
        dataset_name: Benchmark name; must be an entry of ``benchmark_list``.
        fields: Optional column names currently ticked in the checkbox group.
        model_name: Content of the search box. When it still equals the
            pre-filled hint, no row filtering is applied.

    Returns:
        A ``gr.components.DataFrame`` showing the essential columns followed
        by ``fields``, restricted to rows whose displayed name contains the
        search text (case-insensitive substring match).

    Raises:
        ValueError: If ``dataset_name`` is not in ``benchmark_list``.
    """
    s = structs[benchmark_list.index(dataset_name)]
    headers = s.check_box['essential'] + fields
    df = s.table

    if model_name != _search_hint(dataset_name):
        query = model_name.strip().lower()
        # Name cells are parsed as HTML anchors: keep only the text between
        # the last '>' and the closing '</a>' before matching.
        shown_names = (
            cell.split('</a>')[0].split('>')[-1].lower()
            for cell in df[_entity_kind(dataset_name)]
        )
        # Boolean-mask selection returns a new frame, so the table stored on
        # `s` is never modified and no defensive copy is required.
        df = df.loc[[query in name for name in shown_names]]

    return gr.components.DataFrame(
        value=df[headers],
        type='pandas',
        datatype=[s.type_map[x] for x in headers],
        interactive=False,
        wrap=True,
        visible=True)


with gr.Blocks(title="Frontier AI Cybersecurity Observatory", head=head_style) as demo:
    # Load the evaluation results once; every tab below is built from them.
    struct = load_results()
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']
    benchmark_list = list(results.keys())
    N_DATA = len(benchmark_list)
    DATASETS = benchmark_list

    gr.Markdown(LEADERBORAD_INTRODUCTION.format(N_DATA, EVAL_TIME))

    # One attribute bag per benchmark, holding that tab's table, metadata and
    # widgets so that filter_df_l2 can look them up by benchmark name.
    structs = [types.SimpleNamespace() for _ in range(N_DATA)]

    with gr.Tabs(elem_id="leaderboard_tabs", elem_classes='tab-buttons') as tabs:
        # NOTE: an aggregate "Main Leaderboard" tab (id=0) used to be sketched
        # here as commented-out code; it is not enabled, which is why tab ids
        # start at 1.
        with gr.TabItem('π About', elem_id='about', id=1):
            with open("about.md", 'r', encoding="utf-8") as file:
                gr.Markdown(file.read())

        for i, benchmark in enumerate(benchmark_list):
            with gr.TabItem(f'π {benchmark} Leaderboard', elem_id=benchmark, id=i + 2):
                if benchmark in LEADERBOARD_MD:
                    gr.Markdown(LEADERBOARD_MD[benchmark])

                s = structs[i]
                s.table, s.check_box = BUILD_L2_DF(results, benchmark)
                s.type_map = s.check_box['type_map']

                s.checkbox_group = gr.CheckboxGroup(
                    choices=s.check_box['all'],
                    value=s.check_box['required'],
                    label=f'{benchmark} CheckBoxes',
                    interactive=True,
                )
                s.headers = s.check_box['essential'] + s.checkbox_group.value

                with gr.Row():
                    s.model_name = gr.Textbox(
                        value=_search_hint(benchmark),
                        label=f'{_entity_kind(benchmark)} Name',
                        interactive=True,
                        visible=True)

                s.data_component = gr.components.DataFrame(
                    value=s.table[s.headers],
                    type='pandas',
                    datatype=[s.type_map[x] for x in s.headers],
                    interactive=False,
                    wrap=True,
                    visible=True)

                # Hidden textbox that feeds the benchmark name to the callback.
                s.dataset = gr.Textbox(value=benchmark, label=benchmark, visible=False)

                # Re-filter when the column selection changes or a search is
                # submitted.
                s.checkbox_group.change(
                    fn=filter_df_l2,
                    inputs=[s.dataset, s.checkbox_group, s.model_name],
                    outputs=s.data_component)
                s.model_name.submit(
                    fn=filter_df_l2,
                    inputs=[s.dataset, s.checkbox_group, s.model_name],
                    outputs=s.data_component)

    with gr.Row():
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button')
if __name__ == '__main__':
    # Serve on all network interfaces and request a public share link.
    launch_options = {'server_name': '0.0.0.0', 'share': True}
    demo.launch(**launch_options)