Spaces:

FrontierAICybersecurity
/

Cybersecurity_leaderboard

Running

App Files Files Community

Cybersecurity_leaderboard / app.py

yujinyujin9393

Add bountybench

3704b12 verified about 1 month ago

raw

history blame contribute delete

7.79 kB

	import abc, sys
	from types import SimpleNamespace

	import gradio as gr

	from gen_table import *
	from meta_data import *

	# import pandas as pd
	# pd.set_option('display.max_colwidth', 0)

	# Extra markup passed to gr.Blocks(head=...) below: on viewports at least
	# 1536px wide it forces .gradio-container to a min-width of var(--size-full).
	head_style = """
	<style>
	@media (min-width: 1536px)
	{
	.gradio-container {
	min-width: var(--size-full) !important;
	}
	}
	</style>
	"""

	with gr.Blocks(title="Frontier AI Cybersecurity Observatory", head=
	head_style) as demo:
	struct = load_results()
	timestamp = struct['time']
	EVAL_TIME = format_timestamp(timestamp)
	results = struct['results']
	benchmark_list=list(results.keys())

	N_DATA = len(benchmark_list)
	DATASETS = benchmark_list

	gr.Markdown(LEADERBORAD_INTRODUCTION.format(N_DATA,EVAL_TIME))
	structs = [abc.abstractproperty() for _ in range(N_DATA)]

	with gr.Tabs(elem_id="leaderboard_tabs", elem_classes='tab-buttons') as tabs:
	# with gr.TabItem('🏅 Cybersecurity Main Leaderboard', elem_id='main', id=0):
	# gr.Markdown(LEADERBOARD_MD['MAIN'].format(N_DATA,N_DATA))
	# _, check_box = BUILD_L1_DF(results, DEFAULT_TASK)
	# table = generate_table(results, DEFAULT_TASK)

	# type_map = check_box['type_map']

	# checkbox_group = gr.CheckboxGroup(
	# choices=check_box['all'],
	# value=check_box['required'],
	# label='Aspects of Cybersecurity Work',
	# interactive=True,
	# )

	# headers = check_box['essential'] + checkbox_group.value
	# with gr.Row():
	# model_name = gr.Textbox(
	# value='Input the Model Name (fuzzy, case insensitive)',
	# label='Model Name',
	# interactive=True,
	# visible=True)
	# data_component = gr.components.DataFrame(
	# value=table[headers],
	# type='pandas',
	# datatype=[type_map[x] for x in headers],
	# interactive=False,
	# wrap=True,
	# visible=True)

	# def filter_df(fields, model_name):
	# headers = check_box['essential'] + fields
	# df = generate_table(results, fields)

	# default_val = 'Input the Model Name (fuzzy, case insensitive)'
	# if model_name != default_val:
	# print(model_name)
	# model_name = model_name.lower()
	# method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
	# flag = [model_name in name for name in method_names]
	# df['TEMP_FLAG'] = flag
	# df = df[df['TEMP_FLAG'] == True]
	# df.pop('TEMP_FLAG')

	# comp = gr.components.DataFrame(
	# value=df[headers],
	# type='pandas',
	# datatype=[type_map[x] for x in headers],
	# interactive=False,
	# wrap=True,
	# visible=True)
	# return comp

	# for cbox in [checkbox_group]:
	# cbox.change(fn=filter_df, inputs=[checkbox_group, model_name], outputs=data_component)
	# model_name.submit(fn=filter_df, inputs=[checkbox_group, model_name], outputs=data_component)

	# Static "About" tab: its body is the content of about.md, opened with a
	# relative path (so it is resolved against the current working directory).
	with gr.TabItem('🔍 About', elem_id='about', id=1):
	    with open("about.md", encoding="utf-8") as about_file:
	        about_text = about_file.read()
	    gr.Markdown(about_text)

	# Build one leaderboard tab per benchmark. Tab ids start at 2 because the
	# 'About' tab uses id=1.
	for i, benchmark in enumerate(benchmark_list):
	    with gr.TabItem(f'📊 {benchmark} Leaderboard', elem_id=benchmark, id=i + 2):
	        # Optional per-benchmark description.
	        if benchmark in LEADERBOARD_MD:
	            gr.Markdown(LEADERBOARD_MD[benchmark])

	        # Per-tab state lives on structs[i] so the filter callback can find it
	        # again from the benchmark name alone.
	        s = structs[i]
	        s.table, s.check_box = BUILD_L2_DF(results, benchmark)
	        s.type_map = s.check_box['type_map']

	        s.checkbox_group = gr.CheckboxGroup(
	            choices=s.check_box['all'],
	            value=s.check_box['required'],
	            label=f'{benchmark} CheckBoxes',
	            interactive=True,
	        )
	        # Initially shown columns: the essential ones plus the pre-ticked boxes.
	        s.headers = s.check_box['essential'] + s.checkbox_group.value

	        # These three benchmarks label their search box "Agent"; every other
	        # one uses "Model". The placeholder text doubles as the "no filter"
	        # sentinel checked in filter_df_l2, so its wording must stay exactly
	        # 'Input the <Model|Agent> Name (fuzzy, case insensitive)'.
	        entity = (
	            'Agent'
	            if benchmark in ("SWE-bench-verified", "CyberGym", "BountyBench")
	            else 'Model'
	        )
	        with gr.Row():
	            s.model_name = gr.Textbox(
	                value=f'Input the {entity} Name (fuzzy, case insensitive)',
	                label=f'{entity} Name',
	                interactive=True,
	                visible=True)
	        s.data_component = gr.components.DataFrame(
	            value=s.table[s.headers],
	            type='pandas',
	            datatype=[s.type_map[x] for x in s.headers],
	            interactive=False,
	            wrap=True,
	            visible=True)
	        # Hidden textbox whose only job is to pass the benchmark name to the
	        # filter callback as an event input.
	        s.dataset = gr.Textbox(value=benchmark, label=benchmark, visible=False)

	def filter_df_l2(dataset_name, fields, model_name):
	    """Rebuild one benchmark's table for the chosen columns and name query.

	    Args:
	        dataset_name: Benchmark name; must be an entry of ``benchmark_list``.
	        fields: Column names currently selected in the checkbox group.
	        model_name: Text of the search box. The untouched placeholder text
	            means "no filter"; otherwise rows are kept when the query
	            (trimmed, lower-cased) is a substring of the displayed name.

	    Returns:
	        A ``gr.components.DataFrame`` with the matching rows, restricted to
	        the essential columns followed by ``fields``.

	    Raises:
	        ValueError: If ``dataset_name`` is not in ``benchmark_list``.
	    """
	    s = structs[benchmark_list.index(dataset_name)]
	    headers = s.check_box['essential'] + fields
	    # Nothing below mutates the cached table (boolean indexing and column
	    # selection both return new frames), so no defensive copy is required.
	    df = s.table

	    # Decide once whether this board's name column is 'Agent' or 'Model'.
	    is_agent_board = dataset_name in ("SWE-bench-verified", "CyberGym", "BountyBench")
	    name_column = 'Agent' if is_agent_board else 'Model'
	    default_val = f'Input the {name_column} Name (fuzzy, case insensitive)'

	    if model_name != default_val:
	        query = model_name.strip().lower()

	        def _matches(cell):
	            # Take the text between the last '>' and the first '</a>' (the
	            # link text when the cell is an HTML anchor), lower-cased.
	            shown_name = cell.split('</a>')[0].split('>')[-1].lower()
	            return query in shown_name

	        # astype(bool) keeps the mask boolean even when the table is empty.
	        df = df[df[name_column].map(_matches).astype(bool)]

	    comp = gr.components.DataFrame(
	        value=df[headers],
	        type='pandas',
	        datatype=[s.type_map[x] for x in headers],
	        interactive=False,
	        wrap=True,
	        visible=True)
	    return comp

	# Re-run filter_df_l2 when the column selection changes and when the name
	# box is submitted. Both events pass the same inputs (hidden benchmark name,
	# selected columns, query text) and replace the same table component.
	for register in (s.checkbox_group.change, s.model_name.submit):
	    register(
	        fn=filter_df_l2,
	        inputs=[s.dataset, s.checkbox_group, s.model_name],
	        outputs=s.data_component,
	    )

	# Collapsed-by-default "Citation" accordion, placed in its own row, holding
	# the citation text in a textbox.
	with gr.Row(), gr.Accordion('Citation', open=False):
	    citation_button = gr.Textbox(
	        elem_id='citation-button',
	        label=CITATION_BUTTON_LABEL,
	        value=CITATION_BUTTON_TEXT,
	    )

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    # Host and share settings are passed through to Gradio's launch().
	    launch_options = {'server_name': '0.0.0.0', 'share': True}
	    demo.launch(**launch_options)