yujinyujin9393 committed on
Commit
b312f01
·
verified ·
1 Parent(s): ea5e3a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -184
app.py CHANGED
@@ -1,184 +1,184 @@
1
- import abc, sys
2
- import gradio as gr
3
-
4
- from gen_table import *
5
- from meta_data import *
6
-
7
- # import pandas as pd
8
- # pd.set_option('display.max_colwidth', 0)
9
-
10
- head_style = """
11
- <style>
12
- @media (min-width: 1536px)
13
- {
14
- .gradio-container {
15
- min-width: var(--size-full) !important;
16
- }
17
- }
18
- </style>
19
- """
20
-
21
- with gr.Blocks(title="Cybersecurity Leaderboard", head=
22
- head_style) as demo:
23
- struct = load_results()
24
- timestamp = struct['time']
25
- EVAL_TIME = format_timestamp(timestamp)
26
- results = struct['results']
27
- benchmark_list=list(results.keys())
28
-
29
- N_DATA = len(benchmark_list)
30
- DATASETS = benchmark_list
31
-
32
- gr.Markdown(LEADERBORAD_INTRODUCTION.format(N_DATA,EVAL_TIME))
33
- structs = [abc.abstractproperty() for _ in range(N_DATA)]
34
-
35
- with gr.Tabs(elem_id="leaderboard_tabs", elem_classes='tab-buttons') as tabs:
36
- # with gr.TabItem('🏅 Cybersecurity Main Leaderboard', elem_id='main', id=0):
37
- # gr.Markdown(LEADERBOARD_MD['MAIN'].format(N_DATA,N_DATA))
38
- # _, check_box = BUILD_L1_DF(results, DEFAULT_TASK)
39
- # table = generate_table(results, DEFAULT_TASK)
40
-
41
- # type_map = check_box['type_map']
42
-
43
- # checkbox_group = gr.CheckboxGroup(
44
- # choices=check_box['all'],
45
- # value=check_box['required'],
46
- # label='Aspects of Cybersecurity Work',
47
- # interactive=True,
48
- # )
49
-
50
- # headers = check_box['essential'] + checkbox_group.value
51
- # with gr.Row():
52
- # model_name = gr.Textbox(
53
- # value='Input the Model Name (fuzzy, case insensitive)',
54
- # label='Model Name',
55
- # interactive=True,
56
- # visible=True)
57
- # data_component = gr.components.DataFrame(
58
- # value=table[headers],
59
- # type='pandas',
60
- # datatype=[type_map[x] for x in headers],
61
- # interactive=False,
62
- # wrap=True,
63
- # visible=True)
64
-
65
- # def filter_df(fields, model_name):
66
- # headers = check_box['essential'] + fields
67
- # df = generate_table(results, fields)
68
-
69
- # default_val = 'Input the Model Name (fuzzy, case insensitive)'
70
- # if model_name != default_val:
71
- # print(model_name)
72
- # model_name = model_name.lower()
73
- # method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
74
- # flag = [model_name in name for name in method_names]
75
- # df['TEMP_FLAG'] = flag
76
- # df = df[df['TEMP_FLAG'] == True]
77
- # df.pop('TEMP_FLAG')
78
-
79
- # comp = gr.components.DataFrame(
80
- # value=df[headers],
81
- # type='pandas',
82
- # datatype=[type_map[x] for x in headers],
83
- # interactive=False,
84
- # wrap=True,
85
- # visible=True)
86
- # return comp
87
-
88
- # for cbox in [checkbox_group]:
89
- # cbox.change(fn=filter_df, inputs=[checkbox_group, model_name], outputs=data_component)
90
- # model_name.submit(fn=filter_df, inputs=[checkbox_group, model_name], outputs=data_component)
91
-
92
- with gr.TabItem('🔍 About', elem_id='about', id=1):
93
- with open("about.md", 'r', encoding="utf-8") as file:
94
- gr.Markdown(file.read())
95
-
96
- for i, benchmark in enumerate(benchmark_list):
97
- with gr.TabItem(f'📊 {benchmark} Leaderboard', elem_id=benchmark, id=i + 2):
98
- if benchmark in LEADERBOARD_MD:
99
- gr.Markdown(LEADERBOARD_MD[benchmark])
100
-
101
- s = structs[i]
102
- s.table, s.check_box = BUILD_L2_DF(results, benchmark)
103
- s.type_map = s.check_box['type_map']
104
-
105
- s.checkbox_group = gr.CheckboxGroup(
106
- choices=s.check_box['all'],
107
- value=s.check_box['required'],
108
- label=f'{benchmark} CheckBoxes',
109
- interactive=True,
110
- )
111
- s.headers = s.check_box['essential'] + s.checkbox_group.value
112
-
113
- if benchmark!='SWE-bench-verified':
114
- with gr.Row():
115
- s.model_name = gr.Textbox(
116
- value='Input the Model Name (fuzzy, case insensitive)',
117
- label='Model Name',
118
- interactive=True,
119
- visible=True)
120
- else:
121
- with gr.Row():
122
- s.model_name = gr.Textbox(
123
- value='Input the Agent Name (fuzzy, case insensitive)',
124
- label='Agent Name',
125
- interactive=True,
126
- visible=True)
127
- s.data_component = gr.components.DataFrame(
128
- value=s.table[s.headers],
129
- type='pandas',
130
- datatype=[s.type_map[x] for x in s.headers],
131
- interactive=False,
132
- wrap=True,
133
- visible=True)
134
- s.dataset = gr.Textbox(value=benchmark, label=benchmark, visible=False)
135
-
136
- def filter_df_l2(dataset_name, fields, model_name):
137
- s = structs[benchmark_list.index(dataset_name)]
138
- headers = s.check_box['essential'] + fields
139
- df = cp.deepcopy(s.table)
140
- if dataset_name!="SWE-bench-verified":
141
- default_val = 'Input the Model Name (fuzzy, case insensitive)'
142
- else:
143
- default_val = 'Input the Agent Name (fuzzy, case insensitive)'
144
-
145
- if model_name != default_val:
146
- print(model_name)
147
- model_name = model_name.lower()
148
- if dataset_name!="SWE-bench-verified":
149
- method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
150
- else:
151
- method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Agent']]
152
- flag = [model_name in name for name in method_names]
153
- df['TEMP_FLAG'] = flag
154
- df = df[df['TEMP_FLAG'] == True]
155
- df.pop('TEMP_FLAG')
156
-
157
- comp = gr.components.DataFrame(
158
- value=df[headers],
159
- type='pandas',
160
- datatype=[s.type_map[x] for x in headers],
161
- interactive=False,
162
- wrap=True,
163
- visible=True)
164
- return comp
165
-
166
- for cbox in [s.checkbox_group]:
167
- cbox.change(
168
- fn=filter_df_l2,
169
- inputs=[s.dataset, s.checkbox_group, s.model_name],
170
- outputs=s.data_component)
171
- s.model_name.submit(
172
- fn=filter_df_l2,
173
- inputs=[s.dataset, s.checkbox_group, s.model_name],
174
- outputs=s.data_component)
175
-
176
- with gr.Row():
177
- with gr.Accordion('Citation', open=False):
178
- citation_button = gr.Textbox(
179
- value=CITATION_BUTTON_TEXT,
180
- label=CITATION_BUTTON_LABEL,
181
- elem_id='citation-button')
182
-
183
- if __name__ == '__main__':
184
- demo.launch(server_name='0.0.0.0', share=True)
 
1
+ import abc, sys
2
+ import gradio as gr
3
+
4
+ from gen_table import *
5
+ from meta_data import *
6
+
7
+ # import pandas as pd
8
+ # pd.set_option('display.max_colwidth', 0)
9
+
10
+ head_style = """
11
+ <style>
12
+ @media (min-width: 1536px)
13
+ {
14
+ .gradio-container {
15
+ min-width: var(--size-full) !important;
16
+ }
17
+ }
18
+ </style>
19
+ """
20
+
21
+ with gr.Blocks(title="Frontier AI Cybersecurity Observatory", head=
22
+ head_style) as demo:
23
+ struct = load_results()
24
+ timestamp = struct['time']
25
+ EVAL_TIME = format_timestamp(timestamp)
26
+ results = struct['results']
27
+ benchmark_list=list(results.keys())
28
+
29
+ N_DATA = len(benchmark_list)
30
+ DATASETS = benchmark_list
31
+
32
+ gr.Markdown(LEADERBORAD_INTRODUCTION.format(N_DATA,EVAL_TIME))
33
+ structs = [abc.abstractproperty() for _ in range(N_DATA)]
34
+
35
+ with gr.Tabs(elem_id="leaderboard_tabs", elem_classes='tab-buttons') as tabs:
36
+ # with gr.TabItem('🏅 Cybersecurity Main Leaderboard', elem_id='main', id=0):
37
+ # gr.Markdown(LEADERBOARD_MD['MAIN'].format(N_DATA,N_DATA))
38
+ # _, check_box = BUILD_L1_DF(results, DEFAULT_TASK)
39
+ # table = generate_table(results, DEFAULT_TASK)
40
+
41
+ # type_map = check_box['type_map']
42
+
43
+ # checkbox_group = gr.CheckboxGroup(
44
+ # choices=check_box['all'],
45
+ # value=check_box['required'],
46
+ # label='Aspects of Cybersecurity Work',
47
+ # interactive=True,
48
+ # )
49
+
50
+ # headers = check_box['essential'] + checkbox_group.value
51
+ # with gr.Row():
52
+ # model_name = gr.Textbox(
53
+ # value='Input the Model Name (fuzzy, case insensitive)',
54
+ # label='Model Name',
55
+ # interactive=True,
56
+ # visible=True)
57
+ # data_component = gr.components.DataFrame(
58
+ # value=table[headers],
59
+ # type='pandas',
60
+ # datatype=[type_map[x] for x in headers],
61
+ # interactive=False,
62
+ # wrap=True,
63
+ # visible=True)
64
+
65
+ # def filter_df(fields, model_name):
66
+ # headers = check_box['essential'] + fields
67
+ # df = generate_table(results, fields)
68
+
69
+ # default_val = 'Input the Model Name (fuzzy, case insensitive)'
70
+ # if model_name != default_val:
71
+ # print(model_name)
72
+ # model_name = model_name.lower()
73
+ # method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
74
+ # flag = [model_name in name for name in method_names]
75
+ # df['TEMP_FLAG'] = flag
76
+ # df = df[df['TEMP_FLAG'] == True]
77
+ # df.pop('TEMP_FLAG')
78
+
79
+ # comp = gr.components.DataFrame(
80
+ # value=df[headers],
81
+ # type='pandas',
82
+ # datatype=[type_map[x] for x in headers],
83
+ # interactive=False,
84
+ # wrap=True,
85
+ # visible=True)
86
+ # return comp
87
+
88
+ # for cbox in [checkbox_group]:
89
+ # cbox.change(fn=filter_df, inputs=[checkbox_group, model_name], outputs=data_component)
90
+ # model_name.submit(fn=filter_df, inputs=[checkbox_group, model_name], outputs=data_component)
91
+
92
+ with gr.TabItem('🔍 About', elem_id='about', id=1):
93
+ with open("about.md", 'r', encoding="utf-8") as file:
94
+ gr.Markdown(file.read())
95
+
96
+ for i, benchmark in enumerate(benchmark_list):
97
+ with gr.TabItem(f'📊 {benchmark} Leaderboard', elem_id=benchmark, id=i + 2):
98
+ if benchmark in LEADERBOARD_MD:
99
+ gr.Markdown(LEADERBOARD_MD[benchmark])
100
+
101
+ s = structs[i]
102
+ s.table, s.check_box = BUILD_L2_DF(results, benchmark)
103
+ s.type_map = s.check_box['type_map']
104
+
105
+ s.checkbox_group = gr.CheckboxGroup(
106
+ choices=s.check_box['all'],
107
+ value=s.check_box['required'],
108
+ label=f'{benchmark} CheckBoxes',
109
+ interactive=True,
110
+ )
111
+ s.headers = s.check_box['essential'] + s.checkbox_group.value
112
+
113
+ if benchmark!='SWE-bench-verified':
114
+ with gr.Row():
115
+ s.model_name = gr.Textbox(
116
+ value='Input the Model Name (fuzzy, case insensitive)',
117
+ label='Model Name',
118
+ interactive=True,
119
+ visible=True)
120
+ else:
121
+ with gr.Row():
122
+ s.model_name = gr.Textbox(
123
+ value='Input the Agent Name (fuzzy, case insensitive)',
124
+ label='Agent Name',
125
+ interactive=True,
126
+ visible=True)
127
+ s.data_component = gr.components.DataFrame(
128
+ value=s.table[s.headers],
129
+ type='pandas',
130
+ datatype=[s.type_map[x] for x in s.headers],
131
+ interactive=False,
132
+ wrap=True,
133
+ visible=True)
134
+ s.dataset = gr.Textbox(value=benchmark, label=benchmark, visible=False)
135
+
136
+ def filter_df_l2(dataset_name, fields, model_name):
137
+ s = structs[benchmark_list.index(dataset_name)]
138
+ headers = s.check_box['essential'] + fields
139
+ df = cp.deepcopy(s.table)
140
+ if dataset_name!="SWE-bench-verified":
141
+ default_val = 'Input the Model Name (fuzzy, case insensitive)'
142
+ else:
143
+ default_val = 'Input the Agent Name (fuzzy, case insensitive)'
144
+
145
+ if model_name != default_val:
146
+ print(model_name)
147
+ model_name = model_name.lower()
148
+ if dataset_name!="SWE-bench-verified":
149
+ method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
150
+ else:
151
+ method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Agent']]
152
+ flag = [model_name in name for name in method_names]
153
+ df['TEMP_FLAG'] = flag
154
+ df = df[df['TEMP_FLAG'] == True]
155
+ df.pop('TEMP_FLAG')
156
+
157
+ comp = gr.components.DataFrame(
158
+ value=df[headers],
159
+ type='pandas',
160
+ datatype=[s.type_map[x] for x in headers],
161
+ interactive=False,
162
+ wrap=True,
163
+ visible=True)
164
+ return comp
165
+
166
+ for cbox in [s.checkbox_group]:
167
+ cbox.change(
168
+ fn=filter_df_l2,
169
+ inputs=[s.dataset, s.checkbox_group, s.model_name],
170
+ outputs=s.data_component)
171
+ s.model_name.submit(
172
+ fn=filter_df_l2,
173
+ inputs=[s.dataset, s.checkbox_group, s.model_name],
174
+ outputs=s.data_component)
175
+
176
+ with gr.Row():
177
+ with gr.Accordion('Citation', open=False):
178
+ citation_button = gr.Textbox(
179
+ value=CITATION_BUTTON_TEXT,
180
+ label=CITATION_BUTTON_LABEL,
181
+ elem_id='citation-button')
182
+
183
+ if __name__ == '__main__':
184
+ demo.launch(server_name='0.0.0.0', share=True)