File size: 9,583 Bytes
e47d0b2
 
ab5f5f1
 
 
76b423c
 
 
 
 
ab5f5f1
 
51a4daf
 
 
 
 
 
 
ab5f5f1
51a4daf
 
 
 
e47d0b2
 
 
 
8766911
51a4daf
 
 
 
 
 
 
e47d0b2
 
 
 
 
 
 
 
a8a6326
 
4f5bf6c
a8a6326
 
 
 
 
 
4f5bf6c
a8a6326
 
 
 
 
 
 
 
76b423c
a8a6326
 
e47d0b2
 
a8a6326
 
 
 
 
 
7ecfa5a
a8a6326
 
 
 
 
 
 
76b423c
e47d0b2
 
a8a6326
 
 
4f5bf6c
 
a8a6326
 
e47d0b2
 
a8a6326
 
 
 
4f5bf6c
 
 
e47d0b2
 
4f5bf6c
 
 
 
a8a6326
 
 
 
 
ab5f5f1
 
 
 
51a4daf
 
 
 
ab5f5f1
 
 
 
 
 
4f5bf6c
ab5f5f1
 
 
7ecfa5a
ab5f5f1
e47d0b2
51a4daf
 
0232cf1
 
 
51a4daf
7ecfa5a
 
ab5f5f1
4f5bf6c
0232cf1
a8a6326
0232cf1
ab5f5f1
51a4daf
 
 
7ecfa5a
 
 
 
51a4daf
7ecfa5a
 
 
4f5bf6c
7ecfa5a
 
ab5f5f1
7ecfa5a
51a4daf
7ecfa5a
 
76b423c
 
 
 
 
 
ab5f5f1
 
7ecfa5a
 
76b423c
 
 
 
 
 
ab5f5f1
 
 
 
 
 
0232cf1
51a4daf
 
 
 
0232cf1
ab5f5f1
 
 
 
 
 
4f5bf6c
0232cf1
a8a6326
0232cf1
ab5f5f1
 
 
76b423c
 
 
 
 
 
ab5f5f1
 
7ecfa5a
ab5f5f1
0232cf1
51a4daf
 
 
 
0232cf1
 
 
ab5f5f1
 
 
 
4f5bf6c
0232cf1
a8a6326
0232cf1
ab5f5f1
 
 
 
76b423c
 
 
 
 
 
ab5f5f1
 
a8a6326
 
51a4daf
 
 
7ecfa5a
51a4daf
 
 
 
 
 
7ecfa5a
76b423c
0232cf1
 
 
29307cd
a8a6326
 
 
 
 
0232cf1
51a4daf
 
 
 
0232cf1
a8a6326
0232cf1
a8a6326
 
 
 
7ecfa5a
51a4daf
 
 
 
 
 
 
 
0232cf1
 
 
7ecfa5a
51a4daf
 
 
 
 
 
 
 
a8a6326
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
import re
from typing import List

import gradio as gr

from src.leaderboard import get_leaderboard_df
from src.llm_perf import get_llm_perf_df

# from attention_implementations import get_attn_decode_fig, get_attn_prefill_fig
# from custom_kernels import get_kernel_decode_fig, get_kernel_prefill_fig
from src.map import get_lat_score_mem_fig


def create_control_panel(
    machine: str,
    subsets: List[str],
    backends: List[str],
    hardware_provider: str,
    hardware_type: str,
):
    """Build the collapsed "Control Panel" accordion and its hidden state.

    The panel holds two sliders (minimum score, maximum peak memory), five
    checkbox groups (backends, precision, attention, quantization, kernels)
    and a filter button. The choices offered by the backend, attention,
    quantization and kernel groups depend on ``hardware_provider``.

    Args:
        machine: Stored unchanged in a ``gr.State``.
        subsets: Stored unchanged in a ``gr.State``.
        backends: Stored unchanged in a ``gr.State``. It does NOT drive the
            backend checkbox choices; those come from ``hardware_provider``.
        hardware_provider: Either ``"nvidia"`` or ``"intel"``.
        hardware_type: Stored unchanged in a ``gr.State``.

    Returns:
        A 12-tuple, in this order: filter button, machine state, backends
        state, hardware-type state, subsets state, score slider, memory
        slider, backend checkboxes, datatype checkboxes, attention
        ("optimization") checkboxes, quantization checkboxes, kernel
        checkboxes. Note that the state order here (machine, backends,
        hardware_type, subsets) differs from the parameter order of
        ``create_control_callback`` (machine, subsets, backends,
        hardware_type), so unpack by name rather than forwarding positionally.

    Raises:
        ValueError: If ``hardware_provider`` is not a supported provider.
    """
    # Resolve the provider-specific choices first so that an unsupported
    # provider fails before any Gradio component has been instantiated.
    # A separate name is used so the ``backends`` argument is not shadowed.
    if hardware_provider == "nvidia":
        backend_choices = ["pytorch"]
        attention_implementations = ["Eager", "SDPA", "FAv2"]
        quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit", "torchao.4bit"]
        kernels = [
            "No Kernel",
            "GPTQ.ExllamaV1",
            "GPTQ.ExllamaV2",
            "AWQ.GEMM",
            "AWQ.GEMV",
        ]
    elif hardware_provider == "intel":
        backend_choices = ["pytorch", "onnxruntime", "openvino"]
        attention_implementations = ["Eager"]
        quantizations = ["Unquantized"]
        kernels = ["No Kernel"]
    else:
        raise ValueError(f"Unknown hardware provider: {hardware_provider}")

    # Hidden per-session values that the callbacks receive as fixed inputs.
    machine_value = gr.State(value=machine)
    subsets_value = gr.State(value=subsets)
    backends_value = gr.State(value=backends)
    hardware_type_value = gr.State(value=hardware_type)

    # NOTE(review): the emoji in several labels below look mis-encoded in this
    # copy of the file; the strings are kept verbatim - confirm against upstream.
    with gr.Accordion("Control Panel πŸŽ›οΈ", open=False, elem_id="control-panel"):
        with gr.Row():
            with gr.Column(scale=2, variant="panel"):
                # No explicit minimum/maximum: Gradio's slider defaults apply.
                score_slider = gr.Slider(
                    label="Open LLM Score (%) πŸ“ˆ",
                    info="🎚️ Slide to minimum Open LLM score",
                    value=0,
                    elem_id="threshold-slider",
                )
            with gr.Column(scale=2, variant="panel"):
                # Range is 0 .. 80 * 1024 MB; it starts at the maximum so that
                # nothing is filtered out initially.
                memory_slider = gr.Slider(
                    label="Peak Memory (MB) πŸ“ˆ",
                    info="🎚️ Slide to maximum Peak Memory",
                    minimum=0,
                    maximum=80 * 1024,
                    value=80 * 1024,
                    elem_id="memory-slider",
                )
            with gr.Column(scale=1, variant="panel"):
                backend_checkboxes = gr.CheckboxGroup(
                    label="Backends 🏭",
                    choices=backend_choices,
                    value=backend_choices,
                    info="β˜‘οΈ Select the backends",
                    elem_id="backend-checkboxes",
                )
        with gr.Row():
            with gr.Column(scale=1, variant="panel"):
                datatype_checkboxes = gr.CheckboxGroup(
                    label="Precision πŸ“₯",
                    choices=["float32", "float16", "bfloat16"],
                    value=["float32", "float16", "bfloat16"],
                    info="β˜‘οΈ Select the load data types",
                    elem_id="dtype-checkboxes",
                )
            with gr.Column(scale=1, variant="panel"):
                optimization_checkboxes = gr.CheckboxGroup(
                    label="Attentions πŸ‘οΈ",
                    choices=attention_implementations,
                    value=attention_implementations,
                    info="β˜‘οΈ Select the optimization",
                    elem_id="optimization-checkboxes",
                )
        with gr.Row():
            with gr.Column(scale=1, variant="panel"):
                quantization_checkboxes = gr.CheckboxGroup(
                    label="Quantizations πŸ—œοΈ",
                    choices=quantizations,
                    value=quantizations,
                    info="β˜‘οΈ Select the quantization schemes",
                    elem_id="quantization-checkboxes",
                    elem_classes="boxed-option",
                )
            with gr.Column(scale=1, variant="panel"):
                kernels_checkboxes = gr.CheckboxGroup(
                    label="Kernels βš›οΈ",
                    choices=kernels,
                    value=kernels,
                    info="β˜‘οΈ Select the custom kernels",
                    elem_id="kernel-checkboxes",
                    elem_classes="boxed-option",
                )
        with gr.Row():
            filter_button = gr.Button(
                value="Filter πŸš€",
                elem_id="filter-button",
                elem_classes="boxed-option",
            )

    return (
        filter_button,
        machine_value,
        backends_value,
        hardware_type_value,
        subsets_value,
        score_slider,
        memory_slider,
        backend_checkboxes,
        datatype_checkboxes,
        optimization_checkboxes,
        quantization_checkboxes,
        kernels_checkboxes,
    )


def filter_rows_fn(
    machine,
    subsets,
    backends,
    hardware_type,
    # inputs
    score,
    memory,
    backend_checkboxes,
    precisions,
    attentions,
    quantizations,
    kernels,
    # interactive
    columns,
    search,
):
    """Filter the LLM-perf dataframe and rebuild the table and the plot.

    The dataframe is loaded with ``get_llm_perf_df`` for the given machine,
    subsets, backends and hardware type. A row is kept when all of these hold:
    its model name matches ``search`` (case-insensitive), its backend,
    precision, attention, quantization and kernel are among the selected
    values, its score is at least ``score``, and its memory is at most
    ``memory``.

    Args:
        machine, subsets, backends, hardware_type: Forwarded to
            ``get_llm_perf_df`` (and to ``select_columns_fn``).
        score: Lower bound for the "Open LLM Score (%)" column.
        memory: Upper bound for the "Memory (MB)" column.
        backend_checkboxes, precisions, attentions, quantizations, kernels:
            Collections of accepted values for the matching columns.
        columns: Column names forwarded to ``select_columns_fn``.
        search: Text matched against the model column. It is treated as a
            regular expression; text that is not a valid pattern is matched
            literally instead of raising.

    Returns:
        A two-element list: the result of ``select_columns_fn`` on the
        filtered rows, and the figure returned by ``get_lat_score_mem_fig``
        for the filtered rows.
    """
    # The search box is free text typed by the user. Something like "c++" or
    # "(" is not a valid regular expression and would raise re.error inside
    # pandas, so fall back to a literal match. The sanitized pattern is also
    # what gets forwarded to select_columns_fn below.
    try:
        re.compile(search)
    except re.error:
        search = re.escape(search)

    llm_perf_df = get_llm_perf_df(
        machine=machine, subsets=subsets, backends=backends, hardware_type=hardware_type
    )

    # NOTE(review): the emoji in some column names look mis-encoded in this copy
    # of the file; the keys are kept verbatim because they must match the
    # columns produced by get_llm_perf_df - confirm against upstream.
    # na=False: a missing model name counts as "no match" instead of putting
    # NaN into the boolean mask.
    keep_rows = (
        llm_perf_df["Model πŸ€—"].str.contains(search, case=False, na=False)
        & llm_perf_df["Backend 🏭"].isin(backend_checkboxes)
        & llm_perf_df["Precision πŸ“₯"].isin(precisions)
        & llm_perf_df["Attention πŸ‘οΈ"].isin(attentions)
        & llm_perf_df["Quantization πŸ—œοΈ"].isin(quantizations)
        & llm_perf_df["Kernel βš›οΈ"].isin(kernels)
        & (llm_perf_df["Open LLM Score (%)"] >= score)
        & (llm_perf_df["Memory (MB)"] <= memory)
    )
    filtered_llm_perf_df = llm_perf_df[keep_rows]

    # Passing the filtered frame lets select_columns_fn skip reloading the data.
    selected_filtered_llm_perf_df = select_columns_fn(
        machine, subsets, backends, hardware_type, columns, search, filtered_llm_perf_df
    )
    selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)

    return [
        selected_filtered_llm_perf_df,
        selected_filtered_lat_score_mem_fig,
    ]


def create_control_callback(
    # button
    filter_button,
    # fixed
    machine_value,
    subsets_value,
    backends_value,
    hardware_type_value,
    # inputs
    score_slider,
    memory_slider,
    backend_checkboxes,
    datatype_checkboxes,
    optimization_checkboxes,
    quantization_checkboxes,
    kernels_checkboxes,
    # interactive
    columns_checkboxes,
    search_bar,
    # outputs
    leaderboard_table,
    lat_score_mem_plot,
    # attn_prefill_plot,
    # attn_decode_plot,
    # fa2_prefill_plot,
    # fa2_decode_plot,
    # quant_prefill_plot,
    # quant_decode_plot,
):
    """Register ``filter_rows_fn`` as the click handler of the filter button.

    On click, the current values of the fixed state, the filter controls, the
    column selection and the search bar are handed to ``filter_rows_fn``; its
    two results update ``leaderboard_table`` and ``lat_score_mem_plot``.

    Nothing is returned.
    """
    # The concatenated order below mirrors the parameter order of
    # filter_rows_fn: fixed state, then filter controls, then interactive ones.
    fixed_state = [
        machine_value,
        subsets_value,
        backends_value,
        hardware_type_value,
    ]
    filter_controls = [
        score_slider,
        memory_slider,
        backend_checkboxes,
        datatype_checkboxes,
        optimization_checkboxes,
        quantization_checkboxes,
        kernels_checkboxes,
    ]
    interactive_controls = [columns_checkboxes, search_bar]
    refreshed_components = [leaderboard_table, lat_score_mem_plot]

    filter_button.click(
        fn=filter_rows_fn,
        inputs=fixed_state + filter_controls + interactive_controls,
        outputs=refreshed_components,
    )


def select_columns_fn(
    machine, subsets, backends, hardware_type, columns, search, llm_perf_df=None
):
    """Return the leaderboard rows matching ``search``, limited to ``columns``.

    Args:
        machine, subsets, backends, hardware_type: Forwarded to
            ``get_llm_perf_df``; only used when ``llm_perf_df`` is ``None``.
        columns: Column labels to keep in the result, in that order.
        search: Text matched case-insensitively against the model column of
            the leaderboard dataframe. It is treated as a regular expression;
            text that is not a valid pattern is matched literally instead of
            raising.
        llm_perf_df: Optional already-loaded (possibly pre-filtered)
            dataframe. When omitted, the data is loaded with
            ``get_llm_perf_df``.

    Returns:
        The dataframe produced by ``get_leaderboard_df``, restricted to the
        matching rows and to ``columns``.
    """
    # The search box is free text typed by the user. Something like "c++" or
    # "(" is not a valid regular expression and would raise re.error inside
    # pandas, so fall back to a literal match.
    try:
        re.compile(search)
    except re.error:
        search = re.escape(search)

    if llm_perf_df is None:
        llm_perf_df = get_llm_perf_df(
            machine=machine,
            subsets=subsets,
            backends=backends,
            hardware_type=hardware_type,
        )

    selected_leaderboard_df = get_leaderboard_df(llm_perf_df)

    # NOTE(review): the emoji in the column name looks mis-encoded in this copy
    # of the file; the key is kept verbatim because it must match the column
    # produced by get_leaderboard_df - confirm against upstream.
    # na=False: a missing model name counts as "no match"; a NaN in the mask
    # would make the boolean indexing below fail.
    name_matches = selected_leaderboard_df["Model πŸ€—"].str.contains(
        search, case=False, na=False
    )
    selected_leaderboard_df = selected_leaderboard_df[name_matches]

    return selected_leaderboard_df[columns]


def create_select_callback(
    # fixed
    machine_value,
    subsets_value,
    backends_value,
    hardware_type_value,
    # interactive
    columns_checkboxes,
    search_bar,
    # outputs
    leaderboard_table,
):
    """Refresh the leaderboard table when the column selection or search changes.

    ``select_columns_fn`` is registered as the ``change`` handler of both
    ``columns_checkboxes`` and ``search_bar``. Both registrations use the same
    inputs (fixed state, column selection, search text) and write their result
    to ``leaderboard_table``.

    Nothing is returned.
    """
    # Same order as the leading parameters of select_columns_fn.
    handler_inputs = (
        machine_value,
        subsets_value,
        backends_value,
        hardware_type_value,
        columns_checkboxes,
        search_bar,
    )

    # Column checkboxes are registered first, then the search bar; each
    # registration receives its own fresh input/output lists.
    for trigger in (columns_checkboxes, search_bar):
        trigger.change(
            fn=select_columns_fn,
            inputs=list(handler_inputs),
            outputs=[leaderboard_table],
        )