File size: 9,133 Bytes
156ecbd a028319 156ecbd f141a8b e1f2481 156ecbd f141a8b c975f93 f141a8b 156ecbd a028319 1158f50 a028319 1158f50 f141a8b 1158f50 f141a8b 1158f50 a028319 156ecbd a028319 e1f2481 a028319 e1f2481 a028319 156ecbd a028319 156ecbd a028319 156ecbd a028319 156ecbd e1f2481 a028319 e1f2481 a028319 156ecbd a028319 156ecbd a028319 156ecbd a028319 156ecbd a028319 e1f2481 1158f50 a028319 156ecbd a028319 156ecbd a028319 1158f50 e1f2481 a028319 e1f2481 a028319 e1f2481 156ecbd a028319 e1f2481 a028319 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
from os import listdir
from os.path import isfile, join
import pandas as pd
import streamlit as st # pylint: disable=import-error
import graphs
from streamlit_helpers import add_filter, slider_filter, Collapsable
# Page-level configuration: browser tab title/icon and full-width layout.
st.set_page_config(page_title="MLAgility Tracker", page_icon="⚡", layout="wide")

# Dashboard title
st.title("MLAgility Tracker ⚡")

# Early-feedback disclaimer shown at the top of the page.
# NOTE(review): the e-mail address at the end of this message reads
# "[email protected]", which looks like an obfuscation placeholder rather than
# a real address -- confirm the intended contact before publishing.
disclaimer = (
    "MLAgility is under active development and we are currently working on a list of critical data "
    "validation tasks available at github.com/groq/mlagility/labels/validation. We are sharing this "
    "dashboard and the data within for the sole purpose of gathering early feedback. See our FAQ below "
    "for more details about license and liability. For feedback please email "
    "[email protected]."
)
st.warning(disclaimer, icon="⚠️")
def add_faq() -> None:
    """
    Displays FAQ using Collapsable sections.

    Builds a ``Collapsable`` container, registers one section per question
    (a title plus either a single answer string or a list of answer strings)
    and finally calls ``deploy()`` on it. Several answers embed HTML tags
    (``<b>``, ``<a href=...>``), so the strings are expected to be rendered
    as HTML by ``Collapsable`` -- presumably list answers become one entry
    per item; verify against ``streamlit_helpers``.
    """
    faq = Collapsable()
    faq.add_section(
        "How is MLAgility different from MLPerf?",
        (
            "Deep learning pioneers have been judging their progress with the Machine Learning "
            "Performance (MLPerf) inference benchmark, but have found that the corpus of models "
            "is small enough that it allows vendors to primarily compete by hand-optimizing "
            "kernels. MLAgility offers a complementary approach to MLPerf by examining the "
            "capability of vendors to provide turnkey solutions to a larger corpus of "
            "off-the-shelf models. By providing a workflow that is representative of the "
            "mass adoption customer on a variety of ML accelerators and effectively disallowing "
            "hand-crafted kernels, MLAgility bridges the gap between MLPerf and the mass adoption "
            "of hardware acceleration."
        ),
    )
    faq.add_section(
        "Why now for MLAgility?",
        (
            "Deep learning algorithms and their associated DL hardware accelerators are "
            "transitioning from early adoption into mass adoption. Production DL is now "
            "becoming available to the masses, with a desire to customize models to tackle "
            "their specific problems, and then take the path of least resistance into "
            "production. A market for turnkey solutions, starting with a model as input and "
            "provision a cost- and latency-effective acceleration solution, often in the cloud, "
            "as output, has emerged."
        ),
    )
    faq.add_section(
        "Which tool was used to generate those results?",
        (
            "All MLAgility results have been generated using the <b>benchit</b> tool v1.0.0, which is part "
            "of the MLAgility Github Repository. You can learn more about it "
            '<a href="https://github.com/groq/mlagility">here</a>.'
        ),
    )
    faq.add_section(
        "What is the experimental setup for each of the devices?",
        [
            "<b>x86</b>: Intel(R) Xeon(R) X40 CPU @ 2.00GHz on Google Cloud (custom: n2, 80 vCPU, 64.00 GiB) and OnnxRuntime version 1.14.0.",
            "<b>nvidia</b>: NVIDIA A100 40GB on Google Cloud (a2-highgpu-1g) and TensorRT version 22.12-py3.",
            "<b>groq</b>: GroqChip 1 on selfhosted GroqNode server, GroqFlow version 3.0.2 TestPyPI package, and a pre-release of GroqWare™ Suite version 0.10.0.",
            (
                "You can find more details about the methodology "
                '<a href="https://github.com/groq/mlagility/blob/main/docs/tools_user_guide.md">here</a>.'
            ),
        ],
    )
    faq.add_section(
        "What are the current key limitations of those results?",
        [
            (
                "Groq's latency is computed using GroqModel.estimate_latency(), which takes"
                " into account deterministic compute time and estimates an ideal runtime with"
                " ideal I/O time. It does not take into account runtime performance."
            ),
            (
                "Results currently only represent batch 1 performance on a limited number of models, "
                "devices, vendors, and runtimes. You can learn more about future directions by reading "
                'the "What are the future directions of MLAgility?" FAQ section.'
            ),
            (
                "Results are currently being validated. You can have a look at our current validation "
                "tasks and other limitations "
                '<a href="https://github.com/groq/mlagility/labels/validation">here</a>.'
            ),
        ],
    )
    faq.add_section(
        "What are the future directions of MLAgility?",
        [
            "Include additional classes of models (e.g. LLMs, GNNs, DLRMs).",
            "Perform experiments that include sweeps over batch and input sizes.",
            "Increase the number of devices from existing vendors (e.g. T4, A10, and H100).",
            # Every item needs its own trailing comma: adjacent string literals
            # without one are silently concatenated into a single list entry.
            "Include devices from additional vendors (e.g. ARM, and AMD).",
            "Include the number of runtimes supported (e.g. ORT and PyTorch for CUDA, PyTorch for x86).",
        ],
    )
    faq.add_section(
        "Who runs MLAgility?",
        (
            "MLAgility is currently maintained by the following individuals (in alphabetical order): "
            "Daniel Holanda Noronha, Jeremy Fowers, Kalin Ovtcharov, and Ramakrishnan Sivakumar. We are actively seeking collaborators from across the industry."
        ),
    )
    faq.add_section(
        "License and Liability",
        (
            'THE MLAGILITY BENCHMARK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR '
            "IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, "
            "FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE "
            "AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER "
            "LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, "
            "OUT OF OR IN CONNECTION WITH THE BENCHMARK OR THE USE OR OTHER DEALINGS IN THE "
            "BENCHMARK. Read more about it "
            '<a href="https://github.com/groq/mlagility/blob/main/LICENSE">here</a>.'
        ),
    )

    # Emit all registered sections to the page.
    faq.deploy()
# Add all filters to sidebar.
# This block selects a report file, loads it into `report`, and narrows it
# through the parameter/author/task filters; the rest of the script renders
# whatever `report` holds after this block.
with st.sidebar:
    st.markdown("# Filters")

    # Get all reports of a given test type (every regular file in the folder,
    # sorted by name so the last entry is the default selection below).
    REPORT_FOLDER = "reports"
    reports = sorted(
        f for f in listdir(REPORT_FOLDER) if isfile(join(REPORT_FOLDER, f))
    )

    # Without any report there is nothing to select or load: the default
    # index below would be -1 and the lookup/read that follow would fail with
    # an unhelpful traceback. Report the problem and end this script run.
    if not reports:
        st.error(f"No report files found in '{REPORT_FOLDER}'.")
        st.stop()

    # Select and read a report (defaults to the last one in sorted order)
    selected_report = st.selectbox("Test date", reports, index=len(reports) - 1)
    selected_report_idx = reports.index(selected_report)
    report = pd.read_csv(join(REPORT_FOLDER, selected_report))

    # Cast count-like columns to int64. "-" entries (presumably "not
    # available" markers in the CSV -- confirm) are mapped to 0 first so the
    # cast cannot fail on them.
    for p in ["groq_chips_used", "params"]:
        report[p] = report[p].replace("-", 0).astype("int64")

    # Add parameter filter
    st.markdown("#### Parameters")
    # The filter helpers take a list of reports and return a list; only one
    # report is passed in, so take the first element back out.
    report = slider_filter(
        [report], "Select a range parameters (in millions)", filter_by="params"
    )[0]

    # Add author filter
    report = add_filter(
        [report],
        "Origin",
        label="author",
        num_cols=2,
    )[0]

    # Add task filter
    report = add_filter([report], "Tasks", label="task", options=None)[0]
# Summary section: funnel chart computed from the filtered report.
st.markdown("## Summary Results")
graphs.device_funnel(report)

# Speedup summary (text + bar chart) relative to a user-selected baseline device.
st.markdown("#### Benchmark results")
baseline = st.selectbox("Baseline", ("x86", "nvidia", "groq"))
graphs.speedup_text_summary(report, baseline)
graphs.speedup_bar_chart(report, baseline)

# Two side-by-side panels: workload origin (left) and parameter sizes (right).
origin_panel, size_panel = st.columns(2)
with origin_panel:
    st.markdown("#### Workload origin")
    graphs.workload_origin(report)
with size_panel:
    st.markdown("#### Parameter Size Distribution")
    graphs.parameter_histogram(report, show_assembled=False)

# FAQ Block
st.markdown("## About this workload analysis (FAQ)")
add_faq()
# Detailed data view (table)
st.markdown("## Detailed Data View")

# Add columns that do not exist yet: the table shows a constant 1 chip for
# both the GPU and the CPU.
for chips_col in ("gpu_chips_used", "cpu_chips_used"):
    report[chips_col] = 1

# Render latencies as strings with three digits after the decimal point;
# "-" entries are passed through unchanged.
for latency_col in ("groq_estimated_latency", "nvidia_latency", "x86_latency"):
    report[latency_col] = [
        "-" if raw == "-" else f"{float(raw):.3f}" for raw in report[latency_col]
    ]

# Internal column name -> human-readable table header. The insertion order of
# this mapping is also the column order of the displayed table.
renamed_cols = {
    "model_name": "Model Name",
    "author": "Source",
    "params": "Parameters",
    "groq_estimated_latency": "GroqChip 1: Latency (ms)",
    "nvidia_latency": "NVIDIA A100-PCIE-40GB: Latency (ms)",
    "x86_latency": "Intel(R) Xeon(R) x40 CPU: Latency (ms)",
    "groq_chips_used": "GroqChip 1: Chips Used",
    "gpu_chips_used": "NVIDIA A100-PCIE-40GB: Chips Used",
    "cpu_chips_used": "Intel(R) Xeon(R) x40 CPU: Chips Used",
}
report.rename(columns=renamed_cols, inplace=True)

# Show only the renamed columns, in the order declared above.
selected_cols = [header for header in renamed_cols.values()]
graphs.results_table(report[selected_cols])  # pylint: disable=unsubscriptable-object
|