Spaces:

Groq
/

mlagility

Runtime error

File size: 19,931 Bytes

import time  # to simulate a real time data, time loop
from os import listdir
from os.path import isfile, join
import numpy as np  # np mean, np random
import pandas as pd  # read csv, df manipulation
from collections import Counter
import plotly.express as px  # interactive charts
from plotly import graph_objs as go
import streamlit as st  # 🎈 data web app development
import plotly.figure_factory as ff
import numpy as np
from collections import Counter
from streamlit_echarts import st_echarts

# Reminder written to the process's standard output (not rendered in the page).
print("Make sure to activate your VPN before running this script")

# Browser-tab title, tab icon and wide page layout.
st.set_page_config(
    page_title="ML Agility tracker",
    page_icon="⚡",
    layout="wide",
)


# Session State variables:
# INFO_CLOSED is created (as False) only when the session does not have it yet.
state = st.session_state
if "INFO_CLOSED" not in state:
    state.INFO_CLOSED = False

# dashboard title
st.title("ML Agility Tracker ⚡")

# Custom chart colors (https://plotly.com/python/discrete-color/)
# Shared by the plotly charts further down, which index into this list.
colorway = [
    "#5470c6",
    "#FF7F0E",
    "#94cc74",
    "#92cb75",
    "#fac858",
    "#ee6666",
    "#73c0de",
    "#3ba272",
]
# colorway = ["#3366cc", "#FF7F0E"]

# The link destination carries an explicit scheme: a bare "mlagility.org" is a
# relative URL in Markdown and would resolve against the app's own address.
st.markdown(
    "Machine Learning Agility (MLAgility) measures vendor progress towards providing this turnkey solution to their customers. For more details, please visit [mlagility.org](https://mlagility.org).",
    unsafe_allow_html=True,
)


def add_filter(
    data_frame_list, name, label, options=None, num_cols=1, last_is_others=True
):
    """Render a checkbox group and keep only the rows matching the ticked values.

    Args:
        data_frame_list: List of data frames that all contain the column
            ``label``. The set of available values is read from the LAST
            frame of the list.
        name: Heading displayed above the checkboxes.
        label: Name of the column the filter applies to.
        options: Checkbox labels to offer. When ``None`` they are derived from
            the last frame: values ordered by descending frequency and, if
            there are more than eight of them, the seven most frequent ones
            followed by ``"others"``.
        num_cols: Number of layout columns the checkboxes are spread over.
        last_is_others: Whether the last entry of ``options`` means "every
            value that has no checkbox of its own". Recomputed when
            ``options`` is ``None``.

    Returns:
        ``data_frame_list`` itself, with each entry replaced by its filtered
        frame. The list is returned unchanged when the column holds no value
        other than ``"-"`` or when no checkbox is ticked.
    """
    reference_column = data_frame_list[-1][label]

    # "-" marks a missing value and is never offered as a choice.
    all_options = set(reference_column)
    all_options.discard("-")
    if not all_options:
        return data_frame_list

    st.markdown(f"#### {name}")

    # Create list of options if selectable options are not provided
    if options is None:
        frequency = Counter(reference_column)
        ranked = [
            value
            for value in sorted(frequency, key=frequency.get, reverse=True)
            if value != "-"
        ]
        if len(ranked) > 8:
            options = ranked[:7] + ["others"]
            last_is_others = True
        else:
            options = ranked
            last_is_others = False

    # One checkbox per option, laid out round-robin over the columns.
    cols = st.columns(num_cols)
    checked_flags = []
    for idx, option in enumerate(options):
        with cols[idx % num_cols]:
            checked_flags.append(
                st.checkbox(option, False, key=f"{label}_{option}")
            )

    selected_options = [
        option for option, checked in zip(options, checked_flags) if checked
    ]

    # The last checkbox stands for every value without a checkbox of its own.
    # `checked_flags` is empty when an empty `options` list was passed in.
    if last_is_others and checked_flags and checked_flags[-1]:
        explicit_choices = selected_options[:-1]
        other_options = [x for x in all_options if x not in options]
        selected_options = set(explicit_choices + other_options)

    if selected_options:
        wanted = list(selected_options)
        for idx, frame in enumerate(data_frame_list):
            # A boolean Series (rather than a Python list) keeps the columns
            # intact even when `frame` has no rows: an empty list would be
            # interpreted by pandas as "select no columns".
            data_frame_list[idx] = frame[frame[label].isin(wanted)]
    return data_frame_list


def parameter_filter(data_frame_list):
    """Render the parameter-range slider and keep the models inside the range.

    Args:
        data_frame_list: List of data frames with a ``params`` column whose
            values are convertible to integers.

    Returns:
        ``data_frame_list`` itself, with each entry replaced by the rows whose
        ``params`` value lies within the selected range (both ends included).
    """
    st.markdown("#### Parameters")

    # The slider is expressed in millions of parameters: "0", "10", ..., "1000".
    start_params, end_params = st.select_slider(
        "Select a range parameters (in millions)",
        options=[str(x) for x in np.arange(0, 1001, 10, dtype=int)],
        value=("0", "1000"),
    )

    # Convert the slider bounds once instead of once per row.
    lower_bound = int(start_params) * 1000000
    upper_bound = int(end_params) * 1000000

    for idx, frame in enumerate(data_frame_list):
        # A boolean Series keeps the columns intact when `frame` has no rows;
        # an empty Python list would be read by pandas as "select no columns".
        in_range = frame["params"].astype("int64").between(lower_bound, upper_bound)
        data_frame_list[idx] = frame[in_range]

    return data_frame_list


# Sidebar: choose the report to display and narrow it down with filters.
# Every filter is applied to both the selected report and the previous one.
with st.sidebar:

    st.markdown("# Filters")

    # Hard-coded test type; it only decides below whether a fixed author list is used.
    selected_test_type = "mlagility"
    report_folder = "reports/mlagility"
    # Get ML Agility reports (plain files only, sorted by file name)
    reports = sorted(
        [f for f in listdir(report_folder) if isfile(join(report_folder, f))]
    )

    # The last report in sort order is pre-selected.
    selected_report = st.selectbox("Test date", reports, index=len(reports) - 1)
    selected_report_idx = reports.index(selected_report)
    # Report preceding the selected one; max(0, ...) makes the first report
    # its own "previous" report.
    prev_report = reports[max(0, selected_report_idx - 1)]
    mla_report = pd.read_csv(f"{report_folder}/{selected_report}")
    prev_mla_report = pd.read_csv(f"{report_folder}/{prev_report}")

    # Convert numeric columns to int64, mapping the "-" placeholder to 0
    for p in ["chips_used", "cycles", "params"]:
        mla_report[p] = mla_report[p].replace("-", 0).astype("int64")
        prev_mla_report[p] = prev_mla_report[p].replace("-", 0).astype("int64")

    # Parameter filter
    mla_report, prev_mla_report = parameter_filter([mla_report, prev_mla_report])

    # Add author filter
    # With selected_test_type fixed to "mlagility" this evaluates to None, so
    # add_filter derives the author checkboxes from the data.
    authors = (
        [
            "google",
            "apple",
            "facebook",
            "openai",
            "microsoft",
            "huggingface",
            "CompVis",
            "others",
        ]
        if selected_test_type == "monthly"
        else None
    )
    mla_report, prev_mla_report = add_filter(
        [mla_report, prev_mla_report],
        "Authors",
        label="author",
        options=authors,
        num_cols=2,
    )

    # Add task filter
    # NOTE(review): the explicit list below is overwritten by None right after
    # it is built, so the task checkboxes are derived from the data as well.
    tasks = [
        "Image Classification",
        "Translation",
        "Image Segmentation",
        "Fill-Mask",
        "Text-to-Image",
        "Token Classification",
        "Sentence Similarity",
        "Audio Classification",
        "Question Answering",
        "Summarization",
        "other",
    ]
    tasks = None
    mla_report, prev_mla_report = add_filter(
        [mla_report, prev_mla_report], "Tasks", label="task", options=tasks
    )


def detailed_progress_list(df_new, df_old, filter=None):
    """Do nothing: the per-model progress/regression listing is disabled.

    Every call returns ``None`` without reading its arguments and without
    rendering anything.

    Args:
        df_new: Unused.
        df_old: Unused.
        filter: Unused.

    Returns:
        None.
    """
    # NOTE: disabled implementation, kept for reference only.
    #
    # if filter is not None:
    #     df_new = df_new[(df_new[filter] == True)]
    #     df_old = df_old[(df_old[filter] == True)]
    #
    # progress = df_new[~(df_new["hash"].isin(df_old["hash"]))].reset_index(drop=True)
    # regression = df_old[~(df_old["hash"].isin(df_new["hash"]))].reset_index(drop=True)
    #
    # for model_name in progress["model_name"]:
    #     st.markdown(
    #         f'<span style="color:green">↑ {model_name}</span>',
    #         unsafe_allow_html=True,
    #     )
    # for model_name in regression["model_name"]:
    #     st.markdown(
    #         f'<span style="color:red">↓ {model_name}</span>',
    #         unsafe_allow_html=True,
    #     )
    return None


# creating a single-element container
placeholder = st.empty()

with placeholder.container():

    st.markdown("## Summary Results")

    # Headline counters: number of rows plus the column sums of the stage columns.
    # In the code visible here only `all_models` and `assembles` are read later.
    all_models = len(mla_report)
    base_onnx = np.sum(mla_report["base_onnx"])
    optimized_onnx = np.sum(mla_report["optimized_onnx"])
    all_ops_supported = np.sum(mla_report["all_ops_supported"])
    fp16_onnx = np.sum(mla_report["fp16_onnx"])
    compiles = np.sum(mla_report["compiles"])
    assembles = np.sum(mla_report["assembles"])

    # Pie chart for showing origin of models
    # based on https://echarts.apache.org/examples/en/editor.html?c=pie-simple

    all_authors = list(mla_report.loc[:, "author"])
    try:
        all_sources = list(mla_report.loc[:, "model_type"])
    except KeyError:
        all_sources = []
    # The source list is reset unconditionally, so `sources_count` is always empty.
    all_sources = []

    # Occurrences per value, keyed in order of first appearance. Counter does
    # this in one pass instead of rescanning the list for every element.
    author_count = dict(Counter(all_authors))
    sources_count = dict(Counter(all_sources))

    # Two-column row: workload-origin pie chart on the left.
    cols = st.columns(2)
    with cols[0]:
        st.markdown("""#### Workload origin""")

        # One slice per author; the same data feeds the label ring and the
        # visible ring below.
        author_slices = [
            {"value": count, "name": author} for author, count in author_count.items()
        ]
        source_slices = [
            {"value": count, "name": source} for source, count in sources_count.items()
        ]

        # ECharts option flags are real booleans: the string "false" is truthy
        # once it reaches JavaScript and would mean the opposite of what is written.
        # Formatter placeholders (ECharts pie): {b} slice name, {c} value, {d} percentage.
        options = {
            "darkMode": True,
            "textStyle": {"fontSize": 16},
            "tooltip": {"trigger": "item"},
            "series": [
                {
                    # Inner disc: one slice per model source.
                    "name": "Access From",
                    "type": "pie",
                    "radius": [0, "30%"],
                    "label": {"position": "inner", "fontSize": 14},
                    "labelLine": {"show": False},
                    "data": source_slices,
                },
                {
                    # Zero-width ring (inner radius == outer radius) that only
                    # carries the outside "name + percentage" labels.
                    "name": "Name of corpus:",
                    "type": "pie",
                    "radius": ["70%", "70%"],
                    "data": author_slices,
                    "label": {
                        "formatter": "{b}\n{d}%",
                    },
                },
                {
                    # Visible ring with the per-author count printed inside each slice.
                    "name": "Name of corpus:",
                    "type": "pie",
                    "radius": ["50%", "70%"],
                    "data": author_slices,
                    "emphasis": {
                        "itemStyle": {
                            "shadowBlur": 10,
                            "shadowOffsetX": 0,
                            "shadowColor": "rgba(0, 0, 0, 0.5)",
                        }
                    },
                    "label": {
                        "position": "inner",
                        "formatter": "{c}",
                        "color": "black",
                        "textBorderWidth": 0,
                    },
                },
                {
                    # Show total number of models inside
                    "name": "Total number of models:",
                    "type": "pie",
                    "radius": ["0%", "0%"],
                    "data": [{"value": all_models, "name": "Total"}],
                    "silent": True,
                    "label": {
                        "position": "inner",
                        "formatter": "{c}",
                        "color": "white",
                        "fontSize": 30,
                        "textBorderWidth": 0,
                    },
                },
            ],
        }
        st_echarts(
            options=options,
            height="400px",
        )

    with cols[1]:
        # Histogram of model sizes, expressed in millions of parameters.
        all_models = [
            float(param_count) / 1000000
            for param_count in mla_report["params"]
            if param_count != "-"
        ]

        # create_distplot takes one data series and one label per group;
        # there is a single group here, and none when no model is left.
        hist_data = [all_models] if all_models else []
        group_labels = ["All models"] if all_models else []

        st.markdown("""#### Parameter Size Distribution""")

        if not hist_data:
            st.markdown(
                """At least one model needs to reach the compiler to show this graph 😅"""
            )
        else:
            fig = ff.create_distplot(
                hist_data,
                group_labels,
                bin_size=25,
                histnorm="",
                colors=colorway,
                curve_type="normal",
            )
            fig.layout.update(
                xaxis_title="Parameters in millions",
                yaxis_title="count",
            )
            fig.update_xaxes(range=[1, 1000])
            st.plotly_chart(fig, use_container_width=True)

    # NOTE(review): the guard tests for the ratio columns while the code below
    # only reads the latency columns — confirm both sets always appear together.
    if "tsp_gpu_compute_ratio" in mla_report and "tsp_gpu_e2e_ratio" in mla_report:
        cols = st.columns(2)
        with cols[0]:
            # GPU Acceleration plot
            st.markdown("""#### Benchmark results (latency)""")

            # Prepare data: models sorted by name, restricted to those that
            # have a latency value ("-" means missing) for both devices.
            df = mla_report[
                [
                    "model_name",
                    "tsp_estimated_e2e_latency",
                    "gpu_e2e_latency",
                ]
            ].sort_values(by=["model_name"])
            has_both_latencies = (df["tsp_estimated_e2e_latency"] != "-") & (
                df["gpu_e2e_latency"] != "-"
            )
            # .copy() gives an independent frame, so the column assignments
            # below do not write into a slice of another frame.
            df = df[has_both_latencies].copy()
            df["tsp_estimated_e2e_latency"] = df["tsp_estimated_e2e_latency"].astype(
                float
            )
            df["gpu_e2e_latency"] = df["gpu_e2e_latency"].astype(float)

            if len(df) == 0 and assembles > 0:
                st.markdown(
                    (
                        "We do not have GPU numbers for the model(s) mapped to the GroqChip."
                        " This is potentially due to lack of out-of-the-box TensorRT support."
                    )
                )
            elif assembles == 0:
                st.markdown(
                    "Nothing to show here since no models have been successfully assembled."
                )
            else:
                # Coming up with artificial data for now
                # (the CPU latency is synthesised from the two measured latencies).
                df["cpu_latency"] = (
                    df["tsp_estimated_e2e_latency"] + df["gpu_e2e_latency"]
                ) * 10
                # Speed-up of each device relative to the CPU latency.
                df["tsp_cpu_compute_ratio"] = (
                    df["cpu_latency"] / df["tsp_estimated_e2e_latency"]
                )
                df["gpu_cpu_compute_ratio"] = df["cpu_latency"] / df["gpu_e2e_latency"]
                data = [
                    go.Bar(
                        x=df["model_name"],
                        y=df["gpu_cpu_compute_ratio"],
                        name="NVIDIA A100",
                    ),
                    go.Bar(
                        x=df["model_name"],
                        y=df["tsp_cpu_compute_ratio"],
                        name="GroqChip 1",
                    ),
                    go.Bar(
                        # Constant series of 1: the CPU is the baseline.
                        x=df["model_name"],
                        y=df["cpu_latency"] * 0 + 1,
                        name="Intel(R) Xeon(R)",
                    ),
                ]

                layout = go.Layout(
                    barmode="overlay",  # group
                    legend={
                        "orientation": "h",
                        "xanchor": "center",
                        "x": 0.5,
                        "y": 1.2,
                    },
                    yaxis_title="Latency Speedup",
                    # Trace order above is GPU, GroqChip, CPU.
                    colorway=[colorway[2], colorway[1], colorway[0]],
                    height=500,
                )

                fig = dict(data=data, layout=layout)
                st.plotly_chart(fig, use_container_width=True)

                st.markdown(
                    "<sup>*</sup>Estimated I/O does NOT include delays caused by Groq's runtime.",
                    unsafe_allow_html=True,
                )
                st.markdown(
                    "<sup>†</sup>Baseline corresponds to Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz.",
                    unsafe_allow_html=True,
                )

                # The statistics panel is only drawn when the chart is drawn.
                with cols[1]:
                    # Show stats (mean / min / max speed-up per device)
                    st.markdown(
                        f"""<br><br><br><br>
                        <p style="font-family:sans-serif; font-size: 20px;text-align: center;">Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz Acceleration:</p>
                        <p style="font-family:sans-serif; color:{colorway[0]}; font-size: 26px;text-align: center;"> {1}x (Baseline)</p>
                        <br><br>
                        <p style="font-family:sans-serif; font-size: 20px;text-align: center;">NVIDIA A100-PCIE-40GB Acceleration:</p>
                        <p style="font-family:sans-serif; color:{colorway[2]}; font-size: 26px;text-align: center;"> {round(df["gpu_cpu_compute_ratio"].mean(),2)}x</p>
                        <p style="font-family:sans-serif; color:{colorway[2]}; font-size: 20px;text-align: center;"> min {round(df["gpu_cpu_compute_ratio"].min(),2)}x; max {round(df["gpu_cpu_compute_ratio"].max(),2)}x</p>
                        <br><br>
                        <p style="font-family:sans-serif; font-size: 20px;text-align: center;">GroqChip 1 Acceleration<sup>*</sup>:</p>
                        <p style="font-family:sans-serif; color:{colorway[1]}; font-size: 26px;text-align: center;"> {round(df["tsp_cpu_compute_ratio"].mean(),2)}x</p>
                        <p style="font-family:sans-serif; color:{colorway[1]}; font-size: 20px;text-align: center;"> min {round(df["tsp_cpu_compute_ratio"].min(),2)}x; max {round(df["tsp_cpu_compute_ratio"].max(),2)}x</p>""",
                        unsafe_allow_html=True,
                    )

    # FAQ Block
    # Two-column row; the FAQ is rendered in the first column.
    cols = st.columns(2)
    with cols[0]:

        # Inject a CSS rule for the "big-font" class into the page.
        # NOTE(review): no element using class "big-font" is visible in this
        # file — confirm the rule is still needed.
        st.markdown(
            """<style>
        .big-font {
            font-size:20px !important;
        }
        </style>
        """,
            unsafe_allow_html=True,
        )

        class Collapsable:
            """Collects (heading, text) entries and renders them as nested
            HTML ``<details>`` elements through ``st.markdown``."""

            def __init__(self, preamble="", epilogue=""):
                # `preamble` / `epilogue` are emitted verbatim before / after
                # the rendered sections.
                self.sections = []
                self.epilogue = epilogue
                self.preamble = preamble

            def add_section(self, heading, text):
                """Queue one entry; entries are rendered in insertion order."""
                entry = (heading, text)
                self.sections.append(entry)

            def deploy(self):
                """Render every queued entry as one raw-HTML markdown block."""
                small_font = 18
                large_font = 18
                rendered_entries = []
                for heading, text in self.sections:
                    # Outer <details> shows the heading; expanding it reveals
                    # an inner <details> whose summary is the text.
                    rendered_entries.append(
                        f"<details><summary style='font-size:{large_font}px;'>{heading}</summary>"
                        f"<blockquote><details><summary style='font-size:{small_font}px;max-width: 80%;'>{text}</summary>"
                        f"<blockquote></blockquote></details></blockquote></details>"
                    )
                secs = "".join(rendered_entries)
                collapsable_sec = f"""
                <ol>
                {self.preamble}
                {secs}
                {self.epilogue}
                </ol>
                """
                st.markdown(collapsable_sec, unsafe_allow_html=True)

        st.markdown("""## About this workload analysis (FAQ)""")

        # Each section is a (heading, text) pair; deploy() renders them in order.
        faq = Collapsable()
        faq.add_section(
            "Model selection",
            'The models that are part of the "ML Agility" set are models that have been internally selected and represent a mix between popular open-source models and models that Groq has historically focused some efforts on (like GNNs).',
        )
        faq.add_section(
            "Experimental Setup",
            "-",
        )
        faq.add_section(
            "Key limitations",
            "This set of workloads does not include models with more than 1B parameters.",
        )

        faq.deploy()
        # Explicit scheme: a bare "mlagility.org" destination is a relative URL
        # in Markdown and would resolve against the app's own address.
        st.markdown(
            "For more details, please visit [mlagility.org](https://mlagility.org).",
            unsafe_allow_html=True,
        )

    st.markdown("## Detailed Data View")

    # Free-text filter: keep the models whose name contains the typed text.
    model_name = st.text_input("", placeholder="Filter model by name")
    if model_name != "":
        # A boolean Series (not a Python list) keeps the columns intact when
        # the report has no rows; pandas reads an empty list as "no columns".
        name_matches = pd.Series(
            [model_name in x for x in mla_report["model_name"]],
            index=mla_report.index,
            dtype=bool,
        )
        mla_report = mla_report[name_matches]

    # Work on an independent frame: `mla_report` may be a filtered slice at
    # this point and columns are added / overwritten below.
    mla_report = mla_report.copy()

    # Add columns that do not exist yet
    mla_report["chips_used_gpu"] = 1
    mla_report["cpu_latency"] = 0
    mla_report["chips_used_cpu"] = 0

    # Format latencies with three decimal places; "-" (missing) is kept as is
    for latency_col in ("tsp_estimated_e2e_latency", "gpu_e2e_latency"):
        mla_report[latency_col] = [
            "-" if x == "-" else "{:.3f}".format(float(x))
            for x in mla_report[latency_col]
        ]

    # Report column -> header shown in the table; the dict order is the
    # display order.
    renamed_cols = {
        "model_name": "Model Name",
        "author": "Source",
        "params": "Parameters",
        "model_type": "Framework",
        "tsp_estimated_e2e_latency": "GroqChip 1: Latency (ms)",
        "gpu_e2e_latency": "NVIDIA A100-PCIE-40GB: Latency (ms)",
        "cpu_latency": "Intel(R) Xeon(R) Gold 6338 CPU: Latency (ms)",
        "chips_used": "GroqChip 1: Chips Used",
        "chips_used_gpu": "NVIDIA A100-PCIE-40GB: Chips Used",
        "chips_used_cpu": "Intel(R) Xeon(R) Gold 6338 CPU: Chips Used",
    }
    mla_report = mla_report.rename(columns=renamed_cols)

    # Show only the columns that exist: a report may lack some of them (the
    # summary section already guards the "model_type" lookup with a KeyError
    # handler), and selecting a missing column would raise.
    selected_cols = [
        col for col in renamed_cols.values() if col in mla_report.columns
    ]

    # 35 px per row plus one header row, capped at 20 visible rows.
    st.dataframe(
        mla_report[selected_cols],
        height=min((len(mla_report) + 1) * 35, 35 * 21),
        use_container_width=True,
    )