# Spaces: facat / ml-summit (Runtime error)
# File size: 11,258 Bytes
# Revision: 2fc4496

# %%

# %cd ~/docs/0425-ml_summit/scripts/
import plotly.express as px
from plotly.graph_objs import Figure, FigureWidget
import datasets
import pandas as pd
import huggingface_hub
import plotly.graph_objs as go
import numpy as np
from PIL import Image

# Registry of every figure built by this script, keyed by the name that the
# export loop at the end of the file uses for the output file.
FIGURES: dict[str, Figure] = {}
# %%

# Treemap of the NLP datasets: a constant root node, then one level per task
# and one tile per dataset, with tile area taken from the "size" column.
df = pd.read_csv("nlp_datas.csv")
fig = px.treemap(
    df,
    values="size",
    path=[px.Constant("nlp-datasets"), "task", "dataset"],
).update_layout(
    # No margins and a fully transparent paper background.
    margin=dict(t=0, l=0, r=0, b=0),
    paper_bgcolor="rgba(0,0,0,0)",
)
FIGURES["nlp"] = fig
# Bare expression: shows the figure as the cell output when run interactively.
fig
# %%
# Treemap of the LLM datasets: a constant "LLM" root with one tile per
# dataset, sized by the "size" column.
df = pd.read_csv("llm.csv")
fig = px.treemap(
    df,
    values="size",
    path=[px.Constant("LLM"), "dataset"],
).update_layout(
    # No margins and a fully transparent paper background.
    margin=dict(t=0, l=0, r=0, b=0),
    paper_bgcolor="rgba(0,0,0,0)",
)
FIGURES["gpt"] = fig
# Bare expression: shows the figure as the cell output when run interactively.
fig
# %%


def _parse_sequence_length(label):
    """Convert a sequence-length label such as ``"4k"`` into a number.

    A trailing ``k``/``K`` multiplies the value by 1024; a label without the
    suffix is returned as a plain number. Integers stay integers, anything
    else is parsed as a float.

    Raises:
        ValueError: if the label is not a number with an optional k suffix.
    """
    text = str(label).strip()
    scale = 1
    if text[-1:] in ("k", "K"):
        text, scale = text[:-1], 1024
    try:
        return int(text) * scale
    except ValueError:
        return float(text) * scale


# Line chart of time against sequence length, one line per model.
df = pd.read_csv("./seq-time.csv", index_col=0)
# The labels were previously expanded with eval(); an explicit parser gives the
# same numbers for "<n>k" labels without executing text read from a file.
df.index = df.index.map(_parse_sequence_length)
# NOTE(review): the divisor 7 is a hard-coded normalisation for this column;
# its meaning is not visible here - confirm against the data source.
df["platformers"] = df["platformers"] / 7
# The last CSV column is not plotted.
df = df.drop(columns=[df.columns[-1]])
# Wide -> long: one row per (sequence length, model) pair.
df = df.reset_index(names="sequence length").melt(
    id_vars="sequence length", var_name="model", value_name="time"
)
fig = px.line(df, x="sequence length", y="time", color="model")
FIGURES["seq-time"] = fig
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)
# White axis colour on both axes.
fig.update_xaxes(color="white")
fig.update_yaxes(color="white")
# Bare expression: shows the figure as the cell output when run interactively.
fig
# %%

# Grouped bar chart of TFLOPS against sequence length, one bar group per model.
# The index is used exactly as read from the CSV; no label conversion is
# applied in this cell.
df = pd.read_csv("seq-tflops.csv", index_col=0)
df = df.reset_index(names="sequence length")
# Wide -> long: one row per (sequence length, model) pair.
df = df.melt(id_vars="sequence length", var_name="model", value_name="tflops")

fig = px.bar(
    df,
    x="sequence length",
    y="tflops",
    color="model",
    barmode="group",
)
FIGURES["seq-tflops"] = fig

# Transparent backgrounds, no margins, white legend and axis colours.
fig.update_layout(
    legend_font=dict(color="white"),
    plot_bgcolor="rgba(0,0,0,0)",
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
).update_xaxes(color="white").update_yaxes(color="white")
# Bare expression: shows the figure as the cell output when run interactively.
fig
# %%


# Render the first 100 training rows of the SUSTech/webvid dataset as a table,
# leaving out the "duration" column.
df = (
    datasets.load_dataset("SUSTech/webvid", split="train[:100]")
    .to_pandas()
    .drop(columns=["duration"])
)

fig = go.Figure(
    data=[
        go.Table(
            # One header cell per remaining column.
            header=dict(
                align="left",
                fill_color="paleturquoise",
                values=list(df.columns),
            ),
            # Cell values are supplied column by column.
            cells=dict(
                align="left",
                fill_color="lavender",
                values=[df[col] for col in df.columns],
            ),
        )
    ]
).update_layout(
    margin=dict(t=0, l=0, r=0, b=0),
    paper_bgcolor="rgba(0,0,0,0)",
)
FIGURES["webvid"] = fig
# %%

# Bar chart of token counts per example task. Keys become the "task" column
# and values the "token" column of the plotted frame.
data = {
    "402-page transcripts from Apollo 11’s mission to the moon": 326914,
    "44-minute silent Buster Keaton movie": 696417,
    "more than 100,000 lines of code": 816767,
    "Generate 1min video": 1000000,
}

df = pd.Series(data, name="token").to_frame().reset_index(names="task")

# text_auto=".2s" labels each bar with its value in the ".2s" number format.
fig = px.bar(
    df,
    y="token",
    x="task",
    text_auto=".2s",
)
FIGURES["token-bar"] = fig

# Value labels sit outside the bars, horizontal, white, and may overflow the
# plotting area (cliponaxis=False).
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
    textfont_color="white",
)
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)

# Both axes: white colour, no axis line, grid or zero line. The x axis title
# is blanked.
fig.update_xaxes(
    color="white",
    zeroline=False,
    showline=False,
    showgrid=False,
    title="",
)
fig.update_yaxes(
    showline=False,
    showgrid=False,
    zeroline=False,
    color="white",
)
# Bare expression: shows the figure as the cell output when run interactively.
fig


# %%
def generate_loss(steps, initial_loss, decay_rate, noise_factor):
    """Return an exponentially decaying curve with multiplicative Gaussian noise.

    Args:
        steps: NumPy array of positions at which the curve is evaluated.
        initial_loss: Noise-free value of the curve where ``steps`` is 0.
        decay_rate: Coefficient of the exponential decay.
        noise_factor: Scale of the noise relative to the noise-free value at
            each position.

    Returns:
        Array with the same shape as ``steps``. The noise is drawn from the
        global NumPy random state, so results vary unless it is seeded.
    """
    clean_curve = initial_loss * np.exp(-decay_rate * steps)
    gaussian = np.random.randn(*steps.shape)
    relative_scale = noise_factor * clean_curve
    return clean_curve + relative_scale * gaussian


def splitpoints(total, split):
    """Yield ``split`` consecutive slices that together cover ``total`` items.

    The first ``split - 1`` slices each span ``total // split`` items; the
    last slice is open-ended so it also absorbs any remainder.

    Args:
        total: Number of items being partitioned.
        split: Number of slices to produce; must be at least 1.

    Yields:
        ``slice`` objects in ascending order.

    Raises:
        ValueError: if ``split`` is smaller than 1 (raised when iteration
            starts, because this is a generator).
    """
    if split < 1:
        raise ValueError(f"split must be >= 1, got {split}")
    step = total // split
    for i in range(split - 1):
        yield slice(i * step, (i + 1) * step)
    # Computed from ``split`` rather than the loop variable, which is never
    # bound when split == 1.
    yield slice((split - 1) * step, None)


# One entry per training segment of the "cloud-switch" figure: legend name,
# line colour and the logo file of the provider named in "name".
# The icon paths match the logos that the figure places on each segment
# (previously they were shifted by one provider relative to the names).
meta = [
    {
        "name": "2xDGX on aws",
        "color": "red",
        "icon": "../figures/aws-white.png",
    },
    {
        "name": "16xDGX on aliyun",
        "color": "orange",
        "icon": "../figures/aliyun.png",
    },
    {
        "name": "128xDGX on ucloud",
        "color": "blue",
        "icon": "../figures/logo/ucloud.png",
    },
]


# Synthetic loss curve: 1000 points on [0, 1], drawn as one line per entry of
# `meta` so each consecutive segment gets its own name and colour.
steps = np.linspace(0, 1, 1000)
loss = generate_loss(steps, initial_loss=1, decay_rate=5, noise_factor=0.1)

fig = go.Figure()
FIGURES["cloud-switch"] = fig

# splitpoints() yields one slice per meta entry, so the two pair up one-to-one.
for segment, idx in zip(meta, splitpoints(1000, len(meta))):
    segment_line = go.Scatter(
        x=steps[idx],
        y=loss[idx],
        mode="lines",
        name=segment["name"],
        line=dict(color=segment["color"]),
    )
    fig.add_trace(segment_line)
# Overlay one provider logo per curve segment. Each entry is
# (image path, x, y, size); positions and sizes are in paper coordinates.
for logo_path, logo_x, logo_y, logo_size in (
    ("../figures/logo/ucloud.png", 0.8, 0.2, 0.2),
    ("../figures/aws-white.png", 0.17, 0.7, 0.15),
    ("../figures/aliyun.png", 0.43, 0.3, 0.15),
):
    # Plotly encodes a PIL image into the figure when the layout image is
    # added, so the file handle can be closed as soon as the call returns.
    with Image.open(logo_path) as logo:
        fig.add_layout_image(
            x=logo_x,
            sizex=logo_size,
            y=logo_y,
            sizey=logo_size,
            xref="paper",
            yref="paper",
            opacity=1.0,
            layer="above",
            source=logo,
        )


# Final styling of the cloud-switch figure: no legend, transparent
# backgrounds, a bare x axis and a y axis that keeps only its title and a
# faint dashed grid.
fig.update_layout(
    plot_bgcolor="rgba(255,255,255,0)",
    paper_bgcolor="rgba(0,0,0,0)",
    showlegend=False,
).update_xaxes(
    automargin=True,
    showgrid=False,
    zeroline=False,
    showline=False,
    showticklabels=False,
).update_yaxes(
    title="Loss",
    color="white",
    gridcolor="rgba(255,255,255,0.3)",
    griddash="4px",
    showline=False,
    zeroline=False,
    showticklabels=False,
)
# Bare expression: shows the figure as the cell output when run interactively.
fig


# %%
def plot_gantt(df):
    """Build a Gantt-style timeline figure from a task table.

    Args:
        df: Frame with "Start", "End" and "Task" columns. Each row becomes one
            bar spanning Start..End, placed on and coloured by its Task.

    Returns:
        The Plotly figure: no legend, transparent backgrounds, a hidden x
        axis and large white task labels on the y axis.
    """
    layout_style = dict(
        xaxis_tickformat="%H:%M",
        showlegend=False,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(255,255,255,0)",
    )
    # x axis: no tick labels, axis line, zero line or grid.
    time_axis_style = dict(
        showticklabels=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        automargin=True,
    )
    # y axis: task names in large white type over a faint dashed grid.
    task_axis_style = dict(
        zeroline=False,
        showline=False,
        griddash="4px",
        gridcolor="rgba(0,0,0,0.3)",
        title="",
        color="white",
        tickfont=dict(size=20),
    )

    gantt = px.timeline(df, x_start="Start", x_end="End", y="Task", color="Task")
    gantt.update_layout(**layout_style)
    gantt.update_xaxes(**time_axis_style)
    gantt.update_yaxes(**task_axis_style)
    return gantt


# Build the base timeline: one one-hour slot every four hours, each randomly
# assigned a task, then merge consecutive slots that share the same task.
num_rows = 1000
download_prop = 0.65

df = pd.DataFrame(
    {"Start": pd.date_range("1-jan-2021", periods=num_rows, freq="4h")}
)
df["End"] = df["Start"] + pd.Timedelta(hours=1)
# "Read" is drawn with probability download_prop, "Transform" otherwise.
df["Task"] = np.random.choice(
    ["Read", "Transform"], num_rows, p=(download_prop, 1 - download_prop)
)

# Pin the first and last slot so the timeline always opens with "Read" and
# closes with "Transform".
df.loc[0, "Task"] = "Read"
df.loc[len(df) - 1, "Task"] = "Transform"

# A new run starts wherever the task differs from the previous row; the
# running count of those changes identifies each run.
run_id = df["Task"].ne(df["Task"].shift()).cumsum()
df = df.groupby(run_id).agg({"Start": "min", "End": "max", "Task": "first"})

# Snapshot reused as the starting point of each profile figure below.
timeline = df.copy()
# %%


# "profile-naive": the timeline plus three "Train" windows.
df = timeline.copy()
first_start = df.iloc[0].Start
last_end = df.iloc[-1].End
# Ten evenly spaced instants across the whole timeline; the windows are the
# pairs (2, 3), (5, 6) and (8, 9) of those instants.
ddi = pd.date_range(first_start, end=last_end, periods=10)
for start, end in zip(ddi[2:-1:3], ddi[3::3]):
    # Rows that start inside the window are relabelled as training...
    starts_in_window = df["Start"].between(start, end)
    df.loc[starts_in_window, "Task"] = "Train"
    # ...and a row covering the whole window is appended under a new label.
    df.loc[len(df) + 1] = pd.Series({"Start": start, "End": end, "Task": "Train"})

FIGURES["profile-naive"] = plot_gantt(df)
# Bare expression: shows the figure as the cell output when run interactively.
FIGURES["profile-naive"]
# %%


# "profile-old": the timeline plus ten "Train" windows.
df = timeline.copy()
prop = 10
first_start = df.iloc[0].Start
last_end = df.iloc[-1].End
# (prop + 1) * 10 evenly spaced instants; each window runs from instant
# 1 + j * (prop + 1) to instant prop + j * (prop + 1).
ddi = pd.date_range(first_start, end=last_end, periods=(prop + 1) * 10)
window_starts = ddi[1 : -1 : prop + 1]
window_ends = ddi[prop :: prop + 1]
for start, end in zip(window_starts, window_ends):
    # Rows that start inside the window are relabelled as training...
    starts_in_window = df["Start"].between(start, end)
    df.loc[starts_in_window, "Task"] = "Train"
    # ...and a row covering the whole window is appended under a new label.
    df.loc[len(df) + 1] = pd.Series({"Start": start, "End": end, "Task": "Train"})
FIGURES["profile-old"] = plot_gantt(df)
# Bare expression: shows the figure as the cell output when run interactively.
FIGURES["profile-old"]
# %%


# "profile-stream": the timeline plus a single "Train" row spanning it.
df = timeline.copy()

train_start = df.iloc[0].Start
# NOTE(review): the row ends at the last row's Start, not its End - confirm
# this is intended.
train_end = df.iloc[-1].Start
df.loc[len(df) + 1] = pd.Series(
    {"Start": train_start, "End": train_end, "Task": "Train"}
)
FIGURES["profile-stream"] = plot_gantt(df)
# Bare expression: shows the figure as the cell output when run interactively.
FIGURES["profile-stream"]

# %%


# Export every registered figure as an HTML fragment (not a full page) that
# loads plotly.js from the CDN, one file per figure name.
for figure_name, figure in FIGURES.items():
    print(figure_name)
    target_path = f"../components/{figure_name}.qmd"
    figure.write_html(target_path, full_html=False, include_plotlyjs="cdn")

# for i in range(100):
#     print(i)
# %%
import qrcode
from qrcode.image.styledpil import StyledPilImage
from qrcode.image.styles.moduledrawers.pil import RoundedModuleDrawer
from qrcode.image.styles.colormasks import RadialGradiantColorMask


# QR code for the WeChat contact link, using the lowest error-correction
# level and drawn as white modules on a transparent background.
qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
qr.add_data("https://u.wechat.com/MAmdMGMYjGFC4-2ESxZ1oyw")

img_2 = qr.make_image(back_color="transparent", fill_color="white")
img_2.save("../figures/qr/jing.png")
# %%


# QR code for a pre-filled e-mail link, using the lowest error-correction
# level and drawn as white modules on a transparent background.
# NOTE(review): the recipient in the link below looks like an e-mail
# obfuscation placeholder rather than a real address - confirm before
# regenerating the image.
qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
qr.add_data("mailto:[email protected]?subject=Hello&body=")

img_2 = qr.make_image(back_color="transparent", fill_color="white")
img_2.save("../figures/qr/mail-data.png")