|
|
|
|
|
|
|
import plotly.express as px |
|
from plotly.graph_objs import Figure, FigureWidget |
|
import datasets |
|
import pandas as pd |
|
import huggingface_hub |
|
import plotly.graph_objs as go |
|
import numpy as np |
|
from PIL import Image |
|
|
|
# Registry of every figure built below, keyed by the name that the export
# loop at the end of the script uses for the output file.
FIGURES: dict[str, Figure] = {}

# Treemap of NLP datasets: constant root -> "task" -> "dataset", with tile
# area taken from the "size" column of the CSV.
df = pd.read_csv("nlp_datas.csv")
treemap_levels = [px.Constant("nlp-datasets"), "task", "dataset"]
fig = px.treemap(df, path=treemap_levels, values="size").update_layout(
    # Fully transparent paper and no margins.
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
)
FIGURES["nlp"] = fig

# Bare expression: renders the figure when run as a notebook cell.
fig
|
|
|
# Treemap of LLM training datasets: constant root -> "dataset", with tile
# area taken from the "size" column of the CSV.
df = pd.read_csv("llm.csv")
treemap_levels = [px.Constant("LLM"), "dataset"]
fig = px.treemap(df, path=treemap_levels, values="size").update_layout(
    # Fully transparent paper and no margins.
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
)
FIGURES["gpt"] = fig

# Bare expression: renders the figure when run as a notebook cell.
fig
|
|
|
|
|
# Line chart of time against sequence length, one line per model.
df = pd.read_csv("./seq-time.csv", index_col=0)

# Index labels such as "4k" are turned into numbers by rewriting "k" as
# "*1024" and evaluating the resulting expression.
# NOTE(review): eval() executes whatever the CSV index contains; this is only
# acceptable while the file is trusted. Consider an explicit parser.
df.index = df.index.map(lambda label: eval(label.replace("k", "*1024")))

# NOTE(review): the divisor 7 is not explained anywhere in this file; confirm
# what it normalises.
df["platformers"] = df["platformers"].div(7)

# The last column of the CSV is not plotted.
df = df.drop(columns=[df.columns[-1]])

# Wide -> long: one row per (sequence length, model) pair.
df = df.reset_index(names="sequence length")
df = df.melt(id_vars="sequence length", var_name="model", value_name="time")

fig = px.line(df, x="sequence length", y="time", color="model")
FIGURES["seq-time"] = fig

# Transparent backgrounds with white legend and axis text.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font={"color": "white"},
)
fig.update_xaxes(color="white").update_yaxes(color="white")

# Bare expression: renders the figure when run as a notebook cell.
fig
|
|
|
|
|
# Grouped bar chart of TFLOPS against sequence length, one bar group per
# sequence length and one colour per model.
df = pd.read_csv("seq-tflops.csv", index_col=0)

# Wide -> long: one row per (sequence length, model) pair.
df = df.reset_index(names="sequence length")
df = df.melt(id_vars="sequence length", var_name="model", value_name="tflops")

fig = px.bar(df, x="sequence length", y="tflops", color="model", barmode="group")
FIGURES["seq-tflops"] = fig

# Transparent backgrounds with white legend and axis text.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font={"color": "white"},
)
fig.update_xaxes(color="white").update_yaxes(color="white")

# Bare expression: renders the figure when run as a notebook cell.
fig
|
|
|
|
|
|
|
# Table preview of the first 100 training rows of the SUSTech/webvid dataset,
# with the "duration" column removed.
df = (
    datasets.load_dataset("SUSTech/webvid", split="train[:100]")
    .to_pandas()
    .drop(columns=["duration"])
)

table_header = dict(
    values=list(df.columns),
    fill_color="paleturquoise",
    align="left",
)
table_cells = dict(
    # One entry per column, in the same order as the header.
    values=[df[column] for column in df.columns],
    fill_color="lavender",
    align="left",
)
fig = go.Figure(data=[go.Table(header=table_header, cells=table_cells)])

# Fully transparent paper and no margins.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
)

FIGURES["webvid"] = fig
|
|
|
|
|
# Bar chart of token counts for a handful of long-context tasks.
# (The original created an empty go.Figure() here that was overwritten by
# px.bar before being used; that dead assignment has been removed.)
data = {
    "402-page transcripts from Apollo 11’s mission to the moon": 326914,
    "44-minute silent Buster Keaton movie": 696417,
    "more than 100,000 lines of code": 816767,
    "Generate 1min video": 1000000,
}

# Two-column frame: "task" (the dict keys) and "token" (the dict values).
df = pd.Series(data, name="token").to_frame().reset_index(names="task")

fig = px.bar(
    df,
    y="token",
    x="task",
    # Label each bar with its value using the ".2s" format.
    text_auto=".2s",
)
FIGURES["token-bar"] = fig

# Place the value labels outside the bars, horizontal, unclipped, in white.
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
    textfont_color="white",
)

# Transparent backgrounds with a white legend font.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)

# Strip axis lines, grid and zero line; the x axis also loses its title.
fig.update_xaxes(
    color="white",
    zeroline=False,
    showline=False,
    showgrid=False,
    title="",
)
fig.update_yaxes(
    showline=False,
    showgrid=False,
    zeroline=False,
    color="white",
)

# Bare expression: renders the figure when run as a notebook cell.
fig
|
|
|
|
|
|
|
def generate_loss(steps, initial_loss, decay_rate, noise_factor):
    """Return a synthetic, noisy, exponentially decaying loss curve.

    Args:
        steps: NumPy array of step positions; its shape sets the output shape.
        initial_loss: Value of the noise-free curve at step 0.
        decay_rate: Exponential decay constant applied to ``steps``.
        noise_factor: Scale of the Gaussian noise, relative to the noise-free
            loss at each step.

    Returns:
        ``initial_loss * exp(-decay_rate * steps)`` plus noise drawn from
        NumPy's global random state.
    """
    clean_curve = initial_loss * np.exp(-decay_rate * steps)
    # Noise is proportional to the clean value, so it shrinks as the loss does.
    gaussian = np.random.randn(*steps.shape)
    jitter = noise_factor * clean_curve * gaussian
    return clean_curve + jitter
|
|
|
|
|
def splitpoints(total, split):
    """Yield ``split`` consecutive slices that together cover ``range(total)``.

    The first ``split - 1`` slices each span ``total // split`` items; the
    last slice is open-ended so it absorbs any remainder.

    Args:
        total: Number of items to partition.
        split: Number of slices to produce; must be at least 1.

    Yields:
        ``slice`` objects, in order, suitable for indexing a sequence or array
        of length ``total``.

    Raises:
        ValueError: If ``split`` is smaller than 1 (raised on first iteration,
            since this is a generator).
    """
    if split < 1:
        raise ValueError(f"split must be >= 1, got {split!r}")
    step = total // split
    for i in range(split - 1):
        yield slice(i * step, (i + 1) * step)
    # Derive the tail start from `split` rather than the loop variable: with
    # split == 1 the loop never runs and `i` would be unbound.
    yield slice((split - 1) * step, None)
|
|
|
|
|
# One entry per segment of the "cloud-switch" loss curve: the trace name and
# its line colour.
# NOTE(review): "icon" is not read anywhere in the visible code, and the paths
# do not line up with the provider mentioned in each "name" — confirm which
# is intended before relying on it.
meta = [
    dict(name="2xDGX on aws", color="red", icon="../figures/gc.png"),
    dict(name="16xDGX on aliyun", color="orange", icon="../figures/aws-white.png"),
    dict(name="128xDGX on ucloud", color="blue", icon="../figures/aliyun.png"),
]
|
|
|
|
|
# Synthetic loss curve drawn as one coloured segment per entry in `meta`,
# with provider logos overlaid on the plot.
steps = np.linspace(0, 1, 1000)
loss = generate_loss(steps, initial_loss=1, decay_rate=5, noise_factor=0.1)

fig = go.Figure()
FIGURES["cloud-switch"] = fig

# Cut the 1000 samples into len(meta) consecutive pieces, one trace each.
for i, idx in enumerate(splitpoints(1000, len(meta))):
    segment = meta[i]
    segment_trace = go.Scatter(
        x=steps[idx],
        y=loss[idx],
        mode="lines",
        name=segment["name"],
        line=dict(color=segment["color"]),
    )
    fig.add_trace(segment_trace)

# (image path, x, y, size); x/y/size are in paper coordinates and each logo
# uses the same value for sizex and sizey.
logo_placements = (
    ("../figures/logo/ucloud.png", 0.8, 0.2, 0.2),
    ("../figures/aws-white.png", 0.17, 0.7, 0.15),
    ("../figures/aliyun.png", 0.43, 0.3, 0.15),
)
for logo_path, logo_x, logo_y, logo_size in logo_placements:
    fig.add_layout_image(
        x=logo_x,
        sizex=logo_size,
        y=logo_y,
        sizey=logo_size,
        xref="paper",
        yref="paper",
        opacity=1.0,
        layer="above",
        source=Image.open(logo_path),
    )

# No legend; transparent backgrounds.
fig.update_layout(
    showlegend=False,
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0)",
)

# The x axis is stripped of ticks, lines and grid.
x_axis_style = dict(
    showticklabels=False,
    showline=False,
    zeroline=False,
    showgrid=False,
    automargin=True,
)
fig.update_xaxes(**x_axis_style)

# The y axis keeps a faint dashed white grid and a white "Loss" title.
y_axis_style = dict(
    showticklabels=False,
    zeroline=False,
    showline=False,
    griddash="4px",
    gridcolor="rgba(255,255,255,0.3)",
    title="Loss",
    color="white",
)
fig.update_yaxes(**y_axis_style)

# Bare expression: renders the figure when run as a notebook cell.
fig
|
|
|
|
|
|
|
def plot_gantt(df):
    """Build a Gantt-style timeline figure from a task table.

    Args:
        df: DataFrame with "Start", "End" and "Task" columns. "Task" is used
            both as the y-axis row and as the colour grouping.

    Returns:
        The styled plotly figure produced by ``px.timeline``.
    """
    gantt = px.timeline(df, x_start="Start", x_end="End", y="Task", color="Task")

    # Layout: hour:minute tick format, no legend, transparent backgrounds.
    gantt.update_layout(
        xaxis_tickformat="%H:%M",
        showlegend=False,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(255,255,255,0)",
    )

    # The x axis is stripped of ticks, lines and grid.
    x_axis_style = dict(
        showticklabels=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        automargin=True,
    )
    gantt.update_xaxes(**x_axis_style)

    # The y axis keeps large white task labels and a faint dashed grid.
    y_axis_style = dict(
        zeroline=False,
        showline=False,
        griddash="4px",
        gridcolor="rgba(0,0,0,0.3)",
        title="",
        color="white",
        tickfont=dict(size=20),
    )
    gantt.update_yaxes(**y_axis_style)

    return gantt
|
|
|
|
|
|
|
# Synthetic Read/Transform timeline shared by the three "profile-*" figures.
num_rows = 1000
download_prop = 0.65  # probability that a slot is labelled "Read"

# One slot every 4 hours, each lasting 1 hour, with a randomly chosen task.
df = pd.DataFrame(
    {"Start": pd.date_range("1-jan-2021", periods=num_rows, freq="4h")}
)
df["End"] = df["Start"] + pd.Timedelta(hours=1)
df["Task"] = np.random.choice(
    ["Read", "Transform"], num_rows, p=(download_prop, 1 - download_prop)
)

# Pin the first and last slots so the timeline always opens with a "Read"
# and closes with a "Transform".
df.loc[0, "Task"] = "Read"
df.loc[len(df) - 1, "Task"] = "Transform"

# Collapse each run of consecutive identical tasks into a single row that
# spans from the run's earliest Start to its latest End.
_run_id = df["Task"].ne(df["Task"].shift()).cumsum()
df = df.groupby(_run_id).agg({"Start": "min", "End": "max", "Task": "first"})

timeline = df.copy()
|
|
|
|
|
# "profile-naive": the shared timeline with three "Train" windows added.
df = timeline.copy()

# Ten evenly spaced timestamps across the whole timeline; the windows are the
# spans between points (2, 3), (5, 6) and (8, 9).
ddi = pd.date_range(df.iloc[0].Start, end=df.iloc[-1].End, periods=10)
for start, end in zip(ddi[2:-1:3], ddi[3::3]):
    # Relabel every existing row that starts inside the window ...
    starts_in_window = df["Start"].between(start, end)
    df.loc[starts_in_window, "Task"] = "Train"
    # ... and append one "Train" row covering the whole window, under a label
    # just past the current row count.
    train_row = pd.Series({"Start": start, "End": end, "Task": "Train"})
    df.loc[len(df) + 1] = train_row

FIGURES["profile-naive"] = plot_gantt(df)

# Bare expression: renders the figure when run as a notebook cell.
FIGURES["profile-naive"]
|
|
|
|
|
# "profile-old": the shared timeline with ten long "Train" windows added.
df = timeline.copy()
prop = 10

# (prop + 1) * 10 evenly spaced timestamps; each window runs from point
# 1 + k * (prop + 1) to point prop + k * (prop + 1).
ddi = pd.date_range(df.iloc[0].Start, end=df.iloc[-1].End, periods=(prop + 1) * 10)
for start, end in zip(ddi[1 : -1 : prop + 1], ddi[prop :: prop + 1]):
    # Relabel every existing row that starts inside the window ...
    starts_in_window = df["Start"].between(start, end)
    df.loc[starts_in_window, "Task"] = "Train"
    # ... and append one "Train" row covering the whole window, under a label
    # just past the current row count.
    train_row = pd.Series({"Start": start, "End": end, "Task": "Train"})
    df.loc[len(df) + 1] = train_row

FIGURES["profile-old"] = plot_gantt(df)

# Bare expression: renders the figure when run as a notebook cell.
FIGURES["profile-old"]
|
|
|
|
|
# "profile-stream": the shared timeline plus a single "Train" row that runs
# from the first row's Start to the last row's Start.
df = timeline.copy()

first_start = df.iloc[0].Start
last_start = df.iloc[-1].Start
train_row = pd.Series({"Start": first_start, "End": last_start, "Task": "Train"})
# Appended under a label just past the current row count.
df.loc[len(df) + 1] = train_row

FIGURES["profile-stream"] = plot_gantt(df)

# Bare expression: renders the figure when run as a notebook cell.
FIGURES["profile-stream"]
|
|
|
|
|
|
|
# Export every registered figure as an HTML fragment (not a full page) that
# loads plotly.js from the CDN. The files are written with a .qmd extension.
for k, v in FIGURES.items():
    print(k)
    out_path = f"../components/{k}.qmd"
    v.write_html(out_path, full_html=False, include_plotlyjs="cdn")
|
|
|
|
|
|
|
|
|
import qrcode |
|
from qrcode.image.styledpil import StyledPilImage |
|
from qrcode.image.styles.moduledrawers.pil import RoundedModuleDrawer |
|
from qrcode.image.styles.colormasks import RadialGradiantColorMask |
|
|
|
# QR code for the WeChat link: white modules on a transparent background,
# using the "L" error-correction level.
wechat_url = "https://u.wechat.com/MAmdMGMYjGFC4-2ESxZ1oyw"
qr_colors = dict(fill_color="white", back_color="transparent")

qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
qr.add_data(wechat_url)
img_2 = qr.make_image(**qr_colors)
img_2.save("../figures/qr/jing.png")
|
|
|
|
|
|
|
# QR code for a mailto link: white modules on a transparent background,
# using the "L" error-correction level.
# NOTE(review): the address in this link reads "[email protected]", which looks
# like a redaction placeholder rather than a real mailbox — confirm the
# intended address before regenerating the image.
mail_link = "mailto:[email protected]?subject=Hello&body="
qr_colors = dict(fill_color="white", back_color="transparent")

qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
qr.add_data(mail_link)
img_2 = qr.make_image(**qr_colors)
img_2.save("../figures/qr/mail-data.png")
|
|
|
|
|
|