import plotly.express as px |
from plotly.graph_objs import Figure, FigureWidget |
import datasets |
import pandas as pd |
import huggingface_hub |
import plotly.graph_objs as go |
import numpy as np |
from PIL import Image |
# Registry of every figure built by this script, keyed by the name that is
# later used for the exported component file.
FIGURES: dict[str, Figure] = {}

# Treemap of the NLP datasets: constant root -> task -> dataset, with tile
# area taken from the "size" column.
df = pd.read_csv("nlp_datas.csv")
nlp_levels = [px.Constant("nlp-datasets"), "task", "dataset"]
fig = px.treemap(df, path=nlp_levels, values="size")
# Transparent paper and zero margins so the chart fills its container.
fig.update_layout(
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    paper_bgcolor="rgba(0,0,0,0)",
)
FIGURES["nlp"] = fig
fig  # notebook-style display of the figure; has no effect in a plain script
# Treemap of the LLM datasets: constant root -> dataset, with tile area taken
# from the "size" column.
df = pd.read_csv("llm.csv")
llm_levels = [px.Constant("LLM"), "dataset"]
fig = px.treemap(df, path=llm_levels, values="size")
# Transparent paper and zero margins so the chart fills its container.
fig.update_layout(
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    paper_bgcolor="rgba(0,0,0,0)",
)
FIGURES["gpt"] = fig
fig  # notebook-style display of the figure; has no effect in a plain script
def _parse_sequence_length(label):
    """Convert a sequence-length label such as ``"4k"`` or ``"512"`` to a number.

    A trailing ``k`` multiplies the value by 1024. Integer labels yield an
    ``int``; labels with a fractional part yield a ``float``.

    Raises:
        ValueError: if the label is not of the form ``<number>[k]``.
    """
    text = str(label).strip()
    multiplier = 1
    if text.endswith("k"):
        text, multiplier = text[:-1], 1024
    try:
        return int(text) * multiplier
    except ValueError:
        return float(text) * multiplier


# Line chart of time against sequence length, one line per model column.
df = pd.read_csv("./seq-time.csv", index_col=0)
# The index labels come from a data file, so they are parsed explicitly rather
# than passed to eval(): the same "<number>k" -> number * 1024 expansion, but
# without executing arbitrary text from the CSV.
df.index = df.index.map(_parse_sequence_length)
df["platformers"] = df["platformers"] / 7
# The last column of the CSV is not plotted.
df.drop([df.columns[-1]], axis=1, inplace=True)
# Wide -> long: one row per (sequence length, model) pair.
df = df.reset_index(names="sequence length").melt(
    id_vars="sequence length", var_name="model", value_name="time"
)
fig = px.line(df, x="sequence length", y="time", color="model")
FIGURES["seq-time"] = fig
# Transparent backgrounds with white legend/axis text.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)
fig.update_xaxes(color="white")
fig.update_yaxes(color="white")
fig  # notebook-style display of the figure; has no effect in a plain script
# Grouped bar chart of TFLOPS against sequence length, one bar group per
# sequence length and one colour per model column.
wide_tflops = pd.read_csv("seq-tflops.csv", index_col=0)
# Wide -> long: one row per (sequence length, model) pair.
df = wide_tflops.reset_index(names="sequence length").melt(
    id_vars="sequence length",
    var_name="model",
    value_name="tflops",
)
fig = px.bar(
    df,
    x="sequence length",
    y="tflops",
    color="model",
    barmode="group",
)
# Transparent backgrounds with white legend/axis text.
fig.update_layout(
    plot_bgcolor="rgba(0,0,0,0)",
    paper_bgcolor="rgba(0,0,0,0)",
    legend_font={"color": "white"},
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
)
for recolour_axis in (fig.update_xaxes, fig.update_yaxes):
    recolour_axis(color="white")
FIGURES["seq-tflops"] = fig
fig  # notebook-style display of the figure; has no effect in a plain script
# Table preview of the first 100 training rows of the SUSTech/webvid dataset,
# without the "duration" column.
df = datasets.load_dataset("SUSTech/webvid", split="train[:100]").to_pandas()
df = df.drop(columns=["duration"])
column_names = list(df.columns)
preview_table = go.Table(
    header={
        "values": column_names,
        "fill_color": "paleturquoise",
        "align": "left",
    },
    cells={
        "values": [df[name] for name in column_names],
        "fill_color": "lavender",
        "align": "left",
    },
)
fig = go.Figure(data=[preview_table])
# Transparent paper and zero margins so the table fills its container.
fig.update_layout(
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    paper_bgcolor="rgba(0,0,0,0)",
)
FIGURES["webvid"] = fig
# Bar chart of token counts per task. The placeholder `go.Figure()` that used
# to be created first was discarded immediately by `px.bar`, so it is gone.
data = {
    "402-page transcripts from Apollo 11’s mission to the moon": 326914,
    "44-minute silent Buster Keaton movie": 696417,
    "more than 100,000 lines of code": 816767,
    "Generate 1min video": 1000000,
}
# Series (task -> token) turned into a two-column frame: "task", "token".
df = pd.Series(data, name="token").to_frame().reset_index(names="task")
fig = px.bar(
    df,
    y="token",
    x="task",
    text_auto=".2s",
)
FIGURES["token-bar"] = fig
# Value labels: white, horizontal, placed outside the bars and not clipped by
# the plot area.
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
    textfont_color="white",
)
# Transparent backgrounds with white legend text.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)
# Axes: white text, no axis lines, zero lines or grid; the x title is blanked.
fig.update_xaxes(
    color="white",
    zeroline=False,
    showline=False,
    showgrid=False,
    title="",
)
fig.update_yaxes(
    showline=False,
    showgrid=False,
    zeroline=False,
    color="white",
)
fig  # notebook-style display of the figure; has no effect in a plain script
def generate_loss(steps, initial_loss, decay_rate, noise_factor):
    """Return a synthetic, noisy, exponentially decaying loss curve.

    The clean curve is ``initial_loss * exp(-decay_rate * steps)``. Gaussian
    noise drawn from NumPy's global random state is scaled by ``noise_factor``
    and by the clean value at each step, then added to it.

    Args:
        steps: NumPy array of step positions; the result has the same shape.
        initial_loss: Value of the clean curve at step 0.
        decay_rate: Exponential decay rate applied to ``steps``.
        noise_factor: Relative noise amplitude; ``0`` gives the clean curve.
    """
    clean_curve = initial_loss * np.exp(-decay_rate * steps)
    gaussian = np.random.randn(*steps.shape)
    return clean_curve + noise_factor * clean_curve * gaussian
def splitpoints(total, split):
    """Yield ``split`` consecutive slices that together cover ``total`` items.

    Every slice but the last spans ``total // split`` items. The last slice is
    open-ended (``stop=None``), so it takes whatever remains when ``total`` is
    not divisible by ``split``.

    Args:
        total: Number of items to partition.
        split: Number of slices to produce; must be at least 1.

    Yields:
        ``slice`` objects in ascending order.

    Raises:
        ValueError: if ``split`` is smaller than 1.
    """
    if split < 1:
        raise ValueError(f"split must be at least 1, got {split}")
    step = total // split
    for i in range(split - 1):
        yield slice(i * step, (i + 1) * step)
    # Computed from `split` instead of the loop variable, which is unbound
    # when the loop body never runs (split == 1).
    yield slice((split - 1) * step, None)
# One entry per segment of the loss curve: the trace label and colour, the
# provider logo file, and where that logo is drawn on the figure. The logo
# position and (square) size are in paper coordinates.
# The "icon" paths used to be unused and out of step with the provider names;
# they now hold the files that were actually drawn, and they drive the logos.
meta = [
    {
        "name": "2xDGX on aws",
        "color": "red",
        "icon": "../figures/aws-white.png",
        "icon_x": 0.17,
        "icon_y": 0.7,
        "icon_size": 0.15,
    },
    {
        "name": "16xDGX on aliyun",
        "color": "orange",
        "icon": "../figures/aliyun.png",
        "icon_x": 0.43,
        "icon_y": 0.3,
        "icon_size": 0.15,
    },
    {
        "name": "128xDGX on ucloud",
        "color": "blue",
        "icon": "../figures/logo/ucloud.png",
        "icon_x": 0.8,
        "icon_y": 0.2,
        "icon_size": 0.2,
    },
]

# Synthetic loss curve over 1000 evenly spaced steps in [0, 1].
steps = np.linspace(0, 1, 1000)
loss = generate_loss(steps, initial_loss=1, decay_rate=5, noise_factor=0.1)

fig = go.Figure()
FIGURES["cloud-switch"] = fig
# The curve is cut into one consecutive segment per entry; each segment gets
# its own coloured trace and its provider logo.
for entry, idx in zip(meta, splitpoints(len(steps), len(meta))):
    fig.add_trace(
        go.Scatter(
            x=steps[idx],
            y=loss[idx],
            mode="lines",
            name=entry["name"],
            line=dict(color=entry["color"]),
        )
    )
    fig.add_layout_image(
        x=entry["icon_x"],
        sizex=entry["icon_size"],
        y=entry["icon_y"],
        sizey=entry["icon_size"],
        xref="paper",
        yref="paper",
        opacity=1.0,
        layer="above",
        source=Image.open(entry["icon"]),
    )

# Transparent backgrounds, no legend (the logos identify the segments).
fig.update_layout(
    showlegend=False,
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0)",
)
# X axis: no tick labels, axis line, zero line or grid.
fig.update_xaxes(
    showticklabels=False,
    showline=False,
    zeroline=False,
    showgrid=False,
    automargin=True,
)
# Y axis: titled "Loss" in white, with a dashed, semi-transparent white grid.
fig.update_yaxes(
    showticklabels=False,
    zeroline=False,
    showline=False,
    griddash="4px",
    gridcolor="rgba(255,255,255,0.3)",
    title="Loss",
    color="white",
)
fig  # notebook-style display of the figure; has no effect in a plain script
def plot_gantt(df):
    """Build a Gantt-style timeline figure from a task schedule.

    Args:
        df: DataFrame with "Start", "End" and "Task" columns. Each row becomes
            one bar on the row of its task, coloured by task.

    Returns:
        The Plotly figure, with transparent backgrounds, no legend, hidden
        x tick labels and large white task labels on the y axis.
    """
    x_axis_style = {
        "showticklabels": False,
        "showline": False,
        "zeroline": False,
        "showgrid": False,
        "automargin": True,
    }
    y_axis_style = {
        "zeroline": False,
        "showline": False,
        "griddash": "4px",
        "gridcolor": "rgba(0,0,0,0.3)",
        "title": "",
        "color": "white",
        "tickfont": {"size": 20},
    }
    # Each update_* call returns the figure itself, so the calls can be chained.
    return (
        px.timeline(df, x_start="Start", x_end="End", y="Task", color="Task")
        .update_layout(
            xaxis_tickformat="%H:%M",
            showlegend=False,
            paper_bgcolor="rgba(0,0,0,0)",
            plot_bgcolor="rgba(255,255,255,0)",
        )
        .update_xaxes(**x_axis_style)
        .update_yaxes(**y_axis_style)
    )
# Base schedule shared by the three "profile" figures: a random sequence of
# one-hour "Read" / "Transform" slots, one slot every four hours.
num_rows = 1000
download_prop = 0.65  # probability that a slot is a "Read"

slot_starts = pd.date_range("1-jan-2021", periods=num_rows, freq="4h")
df = pd.DataFrame({"Start": slot_starts})
df["End"] = df["Start"] + pd.Timedelta(hours=1)
df["Task"] = np.random.choice(
    ["Read", "Transform"], num_rows, p=(download_prop, 1 - download_prop)
)
# Pin both ends so the schedule always opens with a read and closes with a
# transform.
df.loc[0, "Task"] = "Read"
df.loc[len(df) - 1, "Task"] = "Transform"

# Merge each run of consecutive identical tasks into one row that spans from
# the run's first start to its last end. The run id grows by one whenever the
# task differs from the previous row's task.
task_changed = df["Task"] != df["Task"].shift()
run_id = task_changed.cumsum()
df = df.groupby(run_id).agg({"Start": "min", "End": "max", "Task": "first"})
timeline = df.copy()
# "profile-naive": the schedule is divided by 10 evenly spaced boundaries and
# three of the resulting intervals become training windows.
df = timeline.copy()
boundaries = pd.date_range(df.iloc[0].Start, end=df.iloc[-1].End, periods=10)
window_starts = boundaries[2:-1:3]
window_ends = boundaries[3::3]
for train_start, train_end in zip(window_starts, window_ends):
    # Rows that start inside the window (bounds inclusive) are relabelled ...
    starts_in_window = (df["Start"] >= train_start) & (df["Start"] <= train_end)
    df.loc[starts_in_window, "Task"] = "Train"
    # ... and one row covering the whole window is appended under a new label.
    train_row = pd.Series(
        {"Start": train_start, "End": train_end, "Task": "Train"}
    )
    df.loc[len(df) + 1] = train_row
FIGURES["profile-naive"] = plot_gantt(df)
FIGURES["profile-naive"]  # notebook-style display; no effect in a plain script
# "profile-old": same construction as "profile-naive", but with a finer grid
# of (prop + 1) * 10 boundaries; every (prop + 1)-th interval group becomes a
# training window running from boundary 1 to boundary `prop` of the group.
df = timeline.copy()
prop = 10
stride = prop + 1
boundaries = pd.date_range(
    df.iloc[0].Start, end=df.iloc[-1].End, periods=stride * 10
)
window_starts = boundaries[1:-1:stride]
window_ends = boundaries[prop::stride]
for train_start, train_end in zip(window_starts, window_ends):
    # Rows that start inside the window (bounds inclusive) are relabelled ...
    starts_in_window = (df["Start"] >= train_start) & (df["Start"] <= train_end)
    df.loc[starts_in_window, "Task"] = "Train"
    # ... and one row covering the whole window is appended under a new label.
    train_row = pd.Series(
        {"Start": train_start, "End": train_end, "Task": "Train"}
    )
    df.loc[len(df) + 1] = train_row
FIGURES["profile-old"] = plot_gantt(df)
FIGURES["profile-old"]  # notebook-style display; no effect in a plain script
# "profile-stream": the read/transform schedule is left untouched and a single
# "Train" row is appended, running from the first row's start to the last
# row's start.
df = timeline.copy()
first_row, last_row = df.iloc[0], df.iloc[-1]
streaming_train = pd.Series(
    {"Start": first_row.Start, "End": last_row.Start, "Task": "Train"}
)
df.loc[len(df) + 1] = streaming_train
FIGURES["profile-stream"] = plot_gantt(df)
FIGURES["profile-stream"]  # notebook-style display; no effect in a plain script
# Export every registered figure as an HTML fragment (not a full page) that
# loads plotly.js from the CDN, one file per figure name.
for figure_name, figure in FIGURES.items():
    print(figure_name)
    output_path = f"../components/{figure_name}.qmd"
    figure.write_html(output_path, full_html=False, include_plotlyjs="cdn")
import qrcode |
from qrcode.image.styledpil import StyledPilImage |
from qrcode.image.styles.moduledrawers.pil import RoundedModuleDrawer |
from qrcode.image.styles.colormasks import RadialGradiantColorMask |
# Render each payload as a white-on-transparent QR code and save it.
# Pairs are (encoded data, output file).
for payload, target in (
    ("https://u.wechat.com/MAmdMGMYjGFC4-2ESxZ1oyw", "../figures/qr/jing.png"),
    ("mailto:[email protected]?subject=Hello&body=", "../figures/qr/mail-data.png"),
):
    qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
    qr.add_data(payload)
    img_2 = qr.make_image(fill_color="white", back_color="transparent")
    img_2.save(target)