Spaces:
Running
Running
File size: 2,519 Bytes
c8763bd d8b9ce2 c8763bd d8b9ce2 c8763bd d8b9ce2 c8763bd d8b9ce2 c8763bd 1cbd09d c8763bd d8b9ce2 c8763bd d8b9ce2 c8763bd d8b9ce2 c8763bd d8b9ce2 c8763bd d8b9ce2 c8763bd d8b9ce2 c8763bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import os
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, Repository
from apscheduler.schedulers.background import BackgroundScheduler
from src.assets.text_content import TITLE, INTRODUCTION_TEXT
from src.assets.css_html_js import custom_css, get_window_url_params
# Hugging Face Hub access token taken from the environment; None when unset.
OPTIMUM_TOKEN = os.getenv("OPTIMUM_TOKEN")
# Repo id of the Space that restart_space() asks the Hub to restart.
LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
# Repo id of the dataset that load_dataset_repo() clones.
LLM_PERF_DATASET_REPO = "optimum/llm-perf"
def restart_space():
    """Ask the Hugging Face Hub to restart the leaderboard Space.

    Calls ``HfApi.restart_space`` for ``LLM_PERF_LEADERBOARD_REPO`` and
    authenticates with ``OPTIMUM_TOKEN``. Nothing is returned.
    """
    hub_client = HfApi()
    hub_client.restart_space(
        repo_id=LLM_PERF_LEADERBOARD_REPO,
        token=OPTIMUM_TOKEN,
    )
def load_dataset_repo():
    """Clone and pull the LLM-Perf dataset repository when a token is set.

    Returns:
        The ``Repository`` handle for the local ``./llm-perf/`` checkout
        (after a ``git_pull``), or ``None`` when ``OPTIMUM_TOKEN`` is falsy,
        in which case nothing is cloned.
    """
    # No token: skip the clone entirely and report that no repo is available.
    if not OPTIMUM_TOKEN:
        return None

    print("Loading LLM-Perf-Dataset from Hub...")
    dataset_repo = Repository(
        local_dir="./llm-perf/",
        clone_from=LLM_PERF_DATASET_REPO,
        token=OPTIMUM_TOKEN,
        repo_type="dataset",
    )
    dataset_repo.git_pull()
    return dataset_repo
def get_leaderboard_df():
    """Build the leaderboard table from the inference report CSV.

    When the module-level ``llm_perf_repo`` is set, it is pulled first. The
    report at ``./llm-perf/reports/cuda_1_100/inference_report.csv`` is then
    read, reduced to six columns, given display names and sorted by
    throughput in descending order.

    Returns:
        pandas.DataFrame: the renamed and sorted table.
    """
    if llm_perf_repo:
        llm_perf_repo.git_pull()

    # Report column -> display name; insertion order is the column order.
    display_names = {
        "model": "Model",
        "backend.name": "Backend",
        "backend.torch_dtype": "Torch dtype",
        "backend.quantization": "Quantization",
        "generate.latency(s)": "Latency (s)",
        "generate.throughput(tokens/s)": "Throughput (tokens/s)",
    }

    report = pd.read_csv("./llm-perf/reports/cuda_1_100/inference_report.csv")
    table = report[list(display_names)].rename(columns=display_names)
    return table.sort_values(by=["Throughput (tokens/s)"], ascending=False)
def refresh():
    """Return a newly built leaderboard table from ``get_leaderboard_df``."""
    return get_leaderboard_df()
# Load the dataset checkout at start-up; get_leaderboard_df() reads this global.
llm_perf_repo = load_dataset_repo()

# Page layout: title, introduction text, then one tab holding the table.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("Vanilla Benchmark", elem_id="vanilla-benchmark", id=0):
            # The table content is computed once, while the UI is being built.
            leaderboard_df = get_leaderboard_df()
            leaderboard_table_lite = gr.components.Dataframe(
                value=leaderboard_df,
                headers=list(leaderboard_df.columns),
                max_rows=None,
                elem_id="leaderboard-table-lite",
            )
# Schedule restart_space() to run every 3600 seconds.
# NOTE(review): presumably the restart is what refreshes the displayed data,
# since the table is only built at start-up -- confirm.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=3600)
scheduler.start()

# Enable request queuing, then start serving the app.
demo.queue(concurrency_count=40)
demo.launch()
|