|
import hmac
import os
import shutil
import tempfile

import gradio as gr
import pandas as pd
|
|
|
|
|
# HTML banner shown as the first element of the page (passed to gr.HTML).
# NOTE(review): the "๐" glyphs look like emoji whose bytes were decoded with
# the wrong charset; the intended emoji cannot be recovered from this copy —
# restore them from the upstream file.
DESCRIPTION = """
<h2 style='text-align: center; color: #cbff4d !important; text-shadow: 2px 2px 4px rgba(0,0,0,0.1);'>๐ LLM Inference Leaderboard: Pushing the Boundaries of Performance ๐</h2>
"""
|
|
|
# HTML intro card shown directly under the banner (passed to gr.HTML): the
# project's goal and the six inference engines that were benchmarked.
# NOTE(review): the Thai-looking glyphs in front of headings and list items are
# mis-decoded emoji; they are kept verbatim because the original code points
# cannot be determined from this copy — restore them from the upstream file.
INTRODUCTION = """
<div style='background-color: #e6ffd9; padding: 20px; border-radius: 15px; margin-bottom: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<h3 style='color: #00480a;'>๐ฌ Our Exciting Quest</h3>
<p style='color: #00480a;'>We're on a thrilling journey to help developers discover the perfect LLMs and libraries for their innovative projects! We've put these models through their paces using six cutting-edge inference engines:</p>
<ul style='color: #00480a;'>
<li>๐ vLLM</li>
<li>๐ TGI</li>
<li>โก TensorRT-LLM</li>
<li>๐ฎ Tritonvllm</li>
<li>๐ Deepspeed-mii</li>
<li>๐ฏ ctranslate</li>
</ul>
<p style='color: #00480a;'>All our tests were conducted on state-of-the-art A100 GPUs hosted on Azure, ensuring a fair and neutral battleground!</p>
<p style='color: #00480a; font-weight: bold;'>Our mission: Empower developers, researchers, and AI enthusiasts to find their perfect LLM match for both development and production environments!</p>
</div>
"""
|
|
|
# HTML methodology card rendered below the leaderboard table (passed to
# gr.HTML): hardware, container setup, sampling settings, prompt/token ranges
# and the models and libraries covered.
# NOTE(review): the Thai-looking glyphs are mis-decoded emoji kept verbatim;
# restore the intended emoji from the upstream file.
HOW_WE_TESTED = """
<div style='background-color: #cbff4d; padding: 20px; border-radius: 15px; margin-top: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<h3 style='color: #00480a;'>๐งช Our Rigorous Testing Process</h3>
<p style='color: #00480a;'>We left no stone unturned in our quest for reliable benchmarks:</p>
<ul style='color: #00480a;'>
<li><strong>๐ฅ๏ธ Platform:</strong> A100 GPUs from Azure - the ultimate testing ground!</li>
<li><strong>๐ณ Setup:</strong> Docker containers for each library, ensuring a pristine environment.</li>
<li><strong>โ๏ธ Configuration:</strong> Standardized settings (temperature 0.5, top_p 1) for laser-focused performance comparisons.</li>
<li><strong>๐ Prompts & Token Ranges:</strong> Six diverse prompts, input lengths from 20 to 2,000 tokens, and generation lengths of 100, 200, and 500 tokens - pushing the boundaries of flexibility!</li>
<li><strong>๐ค Models & Libraries Tested:</strong> We put the best through their paces: Phi-3-medium-128k-instruct, Meta-Llama-3.1-8B-Instruct, Mistral-7B-Instruct-v0.3, Qwen2-7B-Instruct, and Gemma-2-9b-it, using TGI, vLLM, DeepSpeed Mii, CTranslate2, Triton with vLLM Backend, and TensorRT-LLM.</li>
</ul>
</div>
"""
|
|
|
# Folder that holds the benchmark CSVs: read once at start-up to build the
# leaderboard and used as the destination for uploaded files.
csv_folder_path = 'result_csv/'

# Shared secret an uploader has to type in; read from the environment so it
# never lives in the source. It is None when the variable is not set.
UPLOAD_SECRET = os.getenv("UPLOAD_SECRET")
|
|
|
|
|
|
|
def _upload_source_path(file):
    """Return the on-disk path behind an upload, or None for a bare stream."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    candidate = getattr(file, "name", None)
    if isinstance(candidate, str) and os.path.isfile(candidate):
        return candidate
    return None


def _free_destination(folder, filename):
    """Return a path inside *folder* for *filename* that is not taken yet."""
    stem, ext = os.path.splitext(filename)
    destination = os.path.join(folder, filename)
    counter = 1
    while os.path.exists(destination):
        destination = os.path.join(folder, f"{stem}_{counter}{ext}")
        counter += 1
    return destination


def add_new_entry(file, password):
    """Check the upload password, store the CSV and append it to the leaderboard.

    Args:
        file: The upload to ingest. May be a filesystem path (``str`` or
            ``os.PathLike``), an object whose ``.name`` is the path of an
            existing file, or a readable file-like object.
        password: Secret typed by the uploader; it must equal ``UPLOAD_SECRET``.

    Returns:
        A ``(dataframe, message)`` tuple. ``dataframe`` is the module-level
        leaderboard ``df`` (with the new rows on success, unchanged otherwise)
        and ``message`` is a human-readable status string.
    """
    global df

    if file is None:
        return df, "No file uploaded."

    # An unset or empty secret must never authorise an upload, and the
    # comparison is constant-time so it does not leak how much matched.
    password_ok = (
        bool(UPLOAD_SECRET)
        and isinstance(password, str)
        and hmac.compare_digest(
            password.encode("utf-8"), UPLOAD_SECRET.encode("utf-8")
        )
    )
    if not password_ok:
        return df, "Incorrect password. Upload failed."

    temp_file_path = None
    try:
        source_path = _upload_source_path(file)

        # Stage a private copy so the caller's file is left untouched and a
        # malformed CSV never reaches the results folder.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
            temp_file_path = temp_file.name
            if source_path is not None:
                with open(source_path, "rb") as source:
                    shutil.copyfileobj(source, temp_file)
            else:
                payload = file.read()
                if isinstance(payload, str):
                    payload = payload.encode("utf-8")
                temp_file.write(payload)

        new_df = pd.read_csv(temp_file_path)

        columns_order = [
            "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
            "input_length", "output_length",
        ]
        # Columns missing from the upload are added as all-NA; extras are dropped.
        for col in columns_order:
            if col not in new_df.columns:
                new_df[col] = pd.NA
        new_df = new_df[columns_order]

        original_name = source_path or getattr(file, "name", None) or ""
        # basename() strips any directory part, so a crafted name cannot
        # escape the results folder.
        filename = os.path.basename(str(original_name)) or "upload.csv"
        if not filename.endswith('.csv'):
            # The start-up loader only picks up "*.csv"; without the suffix
            # the data would disappear after a restart.
            filename += '.csv'

        # Build the merged frame first, persist second, publish last: a failure
        # at any step leaves both the folder and the global frame consistent.
        merged = pd.concat([df, new_df], ignore_index=True)
        os.makedirs(csv_folder_path, exist_ok=True)
        destination = _free_destination(csv_folder_path, filename)
        shutil.move(temp_file_path, destination)
        df = merged

        stored_name = os.path.basename(destination)
        return df, f"File '{stored_name}' uploaded and data added successfully!"
    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing.
        return df, f"An error occurred: {str(e)}"
    finally:
        # The staged copy still exists only if it was never moved into place.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
|
|
|
def read_and_process_csv_files(folder_path):
    """Load every ``*.csv`` in *folder_path* into one leaderboard DataFrame.

    Files are read in sorted name order so the row order is reproducible.
    Columns a file does not provide are filled with ``pd.NA`` and columns
    outside the leaderboard schema are dropped.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A DataFrame with exactly the leaderboard columns, in display order.
        It is empty when the directory is missing or holds no CSV files.
    """
    columns_order = [
        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
        "input_length", "output_length",
    ]

    # A fresh deployment may not have a results folder yet; start empty
    # rather than failing while the module is being imported.
    if not os.path.isdir(folder_path):
        return pd.DataFrame(columns=columns_order)

    frames = [
        pd.read_csv(os.path.join(folder_path, filename))
        for filename in sorted(os.listdir(folder_path))
        if filename.endswith('.csv')
    ]

    # pd.concat raises on an empty list, so handle "no CSVs" explicitly.
    if not frames:
        return pd.DataFrame(columns=columns_order)

    combined_df = pd.concat(frames, ignore_index=True)

    for col in columns_order:
        if col not in combined_df.columns:
            combined_df[col] = pd.NA

    return combined_df[columns_order]
|
|
|
# Module-level leaderboard table: loaded once at import time from the results
# folder, read by the filter/search callbacks and rebound by add_new_entry.
df = read_and_process_csv_files(csv_folder_path)
|
|
|
def get_leaderboard_df():
    """Return the current module-level leaderboard DataFrame ``df``.

    The global is looked up at call time, so the value reflects any rebinding
    of ``df`` that happened after start-up.
    """
    return df
|
|
|
def filter_and_search(search_term, library_filter):
    """Return the leaderboard rows matching a model-name search and a library.

    Args:
        search_term: Text to look for inside ``Model_Name``. Matching is a
            case-insensitive literal substring test; an empty value disables
            the search.
        library_filter: Exact ``Library`` value to keep. ``"All"``, ``None``
            or an empty value disables the filter.

    Returns:
        A new DataFrame with the matching rows; the module-level ``df`` is
        not modified.
    """
    filtered_df = df.copy()

    if search_term:
        # regex=False: the term is user input, so characters such as "(" or
        # "+" must be matched literally instead of raising a regex error.
        matches = filtered_df['Model_Name'].str.contains(
            search_term, case=False, na=False, regex=False
        )
        filtered_df = filtered_df[matches]

    # A cleared selection (None / "") means "no library filter", like "All".
    if library_filter and library_filter != "All":
        filtered_df = filtered_df[filtered_df['Library'] == library_filter]

    return filtered_df
|
|
|
# Stylesheet handed to gr.Blocks(css=...): green/lime theme for the page
# background, the tab bar and the primary button.
custom_css = """
body {
    background-color: #f0fff0;
    font-family: 'Roboto', sans-serif;
}
.gradio-container {
    max-width: 1200px !important;
}
.gradio-container .prose * {
    color: #00480a !important;
}
.gradio-container .prose h2,
.gradio-container .prose h3 {
    color: #00480a !important;
}
.tabs {
    background-color: #e6ffd9;
    border-radius: 15px;
    overflow: hidden;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.tab-nav {
    background-color: #00480a;
    padding: 10px;
}
.tab-nav button {
    color: #cbff4d !important;
    background-color: #006400;
    border: none;
    padding: 10px 20px;
    margin-right: 5px;
    border-radius: 10px;
    cursor: pointer;
    transition: all 0.3s ease;
}
.tab-nav button:hover {
    background-color: #cbff4d;
    color: #00480a !important;
}
.tab-nav button.selected {
    background-color: #cbff4d;
    color: #00480a !important;
    font-weight: bold;
}
.gr-button-primary {
    background-color: #00480a !important;
    border-color: #00480a !important;
    color: #cbff4d !important;
}
.gr-button-primary:hover {
    background-color: #cbff4d !important;
    color: #00480a !important;
}
"""
|
|
|
# Build the Gradio page: banner and intro, then a "Leaderboard" tab (search
# box, library filter, results table, methodology card) and an "Add New Entry"
# tab (password-protected CSV upload).
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation — confirm component placement against the deployed layout.
# NOTE(review): the Thai-looking glyphs in the labels are mis-decoded emoji,
# kept verbatim; restore the intended emoji from the upstream file.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(DESCRIPTION)
    gr.HTML(INTRODUCTION)

    with gr.Tabs():
        with gr.TabItem("๐ Leaderboard"):
            with gr.Row():
                search_input = gr.Textbox(label="๐ Search Model Name", placeholder="Enter model name...")
                # dropna(): rows without a library must not produce a NaN
                # entry in the dropdown.
                library_dropdown = gr.Dropdown(
                    choices=["All"] + df['Library'].dropna().unique().tolist(),
                    label="๐ท๏ธ Filter by Library",
                    value="All",
                )

            leaderboard = gr.DataFrame(df)

            gr.HTML(HOW_WE_TESTED)

        with gr.TabItem("โ Add New Entry"):
            file_upload = gr.File(label="๐ Upload CSV File")
            password_input = gr.Textbox(label="๐ Upload Password", type="password")
            submit_button = gr.Button("๐ค Add Entry", variant="primary")
            result = gr.Markdown()

            # A successful upload refreshes the table and shows a status line.
            # NOTE(review): the library dropdown choices are fixed at build
            # time, so a library introduced by an upload is not selectable
            # until the app restarts.
            submit_button.click(
                add_new_entry,
                inputs=[file_upload, password_input],
                outputs=[leaderboard, result]
            )

    # Either control changing re-runs the combined search + filter.
    search_input.change(filter_and_search, inputs=[search_input, library_dropdown], outputs=leaderboard)
    library_dropdown.change(filter_and_search, inputs=[search_input, library_dropdown], outputs=leaderboard)

    # Re-read the global table on every page load so new visitors see rows
    # added after start-up.
    demo.load(get_leaderboard_df, outputs=[leaderboard])
|
|
|
# Start the Gradio server only when the file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    demo.launch()