import gradio as gr
import torch
# from transformers import AutoModel, AutoTokenizer
def load_model(model_link):
    # model = AutoModel.from_pretrained(model_link)
    return "model"


def update_config(quantization_type, bits, threshold):
    # Configuration logic here
    return {"quantization": quantization_type, "bits": bits, "threshold": threshold}


def run_benchmark(model_type, quantization_type, bits, threshold):
    # Placeholder benchmarking logic; the signature takes the four UI values
    # passed in by the click handler below
    config = update_config(quantization_type, bits, threshold)
    return {"config": config, "speed": "X ms/token", "memory": "Y GB"}

# Create the interface
with gr.Blocks() as demo:
with gr.Tab("Model Loading"):
model_input = gr.Textbox(label="Hugging Face Model Link")
model_type = gr.Dropdown(choices=["BERT", "GPT", "T5"], label="Model Type")
load_btn = gr.Button("Load Model")
with gr.Tab("Quantization"):
quant_type = gr.Dropdown(choices=["INT8", "INT4", "FP16"], label="Quantization Type")
bits = gr.Slider(minimum=4, maximum=8, step=1, label="Bits")
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
with gr.Tab("Benchmarking"):
benchmark_btn = gr.Button("Run Benchmark")
results = gr.JSON(label="Benchmark Results")
    # Set up event handlers
    load_btn.click(load_model, inputs=[model_input])
    benchmark_btn.click(
        run_benchmark,
        inputs=[model_type, quant_type, bits, threshold],
        outputs=[results],
    )
demo.launch()