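"""Gradio Space that compares a baseline SetFit classifier against a
quantized, distilled ONNX variant: for a given sentence it shows each
model's predicted label and average prediction latency."""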
import gradio as gr
import numpy as np
from time import perf_counter

from setfit import SetFitModel
from optimum.onnxruntime import ORTModelForFeatureExtraction
from transformers import AutoTokenizer

from quantization import OnnxSetFitModel
# Load the models: the baseline SetFit model and its quantized ONNX counterpart
model1 = SetFitModel.from_pretrained("hsmashiana/base_model_hpml")
ort_model = ORTModelForFeatureExtraction.from_pretrained(
    "hsmashiana/optimized_model_hpml", file_name="model_quantized.onnx"
)
tokenizer = AutoTokenizer.from_pretrained("hsmashiana/optimized_model_hpml")
model3 = OnnxSetFitModel(ort_model, tokenizer, model1.model_head)
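# For reference, a minimal sketch of the OnnxSetFitModel wrapper imported from
# the local `quantization` module. This is an assumption about its interface
# (modeled on the SetFit ONNX example: quantized ONNX body for embeddings,
# original model head for classification), not the module's actual code:
#
#     class OnnxSetFitModel:
#         def __init__(self, ort_model, tokenizer, model_head):
#             self.ort_model = ort_model
#             self.tokenizer = tokenizer
#             self.model_head = model_head
#
#         def predict(self, texts):
#             tokens = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
#             outputs = self.ort_model(**tokens)
#             embeddings = outputs.last_hidden_state[:, 0]  # assumed pooling; the real module may mean-pool
#             return self.model_head.predict(embeddings.detach().numpy())
#
#         __call__ = predict  # so model3(texts) and model3.predict(texts) are equivalent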

# Class id -> label name (the AG News label set these models classify into)
decode = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}

def compare_models(text):
    """Classify `text` with both models and report each one's average
    prediction latency (ms) over 20 timed runs."""
    # --- Baseline SetFit model ---
    times = []
    # Warm-up runs so one-time setup costs do not skew the timing
    for _ in range(5):
        model1([text])
    # Time 20 predictions
    for _ in range(20):
        start = perf_counter()
        out1 = model1([text])
        end = perf_counter()
        times.append(end - start)
    # Mean latency in milliseconds
    avg_latency_ms_model_1 = np.mean(times) * 1000
    # --- Quantized ONNX model ---
    times = []
    # Warm-up runs so one-time setup costs do not skew the timing
    for _ in range(5):
        model3([text])
    # Time 20 predictions
    for _ in range(20):
        start = perf_counter()
        out3 = model3([text])
        end = perf_counter()
        times.append(end - start)
    # Mean latency in milliseconds
    avg_latency_ms_model_3 = np.mean(times) * 1000
    return (
        {"answer": decode[out1.numpy()[0]], "avgtime": avg_latency_ms_model_1},
        {"answer": decode[out3[0]], "avgtime": avg_latency_ms_model_3},
    )
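
# The two timing loops above differ only in the callable being measured. A
# small helper like the hypothetical `time_predict` below (a sketch, not wired
# into compare_models) could replace both:
def time_predict(predict, text, warmup=5, runs=20):
    """Return the mean latency of `predict([text])` in milliseconds."""
    for _ in range(warmup):
        predict([text])
    samples = []
    for _ in range(runs):
        start = perf_counter()
        predict([text])
        end = perf_counter()
        samples.append(end - start)
    return float(np.mean(samples)) * 1000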

# Create a Gradio interface
iface = gr.Interface(
    fn=compare_models,
    inputs="text",
    outputs=[
        gr.components.JSON(label="Base miniLM"),
        gr.components.JSON(label="Quantized Distilled miniLM"),
    ],
    title="Compare Sentence Classification Models",
    description="Enter a sentence to see how each model classifies it and how long, on average, each prediction takes.",
)

# Run the interface
iface.launch()