import gradio as gr
import numpy as np
from time import perf_counter

from setfit import SetFitModel
from optimum.onnxruntime import ORTModelForFeatureExtraction
from transformers import AutoTokenizer

# OnnxSetFitModel is a helper class defined in this Space's local quantization.py
from quantization import OnnxSetFitModel
# Load the models
model1 = SetFitModel.from_pretrained("hsmashiana/base_model_hpml")
ort_model = ORTModelForFeatureExtraction.from_pretrained("hsmashiana/optimized_model_hpml", file_name="model_quantized.onnx")
tokenizer = AutoTokenizer.from_pretrained("hsmashiana/optimized_model_hpml")
model3 = OnnxSetFitModel(ort_model, tokenizer, model1.model_head)
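
# model3 reuses the classification head already trained in model1, swapping in the
# quantized ONNX encoder (plus its tokenizer) to compute the sentence embeddings.

# Integer class ids predicted by the models map to the four AG News topic labels.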
decode = {0:"World",1:"Sports",2:"Business",3:"Sci/Tech"}
def compare_models(text):
    # --- Model 1: base SetFit MiniLM ---
    times = []
    # Warm-up phase to ensure fair timing
    for _ in range(5):
        model1([text])
    # Measure the execution time of model predictions
    for _ in range(20):
        start = perf_counter()
        out1 = model1([text])
        end = perf_counter()
        times.append(end - start)
    # Average latency in milliseconds
    avg_latency_ms_model_1 = np.mean(times) * 1000

    # --- Model 3: quantized distilled MiniLM (ONNX) ---
    times = []
    # Warm-up phase to ensure fair timing
    for _ in range(5):
        model3.predict([text])
    # Measure the execution time of model predictions
    for _ in range(20):
        start = perf_counter()
        out3 = model3.predict([text])
        end = perf_counter()
        times.append(end - start)
    # Average latency in milliseconds
    avg_latency_ms_model_3 = np.mean(times) * 1000

    return (
        {"answer": decode[out1.numpy()[0]], "avgtime": avg_latency_ms_model_1},
        {"answer": decode[out3[0]], "avgtime": avg_latency_ms_model_3},
    )
# Create a Gradio interface
iface = gr.Interface(
    fn=compare_models,
    inputs="text",
    outputs=[
        gr.components.JSON(label="Base MiniLM"),
        gr.components.JSON(label="Quantized Distilled MiniLM"),
    ],
    title="Compare Sentence Classification Models",
    description="Enter a sentence to see how each model classifies it and its average prediction latency.",
)
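
# launch() also accepts options such as share=True (temporary public link)
# or server_port=7860 if the port needs to be fixed explicitly.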
# Run the interface
iface.launch()