Spaces:

hsmashiana
/

HPML_proj

Runtime error

App Files Files Community

hsmashiana committed on May 6, 2024

Commit

d889137

1 Parent(s): 98f80c6

updated base model

Browse files

Files changed (5) hide show

__pycache__/quantization.cpython-311.pyc +0 -0
app.py +63 -38
flagged/log.csv +2 -0
flagged/throughput Comparison/49a4c8006ae895a1b75f/image.webp +0 -0
mest.tar +0 -0

__pycache__/quantization.cpython-311.pyc ADDED Viewed

Binary file (5.25 kB). View file

app.py CHANGED Viewed

@@ -1,80 +1,105 @@
 import gradio as gr
-from transformers import pipeline
-import numpy as np
-from time import perf_counter
 from setfit import SetFitModel
 from optimum.onnxruntime import ORTModelForFeatureExtraction
 from quantization import OnnxSetFitModel
-from transformers import AutoTokenizer
-# Load the models
-model1 = SetFitModel.from_pretrained("hsmashiana/base_model_hpml")
 ort_model = ORTModelForFeatureExtraction.from_pretrained("hsmashiana/optimized_model_hpml", file_name="model_quantized.onnx")
 tokenizer = AutoTokenizer.from_pretrained("hsmashiana/optimized_model_hpml")
 model3 = OnnxSetFitModel(ort_model, tokenizer, model1.model_head)
-decode = {0:"World",1:"Sports",2:"Business",3:"Sci/Tech"}
 def compare_models(text):
-    # result1 = model1(text)
-    # result2 = model2(text)
-    # # Including model names in the results
-    # output1 = {"Model": "BERT Base Uncased", "Output": result1}
-    # output2 = {"Model": "RoBERTa Base", "Output": result2}
-    # return output1, output2
     times = []
-        # Warm-up phase to ensure fair timing
     for _ in range(5):
         model1([text])
     # Measure the execution time of model predictions
     for _ in range(20):
         start = perf_counter()
         out1 = model1([text])
         end = perf_counter()
         times.append(end - start)
-    # Calculate mean and standard deviation of latency
-    avg_latency_ms_model_1 = np.mean(times) * 1000
-    # times = []
-    #     # Warm-up phase to ensure fair timing
-    # for _ in range(5):
-    #     model2([text])
-    # # Measure the execution time of model predictions
-    # for _ in range(20):
-    #     start = perf_counter()
-    #     out2 = model2([text])
-    #     end = perf_counter()
-    #     times.append(end - start)
-    # # Calculate mean and standard deviation of latency
-    # avg_latency_ms_model_2 = np.mean(times) * 1000
     times = []
-        # Warm-up phase to ensure fair timing
     for _ in range(5):
         model3.predict([text])
     # Measure the execution time of model predictions
     for _ in range(20):
         start = perf_counter()
-        out3 = model3([text])
         end = perf_counter()
         times.append(end - start)
-    # Calculate mean and standard deviation of latency
     avg_latency_ms_model_3 = np.mean(times) * 1000
-    return {"answer":decode[out1.numpy()[0]],"avgtime":avg_latency_ms_model_1}, {"answer":decode[out3[0]],"avgtime":avg_latency_ms_model_3}
-# Create a Gradio interface
 iface = gr.Interface(
     fn=compare_models,
     inputs="text",
     outputs=[
-        gr.components.JSON(label="Base miniLM"),
-        gr.components.JSON(label="Quantized Distilled miniLM")
     ],
     title="Compare Sentence Classification Models",
-    description="Enter a sentence to see how each model classifies it."
 )
-# Run the interface
 iface.launch()

 import gradio as gr
+from transformers import pipeline, AutoTokenizer
 from setfit import SetFitModel
 from optimum.onnxruntime import ORTModelForFeatureExtraction
 from quantization import OnnxSetFitModel
+import numpy as np
+from time import perf_counter
+import matplotlib.pyplot as plt
+from io import BytesIO
+from PIL import Image
+import io
+# Load the models
+model1 = SetFitModel.from_pretrained("hsmashiana/basemodel_hpml")
 ort_model = ORTModelForFeatureExtraction.from_pretrained("hsmashiana/optimized_model_hpml", file_name="model_quantized.onnx")
 tokenizer = AutoTokenizer.from_pretrained("hsmashiana/optimized_model_hpml")
 model3 = OnnxSetFitModel(ort_model, tokenizer, model1.model_head)
+decode = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
+def plot_throughput_bar_chart(throughput_model1, throughput_model2):
+    labels = ['Base model', 'Optimized model']
+    throughputs = [throughput_model1, throughput_model2]
+    plt.figure(figsize=(8, 6))
+    plt.bar(labels, throughputs, color=['blue', 'navy'])
+    plt.xlabel('Models')
+    plt.ylabel('Throughput (tokens/second)')
+    plt.title('Model Throughput Comparison')
+    plt.tight_layout()
+    # Create a PIL Image from the plot
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png')
+    buf.seek(0)
+    img = Image.open(buf)
+    plt.close()
+    return img
 def compare_models(text):
+    inputs = tokenizer(text, return_tensors="pt")
     times = []
+    # Warm-up phase to ensure fair timing
     for _ in range(5):
         model1([text])
     # Measure the execution time of model predictions
     for _ in range(20):
         start = perf_counter()
         out1 = model1([text])
         end = perf_counter()
         times.append(end - start)
+    avg_latency_ms_model_1 = np.mean(times) * 1000
     times = []
+    # Warm-up phase to ensure fair timing
     for _ in range(5):
         model3.predict([text])
     # Measure the execution time of model predictions
     for _ in range(20):
         start = perf_counter()
+        out3 = model3.predict([text])
         end = perf_counter()
         times.append(end - start)
     avg_latency_ms_model_3 = np.mean(times) * 1000
+    throughput_tokens_per_sec1 = inputs['input_ids'].size(1) / (avg_latency_ms_model_1 / 1000)
+    throughput_tokens_per_sec2 = inputs['input_ids'].size(1) / (avg_latency_ms_model_3 / 1000)
+    plot_data = plot_throughput_bar_chart(throughput_tokens_per_sec1, throughput_tokens_per_sec2)
+    result1 = {
+        "Base Model": {
+            "answer": decode[out1.numpy()[0]],
+            "average time (ms)": avg_latency_ms_model_1,
+            "throughput (tokens/sec)": throughput_tokens_per_sec1
+        }}
+    result2 = {
+        "Optimized Model": {
+            "answer": decode[out3[0]],
+            "average time (ms)": avg_latency_ms_model_3,
+            "throughput (tokens/sec)": throughput_tokens_per_sec2
+        }}
+    return result1, result2, plot_data
 iface = gr.Interface(
     fn=compare_models,
     inputs="text",
     outputs=[
+        gr.components.JSON(label="Base Model"),
+        gr.components.JSON(label="Optimized Model"),
+        gr.components.Image(label="throughput Comparison")
     ],
     title="Compare Sentence Classification Models",
+    description="Enter a sentence to see how each model classifies it and their throughputs.",
+    allow_flagging="never"
 )
 iface.launch()

flagged/log.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ text,Base Model,Optimized Model,throughput Comparison,flag,username,timestamp
2	+ hellool,"{""Base Model"": {""answer"": ""Business"", ""average time (ms)"": 9.624537501076702, ""throughput (tokens/sec)"": 415.60438613829683}}","{""Optimized Model"": {""answer"": ""Business"", ""average time (ms)"": 1.6875000983418431, ""throughput (tokens/sec)"": 2370.370232233139}}",flagged/throughput Comparison/49a4c8006ae895a1b75f/image.webp,,,2024-05-04 20:48:16.264716

flagged/throughput Comparison/49a4c8006ae895a1b75f/image.webp ADDED Viewed

mest.tar ADDED Viewed

Binary file (1.02 kB). View file