Felix Marty committed
Commit 6e19ff8 · 1 Parent(s): 0325bda

turn off aws instances
- app.py +6 -0
- defaults.py +12 -12
app.py
CHANGED
```diff
@@ -80,6 +80,12 @@ with gr.Blocks() as demo:
         "## Speed up inference and support more workload with PyTorch's BetterTransformer 🤗"
     )
 
+    gr.Markdown(
+        """
+        **The two AWS instances powering this Space are offline (to save us the $$$). Feel free to reproduce using [this backend code](https://github.com/fxmarty/bettertransformer_demo). The example results are from an AWS EC2 g4dn.xlarge instance with a single NVIDIA T4 GPU.**
+        """
+    )
+
     gr.Markdown(
         """
         Let's try out [BetterTransformer](https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/) + [TorchServe](https://pytorch.org/serve/)!
```
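The new notice is simply a second `gr.Markdown` call inside the existing `gr.Blocks` layout. For context, a minimal, self-contained sketch of that pattern might look as follows; the launch call and the trimmed notice text are illustrative assumptions, not part of this diff:

```python
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown(
        "## Speed up inference and support more workload with PyTorch's BetterTransformer 🤗"
    )

    # Static notice rendered while the backend instances are off (assumed wording).
    gr.Markdown(
        """
        **The two AWS instances powering this Space are offline (to save us the $$$).**
        """
    )

if __name__ == "__main__":
    demo.launch()  # assumed entry point; the real app.py may differ
```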
defaults.py
CHANGED
```diff
@@ -1,35 +1,35 @@
 defaults_vanilla_single = {
     "status": 200,
     "prediction": "Positive",
-    "inf_latency":
+    "inf_latency": 6.25,
     "peak_gpu_memory": 2706.21,
-    "end_to_end_latency":
+    "end_to_end_latency": 81.95,
 }
 
 defaults_bt_single = {
     "status": 200,
     "prediction": "Positive",
-    "inf_latency":
+    "inf_latency": 4.96,
     "peak_gpu_memory": 2706.22,
-    "end_to_end_latency":
+    "end_to_end_latency": 78.69,
 }
 
 defaults_vanilla_spam = {
-    "throughput":
-    "mean_inference_latency":
-    "mean_peak_gpu_memory":
+    "throughput": 184.58,
+    "mean_inference_latency": 32.2,
+    "mean_peak_gpu_memory": 3046.26,
     "mean_padding_ratio": 69.53,
     "mean_sequence_length": 128.0,
-    "effective_batch_size":
+    "effective_batch_size": 8.0,
 }
 
 defaults_bt_spam = {
-    "throughput":
-    "mean_inference_latency":
-    "mean_peak_gpu_memory":
+    "throughput": 312.21,
+    "mean_inference_latency": 14.42,
+    "mean_peak_gpu_memory": 2798.78,
     "mean_padding_ratio": 69.53,
     "mean_sequence_length": 128.0,
-    "effective_batch_size":
+    "effective_batch_size": 8.0,
 }
 
 BATCH_SIZE = 8  # fixed!
```
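With the AWS instances turned off, these hardcoded values presumably stand in for live measurements in the Space's UI. A hypothetical sketch of that kind of fallback is shown below; the function name, endpoint URL, and error handling are assumptions for illustration, not code from this commit:

```python
# Hypothetical fallback sketch: names, URL, and error handling are assumptions,
# not code from this Space.
import requests

from defaults import defaults_bt_single, defaults_vanilla_single

TORCHSERVE_URL = "http://localhost:8080/predictions/model"  # assumed endpoint


def run_single_inference(text: str, use_bettertransformer: bool) -> dict:
    """Query the TorchServe backend; fall back to the cached T4 results if it is down."""
    try:
        response = requests.post(TORCHSERVE_URL, data=text, timeout=2)
        response.raise_for_status()
        return response.json()
    except requests.RequestException:
        # Backend instances are turned off: return the precomputed example results.
        return defaults_bt_single if use_bettertransformer else defaults_vanilla_single
```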