Spaces:

harmdevries
/

transformer_inference

Runtime error

harmdevries committed on Oct 23, 2022

Commit

f0d14cb

1 Parent(s): a275f69

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -67,7 +67,7 @@ mqa_bytes = 2*bs*h*(d/h) + 2*bs*n*(d/h) + 2*bs*h*n
 c1, c2 = st.columns([2, 3])
 att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
-st.header('Attention-value gemm')
 st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
 st.caption("Multi-Head Attention")
 mha_flop = 2*bs*h*n*(d/h)
@@ -82,6 +82,13 @@ c1, c2 = st.columns([2, 3])
 att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
 st.subheader('Output projection')
 st.header('MLP')
 st.subheader('First Linear')

 c1, c2 = st.columns([2, 3])
 att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
+st.subheader('Attention-value gemm')
 st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
 st.caption("Multi-Head Attention")
 mha_flop = 2*bs*h*n*(d/h)
 att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
 st.subheader('Output projection')
+mlp1_flop = 2*bs*1*d
+mlp1_bytes = 2*bs*1*d + 2*d*4*d + 2*bs*1*4*d
+c1, c2 = st.columns([2, 3])
+mlp1_time = print_kernel_execution(c1, c2, mlp1_flop, mlp1_bytes)
+st.subheader('Element-wise ops')
+st.write("A couple of layers ")
 st.header('MLP')
 st.subheader('First Linear')