Spaces:
Runtime error
Runtime error
Commit
·
f0d14cb
1
Parent(s):
a275f69
Update app.py
Browse files
app.py
CHANGED
@@ -67,7 +67,7 @@ mqa_bytes = 2*bs*h*(d/h) + 2*bs*n*(d/h) + 2*bs*h*n
|
|
67 |
c1, c2 = st.columns([2, 3])
|
68 |
att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
69 |
|
70 |
-
st.
|
71 |
st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
|
72 |
st.caption("Multi-Head Attention")
|
73 |
mha_flop = 2*bs*h*n*(d/h)
|
@@ -82,6 +82,13 @@ c1, c2 = st.columns([2, 3])
|
|
82 |
att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
83 |
|
84 |
st.subheader('Output projection')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
st.header('MLP')
|
87 |
st.subheader('First Linear')
|
|
|
67 |
c1, c2 = st.columns([2, 3])
|
68 |
att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
69 |
|
70 |
+
st.subheader('Attention-value gemm')
|
71 |
st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
|
72 |
st.caption("Multi-Head Attention")
|
73 |
mha_flop = 2*bs*h*n*(d/h)
|
|
|
82 |
att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
83 |
|
84 |
st.subheader('Output projection')
|
85 |
+
mlp1_flop = 2*bs*1*d
|
86 |
+
mlp1_bytes = 2*bs*1*d + 2*d*4*d + 2*bs*1*4*d
|
87 |
+
c1, c2 = st.columns([2, 3])
|
88 |
+
mlp1_time = print_kernel_execution(c1, c2, mlp1_flop, mlp1_bytes)
|
89 |
+
|
90 |
+
st.subheader('Element-wise ops')
|
91 |
+
st.write("A couple of layers ")
|
92 |
|
93 |
st.header('MLP')
|
94 |
st.subheader('First Linear')
|