Spaces:
Runtime error
Runtime error
Commit
·
409563e
1
Parent(s):
07abc51
Update app.py
Browse files
app.py
CHANGED
@@ -95,7 +95,7 @@ def mlp_exec(bs, h, n, d):
|
|
95 |
exec_time = calc_exec_time(flop, nbytes)
|
96 |
return flop, nbytes, exec_time
|
97 |
|
98 |
-
def print_kernel_execution(flop,
|
99 |
c1, c2 = st.columns([2, 3])
|
100 |
exec_time = calc_exec_time(flop, nbytes, include_overhead=False)
|
101 |
flop = round(flop/1e9, 2)
|
@@ -175,16 +175,12 @@ if breakdown:
|
|
175 |
st.write("Showing calculation for the maximum sequence length (n)")
|
176 |
|
177 |
st.caption("Multi-Head Attention")
|
178 |
-
|
179 |
-
|
180 |
-
c1, c2 = st.columns([2, 3])
|
181 |
-
att1_mha_time = print_kernel_execution(c1, c2, mha_flop, mha_bytes)
|
182 |
|
183 |
st.caption("Multi-Query Attention")
|
184 |
-
|
185 |
-
|
186 |
-
c1, c2 = st.columns([2, 3])
|
187 |
-
att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
|
188 |
|
189 |
st.subheader('Attention-value gemm')
|
190 |
st.write("Showing calculation for the maximum sequence length (n)")
|
|
|
95 |
exec_time = calc_exec_time(flop, nbytes)
|
96 |
return flop, nbytes, exec_time
|
97 |
|
98 |
+
def print_kernel_execution(flop, nbytes):
|
99 |
c1, c2 = st.columns([2, 3])
|
100 |
exec_time = calc_exec_time(flop, nbytes, include_overhead=False)
|
101 |
flop = round(flop/1e9, 2)
|
|
|
175 |
st.write("Showing calculation for the maximum sequence length (n)")
|
176 |
|
177 |
st.caption("Multi-Head Attention")
|
178 |
+
flop, nbytes, exec_time = att1_mha_exec(bs, h, n, d)
|
179 |
+
print_kernel_execution(flop, nbytes)
|
|
|
|
|
180 |
|
181 |
st.caption("Multi-Query Attention")
|
182 |
+
flop, nbytes, exec_time = att1_mqa_exec(bs, h, n, d)
|
183 |
+
print_kernel_execution(flop, nbytes)
|
|
|
|
|
184 |
|
185 |
st.subheader('Attention-value gemm')
|
186 |
st.write("Showing calculation for the maximum sequence length (n)")
|