Spaces:
Runtime error
Runtime error
Commit
·
3ddfba1
1
Parent(s):
5e8eef3
Update app.py
Browse files
app.py
CHANGED
@@ -115,13 +115,24 @@ for i in range(n_start, n):
|
|
115 |
mqa_time = shared_time + qkv_mqa_exec(bs, h, i, d)[2] + att1_mqa_exec(bs, h, i, d)[2] + att2_mqa_exec(bs, h, i, d)[2]
|
116 |
mqa_total_time += l*mqa_time
|
117 |
|
118 |
-
st.
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
121 |
|
122 |
st.header("Memory consumption")
|
|
|
|
|
123 |
num_params = 12*l*d*d
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
|
127 |
breakdown = st.checkbox("Show breakdown per operation")
|
|
|
115 |
mqa_time = shared_time + qkv_mqa_exec(bs, h, i, d)[2] + att1_mqa_exec(bs, h, i, d)[2] + att2_mqa_exec(bs, h, i, d)[2]
|
116 |
mqa_total_time += l*mqa_time
|
117 |
|
118 |
+
c1, c2 = st.columns([2, 4])
|
119 |
+
c1.write("Multi-Head Attention:")
|
120 |
+
c2.write(str(round(mha_total_time, 2)))
|
121 |
+
c1.write("Multi-Query Attention:")
|
122 |
+
c2.write(str(round(mqa_total_time, 2)))
|
123 |
+
c1.write("Speed-up MQA over MHA: ")
|
124 |
+
c2.write(str(round(mha_total_time/mqa_total_time, 2)))
|
125 |
|
126 |
st.header("Memory consumption")
|
127 |
+
st.caption("MHA")
|
128 |
+
c1, c2 = st.columns([2, 4])
|
129 |
num_params = 12*l*d*d
|
130 |
+
c1.write("Num Parameters (in B)")
|
131 |
+
c2.write(str(round(num_params/1e9, 3)))
|
132 |
+
c1.write("Storing activations")
|
133 |
+
acts = round(2*l*(d/h)*h*n/1e9, 2)
|
134 |
+
c2.write(str(acts))
|
135 |
+
|
136 |
|
137 |
|
138 |
breakdown = st.checkbox("Show breakdown per operation")
|