harmdevries committed on
Commit
f0d14cb
·
1 Parent(s): a275f69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -67,7 +67,7 @@ mqa_bytes = 2*bs*h*(d/h) + 2*bs*n*(d/h) + 2*bs*h*n
67
  c1, c2 = st.columns([2, 3])
68
  att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
69
 
70
- st.header('Attention-value gemm')
71
  st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
72
  st.caption("Multi-Head Attention")
73
  mha_flop = 2*bs*h*n*(d/h)
@@ -82,6 +82,13 @@ c1, c2 = st.columns([2, 3])
82
  att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
83
 
84
  st.subheader('Output projection')
 
 
 
 
 
 
 
85
 
86
  st.header('MLP')
87
  st.subheader('First Linear')
 
67
  c1, c2 = st.columns([2, 3])
68
  att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
69
 
70
+ st.subheader('Attention-value gemm')
71
  st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
72
  st.caption("Multi-Head Attention")
73
  mha_flop = 2*bs*h*n*(d/h)
 
82
  att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
83
 
84
  st.subheader('Output projection')
85
+ mlp1_flop = 2*bs*1*d
86
+ mlp1_bytes = 2*bs*1*d + 2*d*4*d + 2*bs*1*4*d
87
+ c1, c2 = st.columns([2, 3])
88
+ mlp1_time = print_kernel_execution(c1, c2, mlp1_flop, mlp1_bytes)
89
+
90
+ st.subheader('Element-wise ops')
91
+ st.write("A couple of layers ")
92
 
93
  st.header('MLP')
94
  st.subheader('First Linear')