0-hero committed on
Commit
b2f8fe7
·
verified ·
1 Parent(s): c1384fc

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .launchpadlib/api.launchpad.net/cache/api.launchpad.net,devel,~deadsnakes,+archive,ubuntu,ppa,ws.op=getSigningKeyData-application,json,c76e9ed0b661c7fa5da42e8fb2da319e +22 -0
  2. .local/share/jupyter/nbextensions/snippets_menu/snippets_submenus_python/sympy.js +750 -0
  3. .local/share/jupyter/nbextensions/toc2/toc2.js +826 -0
  4. .local/share/jupyter/nbextensions/toc2/toc2.yaml +104 -0
  5. .local/share/jupyter/nbextensions/toggle_all_line_numbers/main.js +82 -0
  6. .local/share/jupyter/nbextensions/toggle_all_line_numbers/main.yaml +16 -0
  7. .local/share/jupyter/nbextensions/toggle_all_line_numbers/readme.md +5 -0
  8. .local/share/jupyter/nbextensions/tree-filter/demo.gif +0 -0
  9. .local/share/jupyter/nbextensions/varInspector/__pycache__/var_list.cpython-310.pyc +0 -0
  10. .local/share/jupyter/nbextensions/varInspector/demo.gif +0 -0
  11. .local/share/jupyter/nbextensions/varInspector/jquery.tablesorter.min.js +2 -0
  12. .local/share/jupyter/nbextensions/varInspector/main.css +119 -0
  13. .local/share/jupyter/nbextensions/varInspector/main.js +462 -0
  14. .local/share/jupyter/nbextensions/varInspector/varInspector.yaml +45 -0
  15. .local/share/jupyter/nbextensions/varInspector/var_list.r +17 -0
  16. .local/share/jupyter/nbextensions/zenmode/README.md +4 -0
  17. .local/share/jupyter/nbextensions/zenmode/images/back3.jpg +0 -0
  18. .local/share/jupyter/nbextensions/zenmode/main.css +34 -0
  19. .local/share/jupyter/nbextensions/zenmode/main.js +196 -0
  20. .local/share/jupyter/nbextensions/zenmode/zenmode.yaml +28 -0
  21. .triton/dump/0db70b0f0846c3c6c38c4ccb3ef979e3/triton_.cubin +0 -0
  22. .triton/dump/0db70b0f0846c3c6c38c4ccb3ef979e3/triton_.ttir +113 -0
  23. .triton/dump/174400122b6dbc99e086544aa1856b9f/triton_.cubin +0 -0
  24. .triton/dump/199215289adb100508718a5a762ba4d7/triton_.cubin +0 -0
  25. .triton/dump/1c14bdb6903aa6825e214bbdf57fd077/triton_.cubin +0 -0
  26. .triton/dump/1c14bdb6903aa6825e214bbdf57fd077/triton_.ptx +312 -0
  27. .triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.cubin +0 -0
  28. .triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.llir +332 -0
  29. .triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.ptx +446 -0
  30. .triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.ttgir +26 -0
  31. .triton/dump/305a9479aab997a3a16bfe46bb303a50/triton_.cubin +0 -0
  32. .triton/dump/345a87a492fd703c73ab83265a21fcb6/triton_.cubin +0 -0
  33. .triton/dump/3cd3b6d7993c56f7d0340d40c84f737c/triton_.ptx +809 -0
  34. .triton/dump/3cd3b6d7993c56f7d0340d40c84f737c/triton_.ttgir +152 -0
  35. .triton/dump/4993935f9a0e5939755cfb42600362cf/triton_.cubin +0 -0
  36. .triton/dump/4993935f9a0e5939755cfb42600362cf/triton_.ptx +295 -0
  37. .triton/dump/4c6ad48573c74d55ed79384f6b432d50/triton_.ttir +18 -0
  38. .triton/dump/4ce9eb7fe63f19e54893f0c74df91471/triton_.ttgir +28 -0
  39. .triton/dump/4ce9eb7fe63f19e54893f0c74df91471/triton_.ttir +27 -0
  40. .triton/dump/51e329eae41e4ee17aa201fff8371d94/triton_.llir +0 -0
  41. .triton/dump/76fb48b96c75cb8e388c291a18ef9b02/triton_.llir +600 -0
  42. .triton/dump/76fb48b96c75cb8e388c291a18ef9b02/triton_.ttir +153 -0
  43. .triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.cubin +0 -0
  44. .triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.llir +745 -0
  45. .triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.ttir +101 -0
  46. .triton/dump/7dc5bb3e5c2bb99527fff34c6fba7810/triton_.ptx +277 -0
  47. .triton/dump/884b5df35d2a25fd91308249e7657806/triton_.ttir +17 -0
  48. .triton/dump/93ab21d512b10f4271e68c2f0ae3393c/triton_.cubin +0 -0
  49. .triton/dump/9a2fb05196b13393bea452d08e9aaca8/triton_.cubin +0 -0
  50. .triton/dump/9f68cc707cb8f8bff3232abf59cbd9ec/triton_.ptx +886 -0
.launchpadlib/api.launchpad.net/cache/api.launchpad.net,devel,~deadsnakes,+archive,ubuntu,ppa,ws.op=getSigningKeyData-application,json,c76e9ed0b661c7fa5da42e8fb2da319e ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ status: 200
2
+ date: Mon, 05 Feb 2024 23:25:35 GMT
3
+ server: gunicorn
4
+ x-powered-by: Zope (www.zope.org), Python (www.python.org)
5
+ content-security-policy: frame-ancestors 'self';
6
+ content-type: application/json
7
+ strict-transport-security: max-age=15552000
8
+ vary: Accept,Accept-Encoding
9
+ x-content-type-options: nosniff
10
+ x-frame-options: SAMEORIGIN
11
+ x-launchpad-revision: 9643586c585856148a18782148972ae9c1179d06
12
+ x-lazr-notifications: []
13
+ x-xss-protection: 1; mode=block
14
+ x-vcs-revision: 9643586c585856148a18782148972ae9c1179d06
15
+ x-request-id: 452e0c68-aa99-4bb4-abc3-237c7bb39fae
16
+ content-length: 1641
17
+ -content-encoding: gzip
18
+ content-location: https://api.launchpad.net/devel/~deadsnakes/+archive/ubuntu/ppa?ws.op=getSigningKeyData
19
+ -varied-accept: application/json
20
+ -varied-accept-encoding: gzip, deflate
21
+
22
+ "-----BEGIN PGP PUBLIC KEY BLOCK-----\n\nmQINBFl8fYEBEADQmGZ6pDrwY9iH9DVlwNwTOvOZ7q7lHXPl/TLfMs1tckMc/D9a\nhsdBN9VWtMmo+RySvhkIe8X15r65TFs2HE8ft6j2e/4K472pObM1hB+ajiU/wYX2\nSyq7DBlNm6YMP5/SyQzRxqis4Ja1uUjW4Q5/Csdf5In8uMzXj5D1P7qOiP2aNa0E\nr3w6PXWRTuTihWZOsHv8npyVYDBRR6gEZbd3r86snI/7o8Bfmad3KjbxL7aOdNMw\nAqQFaNKl7Y+UJpv1CNFIf+twcOoC0se1SrsVJlAH9HNHM7XGQsPUwpNvQlcmvr+t\n1vVS2m72lk3gyShDuJpi1TifGw+DoTqu54U0k+0sZm4pnQVeiizNkefU2UqOoGlt\n4oiG9nIhSX04xRlGes3Ya0OjNI5b1xbcYoR+r0c3odI+UCw3VSZtKDX/xlH1o/82\nb8ouXeE7LA1i4DvGNj4VSvoxv4ggIznxMf+PkWXWKwRGsbAAXF52rr4FUaeaKoIU\nDkJqHXAxrB3PQslZ+ZgBEukkQZF76NkqRqP1E7FXzZZMo2eEL7vtnhSzUlanOf42\nECBoWHVoZQaRFMNbGpqlg9aWedHGyetMStS3nH1sqanr+i4I8VR/UH+ilarPTW3T\nE0apWlsH8+N3IKbRx2wgrRZNoQEuyVtvyewDFYShJB3Zxt7VCy67vKAl1QARAQAB\ntBxMYXVuY2hwYWQgUFBBIGZvciBkZWFkc25ha2VziQI4BBMBAgAiBQJZfH2BAhsD\nBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAAKCRC6aTI2anVXdvwhD/4oI3yckeKn\n9aJNNTJsyw4ydMkIAOdG+jbZsYv/rN73UVQF1RA8HC71SDmbd0Nu80koBOX+USuL\nvvhoMIsARlD5dLx5f/zaQcYWJm/BtsMF/eZ4s1xsenwW6PpXd8FpaTn1qtg/8+O9\n99R4uSetAhhyf1vSRb/8U0sgSQd38mpZZFq352UuVisXnmCThj621loQubYJ3lwU\nLSLs8wmgo4XIYH7UgdavV9dfplPh0M19RHQL3wTyQP2KRNRq1rG7/n1XzUwDyqY6\neMVhdVhvnxAGztvdFCySVzBRr/rCw6quhcYQwBqdqaXhz63np+4mlUNfd8Eu+Vas\nb/tbteF/pDu0yeFMpK4X09Cwn2kYYCpq4XujijW+iRWb4MO3G8LLi8oBAHP/k0CM\n/QvSRbbG8JDQkQDH37Efm8iE/EttJTixjKAIfyugmvEHfcrnxaMoBioa6h6McQrM\nvI8bJirxorJzOVF4kY7xXvMYwjzaDC8G0fTA8SzQRaShksR3USXZjz8vS6tZ+YNa\nmRHPoZ3Ua0bz4t2aCcu/fknVGsXcNBazNIK9WF2665Ut/b7lDbojXsUZ3PpuqOoe\nGQL9LRj7nmCI6ugoKkNp8ZXcGJ8BGw37Wep2ztyzDohXp6f/4mGgy2KYV9R4S8D5\nyBDUU6BS7Su5nhQMStfdfr4FffLmnvFC9w==\n=7hFk\n-----END PGP PUBLIC KEY BLOCK-----\n"
.local/share/jupyter/nbextensions/snippets_menu/snippets_submenus_python/sympy.js ADDED
@@ -0,0 +1,750 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ define([
2
+ "require",
3
+ "./sympy_functions",
4
+ "./sympy_assumptions",
5
+ ], function (requirejs, sympy_functions, sympy_assumptions) {
6
+ return {
7
+ 'name' : 'SymPy',
8
+ 'sub-menu' : [
9
+ {
10
+ 'name' : 'Setup',
11
+ 'snippet' : [
12
+ 'from __future__ import print_function, division',
13
+ 'from sympy import *',
14
+ 'a, s, t, u, v, w, x, y, z = symbols("a, s, t, u, v, w, x, y, z")',
15
+ 'k, m, n = symbols("k, m, n", integer=True)',
16
+ 'f, g, h = symbols("f, g, h", cls=Function)',
17
+ 'init_printing()',
18
+ ],
19
+ },
20
+ {
21
+ 'name' : 'Documentation',
22
+ 'external-link' : 'http://docs.sympy.org/latest/index.html',
23
+ },
24
+ '---',
25
+ {
26
+ 'name' : 'Constants',
27
+ 'sub-menu' : [
28
+ {
29
+ 'name' : '1',
30
+ 'snippet' : ['S(1)',], //'S.One',],
31
+ },
32
+ // {
33
+ // 'name' : '0',
34
+ // 'snippet' : ['S.Zero',],
35
+ // },
36
+ // {
37
+ // 'name' : '-1',
38
+ // 'snippet' : ['S.NegativeOne',],
39
+ // },
40
+ {
41
+ 'name' : '1/2',
42
+ 'snippet' : ['S(1)/2',], //'S.Half',],
43
+ },
44
+ {
45
+ 'name' : 'Rational numbers',
46
+ 'snippet' : ['Rational(3, 7)',],
47
+ },
48
+ '---',
49
+ {
50
+ 'name' : 'Base of natural logarithm, \\(e\\)',
51
+ 'snippet' : ['E',],
52
+ },
53
+ {
54
+ 'name' : 'Unit imaginary number, \\(i\\)',
55
+ 'snippet' : ['I',],
56
+ },
57
+ {
58
+ 'name' : 'Geometric constant, \\(\\pi\\)',
59
+ 'snippet' : ['pi',],
60
+ },
61
+ {
62
+ 'name' : 'Golden ratio, \\(\\phi\\)',
63
+ 'snippet' : ['GoldenRatio',],
64
+ },
65
+ {
66
+ 'name' : 'Euler-Mascheroni constant, \\(\\gamma\\)',
67
+ 'snippet' : ['EulerGamma',],
68
+ },
69
+ {
70
+ 'name' : 'Catalan\'s constant, \\(K\\)',
71
+ 'snippet' : ['Catalan',],
72
+ },
73
+ '---',
74
+ {
75
+ 'name' : 'Infinity, \\(\\infty\\)',
76
+ 'snippet' : ['oo',], // 'S.Infinity'
77
+ },
78
+ // {
79
+ // 'name' : 'Negative infinity, \\(-\\infty\\)',
80
+ // 'snippet' : ['S.NegativeInfinity',],
81
+ // },
82
+ {
83
+ 'name' : 'Complex infinity, \\(\\tilde{\\infty}\\)',
84
+ 'snippet' : ['zoo'], //'S.ComplexInfinity',],
85
+ },
86
+ {
87
+ 'name' : 'NaN',
88
+ 'snippet' : ['nan',], // 'S.NaN'
89
+ },
90
+ ],
91
+ },
92
+ sympy_functions,
93
+ {
94
+ 'name' : 'Calculus',
95
+ 'sub-menu' : [
96
+ {
97
+ 'name' : 'Differentiate once',
98
+ 'snippet' : [
99
+ 'expr = exp(x**2)',
100
+ 'deriv = diff(expr, x)',
101
+ ],
102
+ },
103
+ {
104
+ 'name' : 'Differentiate multiple times',
105
+ 'snippet' : [
106
+ 'expr = x**4',
107
+ 'deriv = diff(expr, x, 3)',
108
+ ],
109
+ },
110
+ {
111
+ 'name' : 'Mixed partial derivatives',
112
+ 'snippet' : [
113
+ 'expr = exp(x*y*z)',
114
+ 'deriv = diff(expr, x, y, 2, z, 4)',
115
+ ],
116
+ },
117
+ {
118
+ 'name' : 'Finite differences',
119
+ 'snippet' : [
120
+ 'dx0, dx1 = symbols("dx0, dx1")',
121
+ 'formula = as_finite_diff(f(x).diff(x), [x-dx0, x, x+dx1])',
122
+ ],
123
+ },
124
+ '---',
125
+ {
126
+ 'name' : 'Indefinite integral',
127
+ 'snippet' : [
128
+ 'integral = integrate(cos(x), x)',
129
+ ],
130
+ },
131
+ {
132
+ 'name' : 'Definite integral',
133
+ 'snippet' : [
134
+ 'integral = integrate(exp(-x), (x, 0, oo))',
135
+ ],
136
+ },
137
+ {
138
+ 'name' : 'Double integral',
139
+ 'snippet' : [
140
+ 'integral = integrate(exp(-x**2-y**2), (x, -oo, oo), (y, -oo, oo))',
141
+ ],
142
+ },
143
+ '---',
144
+ {
145
+ 'name' : 'Limits',
146
+ 'snippet' : [
147
+ 'lim = limit(sin(x)/x, x, 0, "+")',
148
+ ],
149
+ },
150
+ {
151
+ 'name' : 'Series expansion',
152
+ 'snippet' : [
153
+ 'expr = exp(sin(x))',
154
+ 'ser = series(expr, x, 0, 6)',
155
+ ],
156
+ },
157
+ {
158
+ 'name' : 'Series expansion, removing order term',
159
+ 'snippet' : [
160
+ 'expr = exp(sin(x))',
161
+ 'ser = series(expr, x, 0, 6).removeO()',
162
+ ],
163
+ },
164
+ {
165
+ 'name' : 'Summations',
166
+ 'snippet' : [
167
+ 'ell_min,ell,ell_max = symbols("ell_min,ell,ell_max", integer=True)',
168
+ 'summ = summation((2*ell + 1), (ell, ell_min, ell_max))',
169
+ ],
170
+ },
171
+ ],
172
+ },
173
+
174
+ {
175
+ 'name' : 'Solvers',
176
+ 'sub-menu' : [
177
+ {
178
+ 'name' : 'Solve for one variable',
179
+ 'snippet' : [
180
+ 'expr = x**4 - 4*x**3 + 2*x**2 - x',
181
+ 'eqn = Eq(expr, 0)',
182
+ 'soln = solve(eqn, x)',
183
+ ],
184
+ },
185
+ {
186
+ 'name' : 'Solve for two variables',
187
+ 'snippet' : [
188
+ 'eqns = Eq(x + y, 4), Eq(x*y, 3)',
189
+ 'soln = solve(eqns, [x,y])',
190
+ ],
191
+ },
192
+ {
193
+ 'name' : 'Solve differential equation',
194
+ 'snippet' : [
195
+ 'expr = f(x).diff(x, x) + 9*f(x)',
196
+ "eqn = Eq(expr, 1) # f''(x) + 9f(x) = 1",
197
+ 'soln = dsolve(eqn, f(x))',
198
+ ],
199
+ },
200
+ ],
201
+ },
202
+ {
203
+ 'name' : 'Manipulating expressions',
204
+ 'sub-menu' : [
205
+ {
206
+ 'name' : 'Simplify',
207
+ 'snippet' : [
208
+ 'expr = (x**3 + x**2 - x - 1)/(x**2 + 2*x + 1)',
209
+ 'expr = simplify(expr)',
210
+ ],
211
+ },
212
+ {
213
+ 'name' : 'Refine, using assumptions',
214
+ // 'snippet' : [
215
+ // 'expr = exp(pi*I*2*x)',
216
+ // 'assumption = Q.integer(x) & Q.integer(y)',
217
+ // 'expr = refine(expr, assumption)',
218
+ // ],
219
+ 'sub-menu' : [
220
+ {
221
+ 'name' : 'Refine',
222
+ 'snippet' : [
223
+ 'expr = exp(pi*I*2*(x+y))',
224
+ 'assumption = Q.integer(x) & Q.integer(y)',
225
+ 'expr = refine(expr, assumption)',
226
+ ],
227
+ },
228
+ {
229
+ 'name' : 'Refine in context manager',
230
+ 'snippet' : [
231
+ 'expr = exp(pi*I*2*(x+y))',
232
+ 'with assuming(Q.integer(x) & Q.integer(y)):',
233
+ ' expr = refine(expr)',
234
+ ],
235
+ },
236
+ sympy_assumptions,
237
+ ],
238
+ },
239
+ {
240
+ 'name' : 'Expansion',
241
+ 'sub-menu' : [
242
+ {
243
+ 'name' : 'Expand basic expressions',
244
+ 'snippet' : [
245
+ 'expr = (x + 2)*(x - 3)',
246
+ 'expr = expand(expr)',
247
+ ],
248
+ },
249
+ '---',
250
+ {
251
+ 'name' : 'Expand, including complex parts',
252
+ 'snippet' : [
253
+ 'expr = cos(x)',
254
+ 'expr = expand(expr, complex=True)',
255
+ ],
256
+ },
257
+ {
258
+ 'name' : 'Expand, including functions',
259
+ 'snippet' : [
260
+ 'expr = gamma(x+3)',
261
+ 'expr = expand(expr, func=True)',
262
+ ],
263
+ },
264
+ {
265
+ 'name' : 'Expand, including trig',
266
+ 'snippet' : [
267
+ 'expr = sin(x+y)*(x+y)',
268
+ 'expr = expand(expr, trig=True)',
269
+ ],
270
+ },
271
+ '---',
272
+ {
273
+ 'name' : 'Expand only real and imaginary parts',
274
+ 'snippet' : [
275
+ 'expand_complex(x)',
276
+ ],
277
+ },
278
+ {
279
+ 'name' : 'Expand only functions',
280
+ 'snippet' : [
281
+ 'expr = gamma(x + 2)',
282
+ 'expr = expand_func(expr)',
283
+ ],
284
+ },
285
+ {
286
+ 'name' : 'Expand only hypergeometric functions',
287
+ 'snippet' : [
288
+ 'expr = hyper([1,1], [1,], z) + gamma(z)',
289
+ 'expr = hyperexpand(expr)',
290
+ ],
291
+ },
292
+ {
293
+ 'name' : 'Expand only logarithms',
294
+ 'snippet' : [
295
+ 'a, b = symbols("a, b", positive=True)',
296
+ 'expr = log(a**2*b)',
297
+ 'expr = expand_log(expr)',
298
+ ],
299
+ },
300
+ {
301
+ 'name' : 'Expand only multiplication over addition',
302
+ 'snippet' : [
303
+ 'expr = y*(x + z)',
304
+ 'expr = expand_mul(expr)',
305
+ ],
306
+ },
307
+ {
308
+ 'name' : 'Expand only multinomials',
309
+ 'snippet' : [
310
+ 'expr = (x + y + z)**3',
311
+ 'expr = expand_multinomial(expr)',
312
+ ],
313
+ },
314
+ {
315
+ 'name' : 'Expand only powers of multiplied bases',
316
+ 'snippet' : [
317
+ 'a, b = symbols("a, b", positive=True)',
318
+ 'expr = (a*b)**z',
319
+ 'expr = expand_power_base(expr)',
320
+ ],
321
+ },
322
+ {
323
+ 'name' : 'Expand only addition in exponents',
324
+ 'snippet' : [
325
+ 'expr = x**(y + 2)',
326
+ 'expr = expand_power_exp(expr)',
327
+ ],
328
+ },
329
+ {
330
+ 'name' : 'Expand only trig',
331
+ 'snippet' : [
332
+ 'expr = sin(x+y)*(x+y)',
333
+ 'expr = expand_trig(expr)',
334
+ ],
335
+ },
336
+ ],
337
+ },
338
+ {
339
+ 'name' : 'Collect terms',
340
+ 'sub-menu' : [
341
+ {
342
+ 'name' : 'Collect as coefficients of one factor',
343
+ 'snippet' : [
344
+ 'expr = y*x**2 + z*x**2 + t*x - 2*x + 3',
345
+ 'expr = collect(expr, x)',
346
+ ],
347
+ },
348
+ {
349
+ 'name' : 'Collect as coefficients of multiple factors',
350
+ 'snippet' : [
351
+ 'expr = x**2 + y*x**2 + x*y + y + z*y',
352
+ 'expr = collect(expr, [x, y])',
353
+ ],
354
+ },
355
+ {
356
+ 'name' : 'Collect with respect to wild card',
357
+ 'snippet' : [
358
+ 'w = Wild("w")',
359
+ 'expr = z*x**y - t*z**y',
360
+ 'expr = collect(expr, w**y)',
361
+ ],
362
+ },
363
+ {
364
+ 'name' : 'Collect and apply function to each coefficient',
365
+ 'snippet' : [
366
+ 'expr = expand((x + y + 1)**3)',
367
+ 'expr = collect(expr, x, factor)',
368
+ ],
369
+ },
370
+ {
371
+ 'name' : 'Recursively collect',
372
+ 'snippet' : [
373
+ 'expr = (x**2*y + x*y + x + y)/(x*y + z*y)',
374
+ 'expr = rcollect(expr, y)',
375
+ ],
376
+ },
377
+ {
378
+ 'name' : 'Collect constants',
379
+ 'snippet' : [
380
+ 'expr = sqrt(3)*x + sqrt(7)*x + sqrt(3) + sqrt(7)',
381
+ 'expr = collect_const(expr)',
382
+ ],
383
+ },
384
+ ],
385
+ },
386
+ {
387
+ 'name' : 'Substitutions and replacements',
388
+ 'sub-menu' : [
389
+ {
390
+ 'name' : 'Substitute one subexpression for another',
391
+ 'snippet' : [
392
+ 'expr = 1 + x*y',
393
+ 'expr = expr.subs(x, pi)',
394
+ ],
395
+ },
396
+ {
397
+ 'name' : 'Substitute multiple subexpressions successively',
398
+ 'snippet' : [
399
+ 'expr = (x+y)/y',
400
+ 'substitutions = [(x+y, y), (y, x+y)]',
401
+ 'expr = expr.subs(substitutions)',
402
+ ],
403
+ },
404
+ {
405
+ 'name' : 'Substitute multiple subexpressions simultaneously',
406
+ 'snippet' : [
407
+ 'expr = (x+y)/y',
408
+ 'substitutions = [(x+y, y), (y, x+y)]',
409
+ 'expr = expr.subs(substitutions, simultaneous=True)',
410
+ ],
411
+ },
412
+ '---',
413
+ {
414
+ 'name' : 'Replace one object with another',
415
+ 'snippet' : [
416
+ 'expr = 1 + x*y',
417
+ 'expr = expr.replace(x, pi)',
418
+ ],
419
+ },
420
+ {
421
+ 'name' : 'Replace one object with some function of its arguments',
422
+ 'snippet' : [
423
+ 'expr = log(sin(x)) + tan(sin(x**2))',
424
+ 'expr = expr.replace(sin, lambda arg: sin(2*arg))',
425
+ ],
426
+ },
427
+ {
428
+ 'name' : 'Replace a pattern with an object',
429
+ 'snippet' : [
430
+ '# Note: `exclude=` specifies that the Wild cannot match any item in the list',
431
+ 'a, b = symbols("a, b", cls=Wild, exclude=[x,y])',
432
+ 'expr = 2*x + y + z',
433
+ 'wild = a*x + b',
434
+ 'replacement = b - a',
435
+ '# Note: `exact=True` demands that all Wilds have nonzero matches',
436
+ 'expr = expr.replace(wild, replacement, exact=True)',
437
+ ],
438
+ },
439
+ {
440
+ 'name' : 'Replace a pattern with some function of that object',
441
+ 'snippet' : [
442
+ 'a = symbols("a", cls=Wild, exclude=[])',
443
+ 'expr = log(sin(x)) + tan(sin(x**2))',
444
+ 'expr.replace(sin(a), lambda a: sin(2*a))',
445
+ ],
446
+ },
447
+ {
448
+ 'name' : 'Replace anything with some function of that thing',
449
+ 'snippet' : [
450
+ 'g = 2*sin(x**3)',
451
+ 'g.replace(lambda expr: expr.is_Function, lambda expr: expr**2)',
452
+ ],
453
+ },
454
+ '---',
455
+ {
456
+ 'name' : 'Replace exact subexpressions',
457
+ 'snippet' : [
458
+ 'expr = x**2 + x**4',
459
+ 'replacements = {x**2: y}',
460
+ 'expr = expr.xreplace(replacements)',
461
+ ],
462
+ },
463
+ // {
464
+ // 'name' : 'rewrite',
465
+ // 'snippet' : [
466
+ // 'expr = tan(x)',
467
+ // 'expr = expr.rewrite(sin)',
468
+ // ],
469
+ // },
470
+ ],
471
+ },
472
+ {
473
+ 'name' : 'Evaluation',
474
+ 'sub-menu' : [
475
+ {
476
+ 'name' : 'Evaluate numerically to arbitrary precision',
477
+ 'snippet' : [
478
+ 'expr = x * sqrt(8)',
479
+ 'precision = 50',
480
+ 'val = N(expr, precision, subs={x:2.4})',
481
+ ],
482
+ },
483
+ {
484
+ 'name' : 'Evaluate numerically to python float',
485
+ 'snippet' : [
486
+ 'expr = x * sqrt(8)',
487
+ 'val = float(expr.subs([(x, 2.4)]))',
488
+ ],
489
+ },
490
+ {
491
+ 'name' : 'Create numpy function for efficient evaluation',
492
+ 'snippet' : [
493
+ 'import numpy',
494
+ 'a = numpy.arange(10)',
495
+ 'expr = sin(x)',
496
+ 'f = lambdify(x, expr, "numpy")',
497
+ 'vals = f(a)',
498
+ ],
499
+ },
500
+ ],
501
+ },
502
+ '---',
503
+ {
504
+ 'name' : 'Polynomials',
505
+ 'sub-menu' : [
506
+ {
507
+ 'name' : 'Factor polynomial over rationals',
508
+ 'snippet' : [
509
+ 'expr = x**3 - x**2 + x - 1',
510
+ 'expr = factor(expr)',
511
+ ],
512
+ },
513
+ {
514
+ 'name' : 'Collect common powers of a term',
515
+ 'snippet' : [
516
+ 'expr = x*y + x - 3 + 2*x**2 - z*x**2 + x**3',
517
+ 'expr = collect(expr, x)',
518
+ ],
519
+ },
520
+ {
521
+ 'name' : 'Extract coefficient of a term',
522
+ 'snippet' : [
523
+ 'expr = 3+2*x+4*x**2',
524
+ 'expr = expr.coeff(x**2)',
525
+ ],
526
+ },
527
+ ],
528
+ },
529
+ {
530
+ 'name' : 'Rational functions',
531
+ 'sub-menu' : [
532
+ {
533
+ 'name' : 'Cancel',
534
+ 'snippet' : [
535
+ 'expr = (x**2 + 2*x + 1)/(x**2 + x)',
536
+ 'expr = cancel(expr)',
537
+ ],
538
+ },
539
+ {
540
+ 'name' : 'Decompose into partial fractions',
541
+ 'snippet' : [
542
+ 'expr = (4*x**3 + 21*x**2 + 10*x + 12)/(x**4 + 5*x**3 + 5*x**2 + 4*x)',
543
+ 'expr = apart(expr)',
544
+ ],
545
+ },
546
+ {
547
+ 'name' : 'Join over common denominator',
548
+ 'snippet' : [
549
+ 'expr = 1/x + 1/y',
550
+ 'expr = ratsimp(expr)',
551
+ ],
552
+ },
553
+ {
554
+ 'name' : 'Remove square roots from denominator',
555
+ 'snippet' : [
556
+ 'expr = 1/(1+I)',
557
+ 'expr = radsimp(expr)',
558
+ ],
559
+ },
560
+ ],
561
+ },
562
+ {
563
+ 'name' : 'Powers',
564
+ 'sub-menu' : [
565
+ {
566
+ 'name' : 'Important caveats',
567
+ 'external-link' : 'http://docs.sympy.org/dev/tutorial/simplification.html#powers'
568
+ },
569
+ '---',
570
+ // {
571
+ // 'name' : 'Setup for these snippets',
572
+ // 'snippet' : [
573
+ // 'x, y = symbols("x, y", positive=True)',
574
+ // 'a, b = symbols("a, b", real=True)',
575
+ // 'z, t, c = symbols("z, t, c")',
576
+ // ],
577
+ // },
578
+ {
579
+ 'name' : 'Simplify powers for general arguments',
580
+ 'snippet' : [
581
+ 'powsimp(x**y * x**z)',
582
+ ],
583
+ },
584
+ {
585
+ 'name' : 'Simplify powers, forcing assumptions',
586
+ 'snippet' : [
587
+ 'powsimp(x**y * x**z, force=True)',
588
+ ],
589
+ },
590
+ {
591
+ 'name' : 'Expand powers by exponent for general arguments',
592
+ 'snippet' : [
593
+ 'expand_power_exp(x**(y + z))',
594
+ ],
595
+ },
596
+ {
597
+ 'name' : 'Expand powers of multiplied bases, forcing assumptions',
598
+ 'snippet' : [
599
+ 'expand_power_base((x*y)**z, force=True)',
600
+ ],
601
+ },
602
+ {
603
+ 'name' : 'Collect exponents on powers for general arguments',
604
+ 'snippet' : [
605
+ 'powdenest((x**y)**z)',
606
+ ],
607
+ },
608
+ {
609
+ 'name' : 'Collect exponents on powers, forcing assumptions',
610
+ 'snippet' : [
611
+ 'powdenest((x**y)**z, force=True)',
612
+ ],
613
+ },
614
+ {
615
+ 'name' : 'Collect exponents on powers, forcing assumptions and polar simplifications',
616
+ 'snippet' : [
617
+ 'powdenest((z**a)**b, force=True, polar=True)',
618
+ ],
619
+ },
620
+ {
621
+ 'name' : 'Denest square-roots',
622
+ 'snippet' : [
623
+ 'sqrtdenest(sqrt(5 + 2*sqrt(6)))',
624
+ ],
625
+ },
626
+ ],
627
+ },
628
+ {
629
+ 'name' : 'Exponentials and Logarithms',
630
+ 'sub-menu' : [
631
+ {
632
+ 'name' : 'Important caveats',
633
+ 'external-link' : 'http://docs.sympy.org/dev/tutorial/simplification.html#exponentials-and-logarithms'
634
+ },
635
+ '---',
636
+ // {
637
+ // 'name' : 'Setup for these snippets',
638
+ // 'snippet' : [
639
+ // 'x, y = symbols("x, y", positive=True)',
640
+ // 'n = symbols("n", real=True)',
641
+ // ],
642
+ // },
643
+ {
644
+ 'name' : 'Combine exponentials',
645
+ 'snippet' : [
646
+ 'powsimp(exp(y) * exp(z))',
647
+ ],
648
+ },
649
+ {
650
+ 'name' : 'Expand logarithms for general arguments',
651
+ 'snippet' : [
652
+ 'expand_log(log(x*y))',
653
+ ],
654
+ },
655
+ {
656
+ 'name' : 'Expand logarithms, forcing assumptions',
657
+ 'snippet' : [
658
+ 'expand_log(log(z**2), force=True)',
659
+ ],
660
+ },
661
+ {
662
+ 'name' : 'Combine logarithms for general arguments',
663
+ 'snippet' : [
664
+ 'logcombine(log(x) + z*log(y))',
665
+ ],
666
+ },
667
+ {
668
+ 'name' : 'Combine logarithms, forcing assumptions',
669
+ 'snippet' : [
670
+ 'logcombine(log(x) + z*log(y))',
671
+ ],
672
+ },
673
+ {
674
+ 'name' : 'Simplification, possibly to trig functions',
675
+ 'snippet' : [
676
+ 'exptrigsimp(exp(z) + exp(-z))',
677
+ ],
678
+ },
679
+ ],
680
+ },
681
+ {
682
+ 'name' : 'Trigonometric functions',
683
+ 'sub-menu' : [
684
+ {
685
+ 'name' : 'Expansion',
686
+ 'snippet' : [
687
+ 'expr = sin(x + y)',
688
+ 'expr = expand(expr, trig=True)',
689
+ ],
690
+ },
691
+ {
692
+ 'name' : 'Simplification',
693
+ 'snippet' : [
694
+ 'expr = sin(x)**4 - 2*cos(x)**2*sin(x)**2 + cos(x)**4',
695
+ 'expr = trigsimp(expr)',
696
+ ],
697
+ },
698
+ {
699
+ 'name' : 'Simplification, possibly to exponentials',
700
+ 'snippet' : [
701
+ 'expr = cosh(z) - sinh(z)',
702
+ 'expr = exptrigsimp(expr)',
703
+ ],
704
+ },
705
+ ],
706
+ },
707
+ {
708
+ 'name' : 'Miscellaneous',
709
+ 'sub-menu' : [
710
+ {
711
+ 'name' : 'Simplify factorials',
712
+ 'snippet' : [
713
+ 'expr = factorial(n)/factorial(n - 3)',
714
+ 'expr = combsimp(expr)',
715
+ ],
716
+ },
717
+ {
718
+ 'name' : 'Simplify binomials',
719
+ 'snippet' : [
720
+ 'expr = binomial(n+1, k+1)/binomial(n, k)',
721
+ 'expr = combsimp(expr)',
722
+ ],
723
+ },
724
+ {
725
+ 'name' : 'Simplify numerical expressions to exact values',
726
+ 'snippet' : [
727
+ 'nsimplify(4.0/(1+sqrt(5.0)), constants=[GoldenRatio,])',
728
+ ],
729
+ },
730
+ {
731
+ 'name' : 'Expand gamma functions',
732
+ 'snippet' : [
733
+ 'expr = gamma(z+3)',
734
+ 'expr = expand_func(expr)',
735
+ ],
736
+ },
737
+ {
738
+ 'name' : 'Simplify Bessel functions',
739
+ 'snippet' : [
740
+ 'expr = besselj(x, z*polar_lift(-1))',
741
+ 'expr = besselsimp(expr)',
742
+ ],
743
+ },
744
+ ],
745
+ },
746
+ ],
747
+ },
748
+ ],
749
+ };
750
+ });
.local/share/jupyter/nbextensions/toc2/toc2.js ADDED
@@ -0,0 +1,826 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ (requirejs.specified('base/js/namespace') ? define : function(deps, callback) {
2
+ "use strict";
3
+ // if here, the Jupyter namespace hasn't been specified to be loaded.
4
+ // This means that we're probably embedded in a page, so we need to make
5
+ // our definition with a specific module name
6
+ return define('nbextensions/toc2/toc2', deps, callback);
7
+ })(['jquery', 'require'], function($, requirejs) {
8
+ "use strict";
9
+
10
+ var IPython;
11
+ var events;
12
+ var liveNotebook = false;
13
+ var all_headers = $("#notebook").find(":header");
14
+
15
+ // default values for system-wide configurable parameters
16
+ var default_cfg = {
17
+ colors: {
18
+ hover_highlight: '#DAA520',
19
+ selected_highlight: '#FFD700',
20
+ running_highlight: '#FF0000',
21
+ wrapper_background: '#FFFFFF',
22
+ sidebar_border: '#EEEEEE',
23
+ navigate_text: '#333333',
24
+ navigate_num: '#000000',
25
+ on_scroll: '#2447f0',
26
+ },
27
+ collapse_to_match_collapsible_headings: false,
28
+ markTocItemOnScroll: true,
29
+ moveMenuLeft: true,
30
+ navigate_menu: true,
31
+ threshold: 4,
32
+ widenNotebook: false,
33
+ };
34
+ // default values for per-notebook configurable parameters
35
+ var metadata_settings = {
36
+ nav_menu: {},
37
+ number_sections: true,
38
+ sideBar: true,
39
+ skip_h1_title: false,
40
+ base_numbering: 1,
41
+ title_cell: 'Table of Contents',
42
+ title_sidebar: 'Contents',
43
+ toc_cell: false,
44
+ toc_position: {},
45
+ toc_section_display: true,
46
+ toc_window_display: false,
47
+ };
48
+ $.extend(true, default_cfg, metadata_settings);
49
+
50
+ /**
51
+ * Read our config from server config & notebook metadata
52
+ * This function should only be called when both:
53
+ * 1. the notebook (and its metadata) has fully loaded
54
+ * AND
55
+ * 2. Jupyter.notebook.config.loaded has resolved
56
+ */
57
+ var read_config = function () { // returns the merged config object (server config + notebook metadata)
58
+ var cfg = default_cfg; // alias, not a copy: the deep $.extend below writes into default_cfg itself
59
+
60
+ if (!liveNotebook) {
61
+ return cfg; // non-live notebook: no server config or metadata is consulted, defaults are returned as-is
62
+ }
63
+
64
+ // config may be specified at system level or at document level.
65
+ // first, update defaults with config loaded from server
66
+ $.extend(true, cfg, IPython.notebook.config.data.toc2);
67
+ // ensure notebook metadata has toc object, cache old values
68
+ var md = IPython.notebook.metadata.toc || {};
69
+ // reset notebook metadata to remove old values
70
+ IPython.notebook.metadata.toc = {};
71
+ // then update cfg with any found in current notebook metadata
72
+ // and save in nb metadata (then can be modified per document)
73
+ Object.keys(metadata_settings).forEach(function (key) { // only the per-notebook keys are copied into the metadata
74
+ cfg[key] = IPython.notebook.metadata.toc[key] = (md.hasOwnProperty(key) ? md : cfg)[key]; // old metadata value wins when present, else the current cfg value
75
+ });
76
+ return cfg;
77
+ };
78
+
79
+ // globally-used status variables:
80
+ var rendering_toc_cell = false;
81
+ // toc_position default also serves as the defaults for a non-live notebook
82
+ var toc_position = {height: 'calc(100% - 180px)', width: '20%', left: '10px', top: '150px'};
83
+
84
+ try {
85
+ // this will work in a live notebook because nbextensions & custom.js
86
+ // are loaded by/after notebook.js, which requires base/js/namespace
87
+ IPython = requirejs('base/js/namespace');
88
+ events = requirejs('base/js/events');
89
+ liveNotebook = true;
90
+ } catch (err) {
91
+ // We *are* theoretically in a non-live notebook
92
+ console.log('[toc2] working in non-live notebook'); //, err);
93
+ // in non-live notebook, there's no event structure, so we make our own
94
+ if (window.events === undefined) {
95
+ var Events = function() {};
96
+ window.events = $([new Events()]);
97
+ }
98
+ events = window.events;
99
+ }
100
+ var Jupyter = IPython;
101
+
102
+ var setMd = function(key, value) {
103
+ if (liveNotebook) {
104
+ var md = IPython.notebook.metadata.toc;
105
+ if (md === undefined) {
106
+ md = IPython.notebook.metadata.toc = {};
107
+ }
108
+ var old_val = md[key];
109
+ md[key] = value;
110
+ if (typeof _ !== undefined ? !_.isEqual(value, old_val) : old_val != value) {
111
+ IPython.notebook.set_dirty();
112
+ }
113
+ }
114
+ return value;
115
+ };
116
+
117
/**
 * Advance a heading counter array in place and return the label prefix.
 *
 * The counter at position h_idx (zero based) is incremented, every deeper
 * counter is reset to 0, and a copy of the counters up to and including
 * h_idx is returned.
 */
function incr_lbl(ary, h_idx) {
    ary[h_idx]++;
    // reset all sub-level counters, deepest first
    for (var k = ary.length - 1; k > h_idx; k--) {
        ary[k] = 0;
    }
    var label_len = h_idx + 1;
    return ary.slice(0, label_len);
}
124
+
125
/**
 * Strip link anchors and rendered MathJax from a (cloned) heading element.
 *
 * Direct-child anchor/toc-mod links are removed, each math/tex script is
 * replaced by its source wrapped in '$', and MathJax preview/output spans
 * are dropped. The same jQuery object is returned.
 */
function removeMathJaxPreview(elt) {
    elt.children('.anchor-link, .toc-mod-link').remove();

    var tex_scripts = elt.find("script[type='math/tex']");
    tex_scripts.each(function(idx, script) {
        var tex = $(script);
        tex.replaceWith('$' + tex.text() + '$');
    });

    elt.find("span.MathJax_Preview").remove();
    elt.find("span.MathJax").remove();
    return elt;
}
135
+
136
/**
 * Click handler for ToC links.
 *
 * Records the current and target sections in the browser history, scrolls
 * the target heading into view and, in a live notebook, selects the cell
 * containing it and highlights the matching ToC entry.
 *
 * If the target element no longer exists (e.g. the ToC is stale), the click
 * is ignored before any history entry is pushed, instead of throwing.
 *
 * @param {Event} evt click event; evt.currentTarget carries data-toc-modified-id
 */
var callback_toc_link_click = function(evt) {
    // workaround for https://github.com/jupyter/notebook/issues/699
    setTimeout(function() {
        $.ajax()
    }, 100);
    evt.preventDefault();

    var trg_id = $(evt.currentTarget).attr('data-toc-modified-id');
    // use native document method as jquery won't cope with characters
    // like . in an id
    var target = document.getElementById(trg_id);
    if (!target) {
        console.log('[toc2] link target not found:', trg_id);
        return;
    }

    // Each time a link is clicked in the toc, save the current position and target in the history
    var currentSection = $('#toc .highlight_on_scroll a').data('tocModifiedId');
    if (window.history.state != null) {
        if (window.history.state.back != currentSection) {
            window.history.pushState({'back': currentSection}, "", '');
        }
    }
    window.history.pushState({'back': trg_id}, "", '');
    window.history.lastjump = trg_id;

    // use native scrollIntoView method with semi-unique id
    // ! browser native click doesn't follow links on all browsers
    target.scrollIntoView(true);
    if (liveNotebook) {
        var cell = $(target).closest('.cell').data('cell');
        // highlight_toc_item dereferences data.cell, so skip when no cell is attached
        if (cell) {
            Jupyter.notebook.select(Jupyter.notebook.find_cell_index(cell));
            highlight_toc_item("toc_link_click", {
                cell: cell
            });
        }
    }
};
166
+
167
+ //
168
// Browser back/forward navigation: jump to the section id stored in the
// history state by callback_toc_link_click. The stored id may no longer
// exist in the document, so a missing element is ignored rather than
// dereferenced.
window.addEventListener('popstate',
    function(e) {
        if (e.state != null && e.state.back != null) {
            var back_id = e.state.back;
            var target = document.getElementById(back_id);
            if (!target) {
                return;
            }
            target.scrollIntoView(true);
            if (liveNotebook) {
                var cell = $(target).closest('.cell').data('cell');
                // highlight_toc_item dereferences data.cell, so skip when no cell is attached
                if (cell) {
                    Jupyter.notebook.select(Jupyter.notebook.find_cell_index(cell));
                    highlight_toc_item("toc_link_click", {
                        cell: cell
                    });
                }
            }
        }
    });
182
+
183
/**
 * Build the <a> element for one ToC entry.
 *
 * The link copies the href of the heading's anchor-link, carries the
 * modified id as data-toc-modified-id, shows the heading's HTML (minus
 * anchors and rendered MathJax), and is wired to callback_toc_link_click.
 */
var make_link = function(h, toc_mod_id) {
    // take the heading content *excluding* the link text, working on a copy
    var heading_copy = removeMathJaxPreview(h.clone());
    var link_attrs = {
        'href': h.find('.anchor-link').attr('href'),
        'data-toc-modified-id': toc_mod_id
    };
    var link = $('<a>').attr(link_attrs);
    link.html(heading_copy.html());
    link.on('click', callback_toc_link_click);
    return link;
};
196
+
197
/**
 * Mark the ToC entry that corresponds to a notebook cell.
 *
 * The entry is looked up through the id of the '.toc-mod-link' anchor found
 * in the cell, or failing that the last such anchor among the cell's
 * preceding siblings. For an 'execute' event the entry switches from the
 * select class to the execute class; for anything else the select highlight
 * is moved to the entry.
 */
function highlight_toc_item(evt, data) {
    var cell_elt = $(data.cell.element);
    if (cell_elt.length < 1) {
        return;
    }

    var trg_id = cell_elt.find('.toc-mod-link').attr('id');
    if (!trg_id) {
        trg_id = cell_elt.prevAll().find('.toc-mod-link').eq(-1).attr('id');
    }

    var toc_links = (trg_id === undefined) ? $() : $('.toc a').filter(function(idx, link) {
        return $(link).attr('data-toc-modified-id') === trg_id;
    });

    if (evt.type === 'execute') {
        // remove the selected class and add execute class
        // if the cell is selected again, it will be highlighted as selected+running
        toc_links.removeClass('toc-item-highlight-select').addClass('toc-item-highlight-execute');
        return;
    }
    $('.toc .toc-item-highlight-select').removeClass('toc-item-highlight-select');
    toc_links.addClass('toc-item-highlight-select');
}
219
+
220
/**
 * Insert a resizable "Navigate" dropdown after the kernel menu entry.
 *
 * A saved size in cfg.nav_menu is applied when present; otherwise
 * cfg.nav_menu is initialised and the size is captured before each notebook
 * save. The optional callback runs once the menu has been set up.
 */
var create_navigate_menu = function(cfg, callback) {
    // keep the inner ToC container the same size as the dropdown
    var fit_inner_to_menu = function() {
        $('#navigate_menu').css('width', $('#Navigate_menu').css('width'));
        $('#navigate_menu').css('height', $('#Navigate_menu').height());
    };
    // remember the dropdown size in the config object
    var remember_menu_size = function() {
        cfg.nav_menu['width'] = $('#Navigate_menu').css('width');
        cfg.nav_menu['height'] = $('#Navigate_menu').css('height');
    };

    $('#kernel_menu').parent().after('<li id="Navigate"/>');
    $('#Navigate')
        .addClass('dropdown')
        .append($('<a/>').attr('href', '#').attr('id', 'Navigate_sub'));
    $('#Navigate_sub')
        .text('Navigate')
        .addClass('dropdown-toggle')
        .attr('data-toggle', 'dropdown');

    var inner = $("<div/>").attr("id", "navigate_menu").addClass('toc');
    var dropdown = $('<ul/>').attr('id', 'Navigate_menu').addClass('dropdown-menu').append(inner);
    $('#Navigate').append(dropdown);

    if (cfg['nav_menu']) {
        $('#Navigate_menu').css(cfg['nav_menu']);
        fit_inner_to_menu();
    } else {
        cfg.nav_menu = {};
        events.on("before_save.Notebook", function() {
            try {
                remember_menu_size();
            } catch (e) {
                console.log("[toc2] Error in metadata (navigation menu) - Proceeding", e)
            }
        });
    }

    $('#Navigate_menu').resizable({
        resize: function(event, ui) {
            fit_inner_to_menu();
        },
        stop: function(event, ui) {
            remember_menu_size();
        }
    });

    if (callback) {
        callback();
    }
}
257
+
258
/**
 * Adjust the notebook container's left margin and width around the sidebar.
 *
 * When the ToC is a visible sidebar, the container is shifted right by the
 * sidebar width plus a margin and narrowed if it would not otherwise fit.
 * With cfg.widenNotebook set, the container takes all remaining space.
 * Otherwise both properties are reset to ''.
 */
function setNotebookWidth(cfg, st) {
    var MARGIN = 20;
    var container = $('#notebook-container');
    var notebook_w = $('#notebook').width();
    var toc_wrap = $('#toc-wrapper');

    var sidebar_shown = cfg.sideBar && toc_wrap.is(':visible');
    var sidebar_w = sidebar_shown ? toc_wrap.outerWidth() : 0;
    var room = notebook_w - 2 * MARGIN - sidebar_w;

    // slim the notebook only when the sidebar leaves too little room for it
    var must_shrink = sidebar_shown && room <= container.outerWidth();

    container.css({
        marginLeft: sidebar_shown ? sidebar_w + MARGIN : '',
        width: (cfg.widenNotebook || must_shrink) ? room : ''
    });
}
279
+
280
/**
 * Copy the ToC wrapper's current geometry into toc_position and store it
 * via setMd. For a sidebar only the width is recorded; for a floating
 * window left, top, height and width are recorded.
 */
var saveTocPosition = function () {
    var wrapper = $('#toc-wrapper');
    var tracked_props;
    if (wrapper.hasClass('sidebar-wrapper')) {
        tracked_props = ['width'];
    } else {
        tracked_props = ['left', 'top', 'height', 'width'];
    }
    var current_css = wrapper.css(tracked_props);
    $.extend(toc_position, current_css);
    setMd('toc_position', toc_position);
};
286
+
287
/**
 * Expand or minimise the ToC wrapper according to the configuration.
 *
 * The ToC counts as open when it is a sidebar or when
 * cfg.toc_section_display is set. When closing, the wrapper is reduced to
 * the height left after subtracting the #toc list, and the list is hidden
 * once the animation completes.
 *
 * @returns the computed open state
 */
var makeUnmakeMinimized = function (cfg, animate) {
    var wrap = $('#toc-wrapper');
    var open = cfg.sideBar || cfg.toc_section_display;
    var anim_opts = {duration: animate ? 'fast' : 0};
    var new_css;

    if (!open) {
        // shrink to whatever remains once the list itself is taken away
        new_css = {
            height: wrap.outerHeight() - wrap.find('#toc').outerHeight()
        };
        anim_opts.complete = function () {
            $('#toc').hide();
            $('#toc-wrapper').css('width', '');
        };
    } else {
        $('#toc').show();
        if (cfg.sideBar) {
            new_css = {};
        } else {
            new_css = {height: toc_position.height, width: toc_position.width};
        }
    }

    wrap.toggleClass('closed', !open);
    wrap.animate(new_css, anim_opts);
    wrap.find('.hide-btn').attr('title', open ? 'Hide ToC' : 'Show ToC');
    return open;
};
309
+
310
/**
 * Switch the ToC wrapper between sidebar and floating-window presentation
 * according to cfg.sideBar, update the resize handles and their icons, and
 * re-fit the notebook width.
 */
var makeUnmakeSidebar = function (cfg) {
    var make_sidebar = cfg.sideBar;
    var wrap = $('#toc-wrapper');

    wrap.toggleClass('sidebar-wrapper', make_sidebar);
    wrap.toggleClass('float-wrapper', !make_sidebar);
    wrap = wrap.resizable('option', 'handles', make_sidebar ? 'e' : 'all');

    wrap.children('.ui-resizable-se').toggleClass('ui-icon', !make_sidebar);
    wrap.children('.ui-resizable-e').toggleClass('ui-icon ui-icon-grip-dotted-vertical', make_sidebar);

    if (!make_sidebar) {
        wrap.css({height: toc_position.height});
    } else {
        // NOTE(review): DOM elements have no standard `top` property, so this
        // presumably evaluates to undefined in a live notebook - confirm intent.
        var sidebar_top = liveNotebook ? document.getElementById('site').top : 0
        wrap.css({top: sidebar_top, height: "", left: 0});
    }
    setNotebookWidth(cfg);
};
327
+
328
/**
 * Create the ToC wrapper (header with hide/reload/settings buttons plus the
 * #toc list container), make it draggable and resizable, restore its saved
 * position, and apply the initial display state from cfg.
 */
var create_toc_div = function(cfg, st) {

    var callbackPageResize = function (evt) {
        setNotebookWidth(cfg);
    };

    // --- header buttons ---------------------------------------------------
    var hide_btn = $('<i class="fa fa-fw hide-btn" title="Hide ToC">')
        .on('click', function (evt) {
            cfg.toc_section_display = setMd('toc_section_display', !cfg.toc_section_display);
            makeUnmakeMinimized(cfg, true);
        });
    var reload_btn = $('<i class="fa fa-fw fa-refresh" title="Reload ToC">')
        .on('click', function(evt) {
            var icon = $(evt.currentTarget).addClass('fa-spin');
            table_of_contents(cfg, st);
            icon.removeClass('fa-spin');
        });
    var settings_btn = $('<i class="fa fa-fw fa-cog" title="ToC settings"/>')
        .on('click', function(evt) {
            show_settings_dialog(cfg, st);
        });

    var toc_header = $('<div id="toc-header"/>')
        .append('<span class="header"/>')
        .append(hide_btn)
        .append(reload_btn)
        .append(settings_btn);
    var toc_list = $("<div/>").attr("id", "toc").addClass('toc');

    var toc_wrapper = $('<div id="toc-wrapper"/>')
        .css('display', 'none')
        .append(toc_header)
        .append(toc_list)
        .prependTo(liveNotebook ? '#site' : document.body);

    // --- dragging: snap to the left edge turns the window into a sidebar ---
    var on_drag = function(event, ui) {
        var make_sidebar = ui.position.left < 20; // 20 is snapTolerance
        if (make_sidebar) {
            ui.position.top = liveNotebook ? document.getElementById('site').top : 0
            ui.position.left = 0;
        }
        if (make_sidebar !== cfg.sideBar) {
            cfg.toc_section_display = setMd('toc_section_display', true);
            cfg.sideBar = setMd('sideBar', make_sidebar);
            makeUnmakeMinimized(cfg);
            makeUnmakeSidebar(cfg);
        }
    };
    // enable dragging and save position on stop moving
    toc_wrapper.draggable({
        drag: on_drag,
        stop: saveTocPosition,
        containment: 'parent',
        snap: 'body, #site',
        snapTolerance: 20,
    });

    // --- resizing -----------------------------------------------------------
    var on_resize = function(event, ui) {
        if (cfg.sideBar) {
            // unset the height set by jquery resizable
            $('#toc-wrapper').css('height', '');
            setNotebookWidth(cfg, st)
        }
    };
    var on_resize_start = function(event, ui) {
        if (!cfg.sideBar) {
            cfg.toc_section_display = setMd('toc_section_display', true);
            makeUnmakeMinimized(cfg);
        }
    };
    toc_wrapper.resizable({
        handles: 'all',
        resize: on_resize,
        start: on_resize_start,
        stop: saveTocPosition,
        containment: 'parent',
        minHeight: 100,
        minWidth: 165,
    });

    // On header/menu/toolbar resize, resize the toc itself
    $(window).on('resize', callbackPageResize);
    if (!liveNotebook) {
        // default to true for non-live notebook
        cfg.toc_window_display = true;
    }
    else {
        events.on("resize-header.Page toggle-all-headers", callbackPageResize);
        $.extend(toc_position, IPython.notebook.metadata.toc.toc_position);
    }

    // restore toc position at load
    toc_wrapper.css(cfg.sideBar ? {width: toc_position.width} : toc_position);

    // older toc2 versions stored string representations, so update those
    ['toc_window_display', 'toc_section_display'].forEach(function (key) {
        if (cfg[key] === 'none') {
            cfg[key] = setMd(key, false);
        }
    });

    toc_wrapper.toggle(cfg.toc_window_display);
    makeUnmakeSidebar(cfg);
    $("#toc_button").toggleClass('active', cfg.toc_window_display);
    if (!cfg.toc_section_display) {
        makeUnmakeMinimized(cfg);
    }
};
431
+
432
+ //----------------------------------------------------------------------------
433
+ // on scroll - mark the toc item corresponding to the first header visible in
434
+ // the viewport with 'highlight_on_scroll' class
435
+ // some elements from https://stackoverflow.com/questions/20791374/jquery-check-if-element-is-visible-in-viewport
436
/**
 * When cfg.markTocItemOnScroll is set, install a scroll handler that tags
 * the ToC entry of the first heading visible in the viewport with the
 * 'highlight_on_scroll' class.
 */
function highlightTocItemOnScroll(cfg, st) {
    if (!cfg.markTocItemOnScroll) {
        return;
    }
    var scrolling_elt = liveNotebook ? '#site' : window;

    $(scrolling_elt).on('scroll', function() {
        // the page header only counts in a live notebook, and only when shown
        var headerHeight = 0;
        if (liveNotebook && $('#header').is(':visible')) {
            headerHeight = $('#header').height();
        }
        var bottom_of_screen = $(window).scrollTop() + $(scrolling_elt).height() + headerHeight;
        var top_of_screen = $(window).scrollTop() + headerHeight;

        all_headers.each(function(i, h) {
            var top_of_element = $(h).offset().top;

            // this heading is below the viewport: stop iterating
            if (bottom_of_screen < top_of_element) {
                return false;
            }
            var in_view = (bottom_of_screen > top_of_element) && (top_of_screen < top_of_element);
            if (!in_view) {
                return; // heading is above the viewport, try the next one
            }

            var trg_id = $(h).attr('data-toc-modified-id');
            if (trg_id !== undefined) {
                var toc_link = $('#toc a').filter(function(idx, elt) {
                    return $(elt).attr('data-toc-modified-id') === trg_id;
                });
                $('#toc .highlight_on_scroll').removeClass('highlight_on_scroll');
                toc_link.parent().addClass('highlight_on_scroll');
            }
            return false; // first visible heading handled
        });
    });
}
469
+ //----------------------------------------------------------------------------
470
+ // TOC CELL -- if cfg.toc_cell=true, add and update a toc cell in the notebook.
471
+ // This cell, initially at the very beginning, can be moved.
472
+ // Its contents are automatically updated.
473
+ // Optionally, the sections in the toc can be numbered.
474
+
475
/**
 * Keep the in-notebook ToC cell in sync with the current #toc contents.
 *
 * Non-live notebook: when cfg.toc_cell is set, the HTML of an existing ToC
 * cell is replaced and its links are re-wired.
 * Live notebook: the first cell flagged with metadata.toc is deleted when
 * cfg.toc_cell is off; otherwise it is created at the top if missing and
 * re-rendered. rendering_toc_cell is raised around insert/render so that
 * table_of_contents can ignore the calls these operations cause.
 */
function process_cell_toc(cfg, st) {
    var escaped_title = $('<div>').text(cfg.title_cell).html();
    var new_html = '<h1>' + escaped_title + '<span class="tocSkip"></span></h1>\n' +
        '<div class="toc">' + $('#toc').html() + '</div>';

    if (!liveNotebook) {
        if (cfg.toc_cell) {
            var static_cell = $('.cell > .toc').parent(':has(.tocSkip)');
            static_cell.html(new_html)
                .find('.toc-item li a')
                .on('click', callback_toc_link_click);
        }
        return;
    }

    // look for a possible toc cell
    var cells = IPython.notebook.get_cells();
    var toc_idx = -1;
    for (var k = 0; k < cells.length; k++) {
        if (cells[k].metadata.toc) {
            toc_idx = k;
            break;
        }
    }

    if (!cfg.toc_cell) {
        // delete the cell if we don't want it
        if (toc_idx >= 0) {
            return IPython.notebook.delete_cell(toc_idx);
        }
        return;
    }

    // toc_cell=true: we want a cell; create it at the beginning if it does not exist
    var cell_toc = toc_idx >= 0 ? cells[toc_idx] : undefined;
    if (cell_toc === undefined) {
        // set rendering_toc_cell flag to avoid loop on insert_cell_above
        rendering_toc_cell = true;
        cell_toc = IPython.notebook.insert_cell_above('markdown', 0);
        cell_toc.metadata.toc = true;
        rendering_toc_cell = false;
    }

    // set rendering_toc_cell flag to avoid loop on render
    rendering_toc_cell = true;
    cell_toc.set_text(new_html);
    cell_toc.render();
    rendering_toc_cell = false;

    cell_toc.element.find('.toc-item li a').on('click', callback_toc_link_click);
} //end function process_cell_toc --------------------------
522
+
523
/**
 * Collapse or expand the ToC subtree belonging to a heading id.
 *
 * The caret icon next to the matching link is switched, the nested list is
 * slid open or closed, and unless trigger_event is exactly false a
 * 'collapse.Toc' / 'uncollapse.Toc' event carrying the heading's cell is
 * fired.
 */
var collapse_by_id = function(trg_id, show, trigger_event) {
    var toc_links = $('.toc .toc-item > li > span > a').filter(function(idx, link) {
        return $(link).attr('data-toc-modified-id') === trg_id;
    });

    var carets = toc_links.siblings('i');
    carets.toggleClass('fa-caret-right', !show);
    carets.toggleClass('fa-caret-down', show);

    var sublists = toc_links.parent().siblings('ul');
    if (show) {
        sublists.slideDown('fast');
    } else {
        sublists.slideUp('fast');
    }

    if (trigger_event === false) {
        return;
    }
    // fire event for collapsible_heading to catch
    var event_name = (show ? 'un' : '') + 'collapse.Toc';
    var cell = $(document.getElementById(trg_id)).closest('.cell').data('cell');
    events.trigger(event_name, {
        cell: cell
    });
};
539
+
540
/**
 * Mirror a collapse/uncollapse event in the ToC: finds the cell's heading
 * that carries a data-toc-modified-id and collapses or expands the matching
 * ToC entry without re-emitting an event.
 */
var callback_toc2_collapsible_headings = function(evt, data) {
    var tagged_headers = data.cell.element.find(':header').filter(function(idx, hdr) {
        return !!$(hdr).attr('data-toc-modified-id');
    });
    var trg_id = tagged_headers.attr('data-toc-modified-id');
    // event types containing 'un' mean the section is being shown again
    var show = evt.type.indexOf('un') >= 0;
    // use trigger_event false to avoid re-triggering collapsible_headings
    collapse_by_id(trg_id, show, false);
};
548
+
549
/**
 * Click handler for the caret icon of a ToC entry: a right-pointing caret
 * means the entry is currently collapsed, so the click expands it, and the
 * other way round.
 */
var callback_collapser = function(evt) {
    var caret = $(evt.currentTarget);
    var currently_collapsed = caret.hasClass('fa-caret-right');
    var link_id = caret.siblings('a').attr('data-toc-modified-id');
    collapse_by_id(link_id, currently_collapsed);
};
555
+
556
+ // Table of Contents =================================================================
557
/**
 * (Re)build the table of contents.
 *
 * Creates the ToC window on first use, scans the rendered text cells for
 * headings, numbers them when cfg.number_sections is set, fills the nested
 * ToC list, and then updates the navigation menu, the optional ToC cell and
 * the collapse controls.
 *
 * @param {Object} cfg toc2 configuration; in a non-live notebook missing
 *                     values are filled in from default_cfg
 * @param {Object} st  passed through to the helper functions
 */
var table_of_contents = function(cfg, st) {

    // if this call is a result of toc_cell rendering, do nothing to avoid
    // looping, as we're already in a table_of_contents call
    if (rendering_toc_cell) {
        return
    }

    // In a live notebook, read_config will have been called already, but
    // in non-live notebooks, ensure that all config values are defined.
    if (!liveNotebook) {
        cfg = $.extend(true, {}, default_cfg, cfg);
    }

    var toc_wrapper = $("#toc-wrapper");
    if (toc_wrapper.length === 0) { // toc window doesn't exist at all
        create_toc_div(cfg, st); // create it
        highlightTocItemOnScroll(cfg, st); // initialize highlighting on scroll
    }
    var ul = $('<ul/>').addClass('toc-item');

    // update sidebar/window title
    $('#toc-header > .header').text(cfg.title_sidebar + ' ');

    // update toc element
    $("#toc").empty().append(ul);

    var depth = 1;
    // update all headers with id that are in rendered text cell outputs,
    // excepting any header which contains an html tag with class 'tocSkip'
    // eg in ## title <a class='tocSkip'>,
    // or the ToC cell.
    all_headers = $('.text_cell_render').find('[id]:header:not(:has(.tocSkip))');
    // min_lvl ends up as the shallowest heading level actually present
    var min_lvl = 1 + Number(Boolean(cfg.skip_h1_title)),
        lbl_ary = [];
    for (; min_lvl <= 6; min_lvl++) {
        if (all_headers.is('h' + min_lvl)) {
            break;
        }
    }
    lbl_ary[0] = cfg.base_numbering - 1 // begin numbering at base_numbering
    for (var i = min_lvl + 1; i <= 6; i++) {
        lbl_ary[i - min_lvl] = 0;
    }

    //loop over all headers
    all_headers.each(function(i, h) {
        // remove pre-existing number
        $(h).children('.toc-item-num').remove();

        var level = parseInt(h.tagName.slice(1), 10) - min_lvl + 1;
        // skip below threshold, or h1 ruled out by cfg.skip_h1_title
        if (level < 1 || level > cfg.threshold) {
            return;
        }
        h = $(h);
        // numbered heading labels
        var num_str = incr_lbl(lbl_ary, level - 1).join('.');
        if (cfg.number_sections) {
            $('<span>')
                .text(num_str + '\u00a0\u00a0')
                .addClass('toc-item-num')
                .prependTo(h);
        }

        // walk down levels
        for (; depth < level; depth++) {
            var li = ul.children('li:last-child');
            if (li.length < 1) {
                li = $('<li>').appendTo(ul);
            }
            ul = $('<ul class="toc-item">').appendTo(li);
        }
        // walk up levels
        for (; depth > level; depth--) {
            ul = ul.parent().closest('.toc-item');
        }

        var toc_mod_id = h.attr('id') + '-' + num_str;
        h.attr('data-toc-modified-id', toc_mod_id);
        // add an anchor with modified id (replacing any existing one)
        h.children('.toc-mod-link').remove();
        $('<a>').addClass('toc-mod-link').attr('id', toc_mod_id).prependTo(h);

        // Create toc entry, append <li> tag to the current <ul>.
        ul.append(
            $('<li>').append(
                $('<span>').append(
                    make_link(h, toc_mod_id))));
    });

    // update navigation menu
    if (cfg.navigate_menu) {
        var pop_nav = function() { //callback for create_nav_menu
            $('#navigate_menu').empty().append($('#toc > .toc-item').clone());
        }
        if ($('#Navigate_menu').length == 0) {
            create_navigate_menu((liveNotebook ? IPython.notebook.metadata.toc : cfg), pop_nav);
        } else {
            pop_nav()
        }
    } else {
        // navigate_menu is off: remove the whole menu entry if it exists.
        // Removing only the toggle link would leave #Navigate_menu behind,
        // and the length check above would then never rebuild the menu.
        $('#Navigate').remove();
    }

    // if cfg.toc_cell=true, find/add and update a toc cell in the notebook.
    process_cell_toc(cfg, st);

    // add collapse controls
    $('<i>')
        .addClass('fa fa-fw fa-caret-down')
        .on('click', callback_collapser) // callback
        .prependTo('.toc li:has(ul) > span'); // only if li has descendants
    $('<i>').addClass('fa fa-fw ').prependTo('.toc li:not(:has(ul)) > span'); // otherwise still add <i> to keep things aligned

    events[cfg.collapse_to_match_collapsible_headings ? 'on' : 'off'](
        'collapse.CollapsibleHeading uncollapse.CollapsibleHeading', callback_toc2_collapsible_headings);
};
675
+
676
/**
 * Show the ToC window if it is hidden, hide it otherwise; store the new
 * state, re-fit the notebook width, rebuild the ToC and update the toolbar
 * button.
 */
var toggle_toc = function(cfg, st) {
    // toggle draw (first because of first-click behavior)
    var toc_wrap = $("#toc-wrapper");
    var now_visible = toc_wrap.is(':hidden');
    toc_wrap.toggle(now_visible);

    cfg.toc_window_display = setMd('toc_window_display', now_visible);

    setNotebookWidth(cfg);
    table_of_contents(cfg);

    var toolbar_btn = $("#toc_button");
    toolbar_btn.toggleClass('active', now_visible);
};
686
+
687
/**
 * Build and show the modal "ToC2 settings" dialog.
 *
 * Each input writes its value through setMd into cfg and rebuilds the ToC on
 * change. Bootstrap's modal plugin is used when available; otherwise a
 * minimal hand-rolled modal with backdrop is shown instead.
 *
 * @param {Object} cfg toc2 configuration, updated in place by the inputs
 * @param {Object} st  passed through to table_of_contents
 * @returns the modal jQuery object
 */
var show_settings_dialog = function (cfg, st) {

    // store the changed value under the input's metadata key and rebuild the ToC
    var callback_setting_change = function (evt) {
        var input = $(evt.currentTarget);
        var md_key = input.attr('tocMdKey');
        cfg[md_key] = setMd(md_key, input.attr('type') == 'checkbox' ? Boolean(input.prop('checked')) : input.val());
        table_of_contents(cfg, st);
    };
    // build one labelled input (checkbox or text field) for a setting
    var build_setting_input = function (md_key, md_label, input_type) {
        var opts = liveNotebook ? IPython.notebook.metadata.toc : cfg;
        var id = 'toc-settings-' + md_key;
        var fg = $('<div>').append(
            $('<label>').text(md_label).attr('for', id));
        var input = $('<input/>').attr({
            type: input_type || 'text', id: id, tocMdKey: md_key,
        }).on('change', callback_setting_change);
        if (input_type == 'checkbox') {
            fg.addClass('checkbox');
            input
                .prop('checked', opts[md_key])
                .prependTo(fg.children('label'));
        }
        else {
            fg.addClass('form-group');
            input
                .addClass('form-control')
                .val(opts[md_key])
                .appendTo(fg);
        }
        return fg;
    };

    var modal = $('<div class="modal fade" role="dialog"/>');
    var dialog_content = $("<div/>")
        .addClass("modal-content")
        .appendTo($('<div class="modal-dialog">').appendTo(modal));
    $('<div class="modal-header">')
        .append('<button type="button" class="close" data-dismiss="modal" aria-hidden="true">&times;</button>')
        .append('<h4 class="modal-title">ToC2 settings</h4>')
        .on('mousedown', function() { $('.modal').draggable({handle: '.modal-header'});})
        .appendTo(dialog_content);
    $('<div>')
        .addClass('modal-body')
        .append([
            // The conditional must be parenthesised: '+' binds tighter than
            // '?:', so without the parentheses the first sentence would only
            // act as an always-truthy condition and never be displayed.
            $('<div>').text(
                'These settings apply to this notebook only, and are stored in its metadata. ' +
                (liveNotebook ? 'The defaults for new notebooks can be edited from the nbextensions configurator.' :
                'The settings won\'t persist in non-live notebooks though.')),
            build_setting_input('number_sections', 'Automatically number headings', 'checkbox'),
            build_setting_input('skip_h1_title', 'Leave h1 items out of ToC', 'checkbox'),
            build_setting_input('base_numbering', 'Begin numbering at'),
            build_setting_input('toc_cell', 'Add notebook ToC cell', 'checkbox'),
            build_setting_input('title_cell', 'ToC cell title'),
            build_setting_input('title_sidebar', 'Sidebar/window title'),
            build_setting_input('sideBar', 'Display as a sidebar (otherwise as a floating window)', 'checkbox'),
            build_setting_input('toc_window_display', 'Display ToC window/sidebar at startup', 'checkbox'),
            build_setting_input('toc_section_display', 'Expand window/sidebar at startup', 'checkbox'),
        ])
        .appendTo(dialog_content);
    $('<div class="modal-footer">')
        .append('<button class="btn btn-default btn-sm btn-primary" data-dismiss="modal">Ok</button>')
        .appendTo(dialog_content);
    // focus button on open
    modal.on('shown.bs.modal', function () {
        setTimeout(function () {
            dialog_content.find('.modal-footer button').last().focus();
        }, 0);
    });

    if (liveNotebook) {
        // keep notebook keyboard shortcuts from firing while typing in the dialog
        Jupyter.notebook.keyboard_manager.disable();
        modal.on('hidden.bs.modal', function () {
            modal.remove(); // destroy modal on hide
            Jupyter.notebook.keyboard_manager.enable();
            Jupyter.notebook.keyboard_manager.command_mode();
            var cell = Jupyter.notebook.get_selected_cell();
            if (cell) cell.select();
        });
    }

    // Try to use bootstrap modal, but bootstrap's js may not be available
    // (e.g. as in non-live notebook), so we provide a poor-man's version
    try {
        return modal.modal({backdrop: 'static'});
    }
    catch (err) {
        // show the backdrop
        $(document.body).addClass('modal-open');
        var $backdrop = $('<div class="modal-backdrop fade">').appendTo($(document.body));
        $backdrop[0].offsetWidth; // force reflow
        $backdrop.addClass('in');
        // hook up removals
        modal.on('click', '[data-dismiss="modal"]', function modal_close() {
            // hide the modal foreground
            modal.removeClass('in');
            setTimeout(function on_foreground_hidden() {
                modal.remove();
                // now hide the backdrop
                $backdrop.removeClass('in');
                // wait for transition
                setTimeout(function on_backdrop_hidden() {
                    $(document.body).removeClass('modal-open');
                    $backdrop.remove();
                }, 150);
            }, 300);
        });
        // wait for transition
        setTimeout(function () {
            // now show the modal foreground
            modal.appendTo(document.body).show().scrollTop(0);
            modal[0].offsetWidth; // force reflow
            modal.addClass('in');
            // wait for transition, then trigger callbacks
            setTimeout(function on_foreground_shown() {
                modal.trigger('shown.bs.modal');
            }, 300);
        }, 150);
        return modal;
    }
};
807
+
808
+ return {
809
+ highlight_toc_item: highlight_toc_item,
810
+ table_of_contents: table_of_contents,
811
+ toggle_toc: toggle_toc,
812
+ read_config: read_config,
813
+ };
814
+ });
815
+ // export table_of_contents to global namespace for backwards compatibility
816
+ // Do export synchronously, so that it's defined as soon as this file is loaded
817
+ if (!requirejs.specified('base/js/namespace')) {
818
+ window.table_of_contents = function(cfg, st) {
819
+ "use strict";
820
+ // use require to ensure the module is correctly loaded before the
821
+ // actual call is made
822
+ requirejs(['nbextensions/toc2/toc2'], function(toc2) {
823
+ toc2.table_of_contents(cfg, st);
824
+ });
825
+ };
826
+ }
.local/share/jupyter/nbextensions/toc2/toc2.yaml ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Type: Jupyter Notebook Extension
2
+ Name: Table of Contents (2)
3
+ Description: The toc2 extension makes it possible to collect all running headers and display them in a floating window, as a sidebar or with a navigation menu. The extension is also draggable, resizable, collapsible and dockable, and features automatic numbering with unique link ids, and an optional toc cell.
4
+ Link: README.md
5
+ Icon: icon.png
6
+ Main: main.js
7
+ Compatibility: 4.x, 5.x
8
+ Parameters:
9
+ - name: toc2.number_sections
10
+ description: Automatically number notebook's sections
11
+ input_type: checkbox
12
+ default: true
13
+ - name: toc2.threshold
14
+ description: Maximum level of nested sections to display on the tables of contents
15
+ input_type: number
16
+ min: -1
17
+ step: 1
18
+ default: 4
19
+
20
+ - name: toc2.skip_h1_title
21
+ description: |
22
+ Skip h1 headings from numbering, so that they can serve as a notebook title.
23
+ See the README for details, caveats and alternatives
24
+ input_type: checkbox
25
+ default: false
26
+
27
+ - name: toc2.toc_cell
28
+ description: Add a Table of Contents cell at the top of the notebook
29
+ input_type: checkbox
30
+ default: false
31
+
32
+ - name: toc2.title_cell
33
+ description: Default heading used for ToC cell (can also be set per-notebook)
34
+ default: 'Table of Contents'
35
+
36
+ - name: toc2.title_sidebar
37
+ description: Default title used for ToC sidebar/window (can also be set per-notebook)
38
+ default: 'Contents'
39
+
40
+ - name: toc2.toc_window_display
41
+ description: Display toc window/sidebar at startup
42
+ input_type: checkbox
43
+ default: false
44
+ - name: toc2.sideBar
45
+ description: Display Table of Contents as a sidebar (otherwise as a floating window)
46
+ input_type: checkbox
47
+ default: true
48
+ - name: toc2.markTocItemOnScroll
49
+ description: Mark toc item of header in viewport when scrolling
50
+ input_type: checkbox
51
+ default: true
52
+ - name: toc2.widenNotebook
53
+ description: Widen the display area to fit the browser window (may be useful with sidebar option)
54
+ input_type: checkbox
55
+ default: true
56
+ - name: toc2.navigate_menu
57
+ description: Display Table of Contents as a navigation menu
58
+ input_type: checkbox
59
+ default: true
60
+ - name: toc2.moveMenuLeft
61
+ description: Move notebook's title and menu on the left instead of being centered -- This provides a better look when the toc/sidebar is present
62
+ input_type: checkbox
63
+ default: true
64
+
65
+ - name: toc2.collapse_to_match_collapsible_headings
66
+ input_type: checkbox
67
+ default: false
68
+ description: |
69
+ Collapse/uncollapse ToC sections when the collapsible_headings nbextension
70
+ is used to collapse/uncollapse sections in the notebook. For the inverse
71
+ behaviour, see collapsible_headings' configuration
72
+
73
+ - name: toc2.colors.hover_highlight
74
+ input_type: color
75
+ description: Hover color in toc
76
+ default: "#DAA520"
77
+ - name: toc2.colors.on_scroll
78
+ input_type: color
79
+ description: Color of highlight mark on scrolling
80
+ default: '#2447f0'
81
+ - name: toc2.colors.selected_highlight
82
+ input_type: color
83
+ description: Color of sections with selected elements
84
+ default: "#FFD700"
85
+ - name: toc2.colors.running_highlight
86
+ input_type: color
87
+ description: Color of sections with running cells
88
+ default: "#FF0000"
89
+ - name: toc2.colors.wrapper_background
90
+ input_type: color
91
+ description: Color of wrapper window background
92
+ default: "#FFFFFF"
93
+ - name: toc2.colors.sidebar_border
94
+ input_type: color
95
+ description: Color of sidebar border
96
+ default: "#EEEEEE"
97
+ - name: toc2.colors.navigate_text
98
+ input_type: color
99
+ description: Color of navigate text
100
+ default: "#333333"
101
+ - name: toc2.colors.navigate_num
102
+ input_type: color
103
+ description: Color of navigate number
104
+ default: "#000000"
.local/share/jupyter/nbextensions/toggle_all_line_numbers/main.js ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// toggle on/off linenumber display in all codecells

define([
    'jquery',
    'base/js/namespace'
], function(
    $,
    Jupyter
) {
    "use strict";

    // Default values for the config parameters. update_params() overwrites
    // them with any same-named entries found in the loaded notebook config.
    var params = {
        toggle_all_linenumbers_hotkey : 'Alt-N',
        toggle_all_linenumbers_enable_hotkey : true
    };

    /**
     * Overwrite the defaults in `params` with the values present in
     * Jupyter.notebook.config.data. Only keys already declared in `params`
     * are read, so unrelated config entries are ignored.
     * Must be called after the config has finished loading.
     */
    var update_params = function() {
        var config = Jupyter.notebook.config;
        for (var key in params) {
            // Call through Object.prototype so the check does not depend on
            // config.data exposing its own hasOwnProperty.
            if (Object.prototype.hasOwnProperty.call(config.data, key)) {
                params[key] = config.data[key];
            }
        }
    };

    /**
     * Action handler: flip the toolbar button's 'active' class and call
     * toggle_line_numbers() on every cell returned by
     * Jupyter.notebook.get_cells().
     *
     * NOTE(review): each cell is toggled individually, so cells whose line
     * numbers were changed one-by-one presumably stay out of sync with the
     * rest -- confirm whether a uniform show/hide is wanted instead.
     */
    var toggle_all = function() {
        $('#toggle_all_linenumbers').toggleClass('active');

        var cells = Jupyter.notebook.get_cells();
        // Index loop rather than for-in: for-in over an array also visits
        // any enumerable non-index properties added to it or its prototype.
        for (var i = 0; i < cells.length; i++) {
            cells[i].toggle_line_numbers();
        }
    };

    // define action, register with ActionHandler instance
    var prefix = 'auto';
    var action_name = 'toggle-all-line-numbers';
    var action = {
        icon: 'fa-list-ol',
        help: 'Toggle linenumbers in all codecells',
        help_index : 'zz',
        id: 'toggle_all_linenumbers',
        handler: toggle_all
    };
    var action_full_name; // will be set on registration

    /**
     * Set the extension up once the config is available: refresh `params`,
     * register the action, add its toolbar button and, when enabled and
     * non-empty, bind the hotkey in both edit and command mode.
     */
    var initialize = function () {
        // update default config vals with the newly loaded ones
        update_params();

        // register actions with ActionHandler instance
        action_full_name = Jupyter.keyboard_manager.actions.register(action, action_name, prefix);

        // create toolbar button
        Jupyter.toolbar.add_buttons_group([action_full_name]);

        // (maybe) define hotkey
        if (params.toggle_all_linenumbers_enable_hotkey &&
                params.toggle_all_linenumbers_hotkey) {

            console.log('toggle_all_linenumbers enabling hotkey:',
                params.toggle_all_linenumbers_hotkey);

            Jupyter.keyboard_manager.edit_shortcuts.add_shortcut(
                params.toggle_all_linenumbers_hotkey, action_full_name);
            Jupyter.keyboard_manager.command_shortcuts.add_shortcut(
                params.toggle_all_linenumbers_hotkey, action_full_name);
        }
    };

    /**
     * Extension entry point: defer initialize() until
     * Jupyter.notebook.config.loaded resolves, and return the resulting
     * promise.
     */
    var load_ipython_extension = function() {
        return Jupyter.notebook.config.loaded.then(initialize);
    };

    var extension = {
        load_ipython_extension : load_ipython_extension
    };
    return extension;
});
.local/share/jupyter/nbextensions/toggle_all_line_numbers/main.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Type: IPython Notebook Extension
2
+ Name: Toggle all line numbers
3
+ Description: "Add a toolbar button and hotkey to toggle all cells' line numbers on or off"
4
+ Icon: icon.png
5
+ Link: readme.md
6
+ Main: main.js
7
+ Compatibility: 3.x, 4.x, 5.x
8
+ Parameters:
9
+ - name: toggle_all_linenumbers_hotkey
10
+ description: Hotkey combination that toggles line numbers in all cells
11
+ input_type: hotkey
12
+ default: Alt-N
13
+ - name: toggle_all_linenumbers_enable_hotkey
14
+ description: Enable the hotkey
15
+ input_type: checkbox
16
+ default: true
.local/share/jupyter/nbextensions/toggle_all_line_numbers/readme.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Toggle all line numbers
2
+ =======================
3
+ This extension adds a toolbar button, along with an optional hotkey,
4
+ to toggle all cells' line numbers on or off in one action.
5
+
.local/share/jupyter/nbextensions/tree-filter/demo.gif ADDED
.local/share/jupyter/nbextensions/varInspector/__pycache__/var_list.cpython-310.pyc ADDED
Binary file (2.02 kB). View file
 
.local/share/jupyter/nbextensions/varInspector/demo.gif ADDED
.local/share/jupyter/nbextensions/varInspector/jquery.tablesorter.min.js ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ !function(a){"function"==typeof define&&define.amd?define(["jquery"],a):"object"==typeof module&&"object"==typeof module.exports?module.exports=a(require("jquery")):a(jQuery)}(function(a){return function(a){"use strict";var b=a.tablesorter={version:"2.25.7",parsers:[],widgets:[],defaults:{theme:"default",widthFixed:!1,showProcessing:!1,headerTemplate:"{content}",onRenderTemplate:null,onRenderHeader:null,cancelSelection:!0,tabIndex:!0,dateFormat:"mmddyyyy",sortMultiSortKey:"shiftKey",sortResetKey:"ctrlKey",usNumberFormat:!0,delayInit:!1,serverSideSorting:!1,resort:!0,headers:{},ignoreCase:!0,sortForce:null,sortList:[],sortAppend:null,sortStable:!1,sortInitialOrder:"asc",sortLocaleCompare:!1,sortReset:!1,sortRestart:!1,emptyTo:"bottom",stringTo:"max",duplicateSpan:!0,textExtraction:"basic",textAttribute:"data-text",textSorter:null,numberSorter:null,widgets:[],widgetOptions:{zebra:["even","odd"]},initWidgets:!0,widgetClass:"widget-{name}",initialized:null,tableClass:"",cssAsc:"",cssDesc:"",cssNone:"",cssHeader:"",cssHeaderRow:"",cssProcessing:"",cssChildRow:"tablesorter-childRow",cssInfoBlock:"tablesorter-infoOnly",cssNoSort:"tablesorter-noSort",cssIgnoreRow:"tablesorter-ignoreRow",cssIcon:"tablesorter-icon",cssIconNone:"",cssIconAsc:"",cssIconDesc:"",pointerClick:"click",pointerDown:"mousedown",pointerUp:"mouseup",selectorHeaders:"> thead th, > thead td",selectorSort:"th, td",selectorRemove:".remove-me",debug:!1,headerList:[],empties:{},strings:{},parsers:[]},css:{table:"tablesorter",cssHasChild:"tablesorter-hasChildRow",childRow:"tablesorter-childRow",colgroup:"tablesorter-colgroup",header:"tablesorter-header",headerRow:"tablesorter-headerRow",headerIn:"tablesorter-header-inner",icon:"tablesorter-icon",processing:"tablesorter-processing",sortAsc:"tablesorter-headerAsc",sortDesc:"tablesorter-headerDesc",sortNone:"tablesorter-headerUnSorted"},language:{sortAsc:"Ascending sort applied, ",sortDesc:"Descending sort applied, ",sortNone:"No sort applied, 
",sortDisabled:"sorting is disabled",nextAsc:"activate to apply an ascending sort",nextDesc:"activate to apply a descending sort",nextNone:"activate to remove the sort"},regex:{templateContent:/\{content\}/g,templateIcon:/\{icon\}/g,templateName:/\{name\}/i,spaces:/\s+/g,nonWord:/\W/g,formElements:/(input|select|button|textarea)/i,chunk:/(^([+\-]?(?:\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?)?$|^0x[0-9a-f]+$|\d+)/gi,chunks:/(^\\0|\\0$)/,hex:/^0x[0-9a-f]+$/i,comma:/,/g,digitNonUS:/[\s|\.]/g,digitNegativeTest:/^\s*\([.\d]+\)/,digitNegativeReplace:/^\s*\(([.\d]+)\)/,digitTest:/^[\-+(]?\d+[)]?$/,digitReplace:/[,.'"\s]/g},string:{max:1,min:-1,emptymin:1,emptymax:-1,zero:0,none:0,"null":0,top:!0,bottom:!1},keyCodes:{enter:13},dates:{},instanceMethods:{},setup:function(c,d){if(!c||!c.tHead||0===c.tBodies.length||c.hasInitialized===!0)return void(d.debug&&(c.hasInitialized?console.warn("Stopping initialization. Tablesorter has already been initialized"):console.error("Stopping initialization! No table, thead or tbody",c)));var e="",f=a(c),g=a.metadata;c.hasInitialized=!1,c.isProcessing=!0,c.config=d,a.data(c,"tablesorter",d),d.debug&&(console[console.group?"group":"log"]("Initializing tablesorter"),a.data(c,"startoveralltimer",new Date)),d.supportsDataObject=function(a){return a[0]=parseInt(a[0],10),a[0]>1||1===a[0]&&parseInt(a[1],10)>=4}(a.fn.jquery.split(".")),d.emptyTo=d.emptyTo.toLowerCase(),d.stringTo=d.stringTo.toLowerCase(),d.last={sortList:[],clickedIndex:-1},/tablesorter\-/.test(f.attr("class"))||(e=""!==d.theme?" 
tablesorter-"+d.theme:""),d.table=c,d.$table=f.addClass(b.css.table+" "+d.tableClass+e).attr("role","grid"),d.$headers=f.find(d.selectorHeaders),d.namespace?d.namespace="."+d.namespace.replace(b.regex.nonWord,""):d.namespace=".tablesorter"+Math.random().toString(16).slice(2),d.$table.children().children("tr").attr("role","row"),d.$tbodies=f.children("tbody:not(."+d.cssInfoBlock+")").attr({"aria-live":"polite","aria-relevant":"all"}),d.$table.children("caption").length&&(e=d.$table.children("caption")[0],e.id||(e.id=d.namespace.slice(1)+"caption"),d.$table.attr("aria-labelledby",e.id)),d.widgetInit={},d.textExtraction=d.$table.attr("data-text-extraction")||d.textExtraction||"basic",b.buildHeaders(d),b.fixColumnWidth(c),b.addWidgetFromClass(c),b.applyWidgetOptions(c),b.setupParsers(d),d.totalRows=0,d.delayInit||b.buildCache(d),b.bindEvents(c,d.$headers,!0),b.bindMethods(d),d.supportsDataObject&&"undefined"!=typeof f.data().sortlist?d.sortList=f.data().sortlist:g&&f.metadata()&&f.metadata().sortlist&&(d.sortList=f.metadata().sortlist),b.applyWidget(c,!0),d.sortList.length>0?b.sortOn(d,d.sortList,{},!d.initWidgets):(b.setHeadersCss(d),d.initWidgets&&b.applyWidget(c,!1)),d.showProcessing&&f.unbind("sortBegin"+d.namespace+" sortEnd"+d.namespace).bind("sortBegin"+d.namespace+" sortEnd"+d.namespace,function(a){clearTimeout(d.timerProcessing),b.isProcessing(c),"sortBegin"===a.type&&(d.timerProcessing=setTimeout(function(){b.isProcessing(c,!0)},500))}),c.hasInitialized=!0,c.isProcessing=!1,d.debug&&(console.log("Overall initialization time: "+b.benchmark(a.data(c,"startoveralltimer"))),d.debug&&console.groupEnd&&console.groupEnd()),f.triggerHandler("tablesorter-initialized",c),"function"==typeof d.initialized&&d.initialized(c)},bindMethods:function(c){var d=c.$table,e=c.namespace,f="sortReset update updateRows updateAll updateHeaders addRows updateCell updateComplete sorton appendCache updateCache applyWidgetId applyWidgets refreshWidgets destroy mouseup mouseleave ".split(" 
").join(e+" ");d.unbind(f.replace(b.regex.spaces," ")).bind("sortReset"+e,function(a,c){a.stopPropagation(),b.sortReset(this.config,c)}).bind("updateAll"+e,function(a,c,d){a.stopPropagation(),b.updateAll(this.config,c,d)}).bind("update"+e+" updateRows"+e,function(a,c,d){a.stopPropagation(),b.update(this.config,c,d)}).bind("updateHeaders"+e,function(a,c){a.stopPropagation(),b.updateHeaders(this.config,c)}).bind("updateCell"+e,function(a,c,d,e){a.stopPropagation(),b.updateCell(this.config,c,d,e)}).bind("addRows"+e,function(a,c,d,e){a.stopPropagation(),b.addRows(this.config,c,d,e)}).bind("updateComplete"+e,function(){this.isUpdating=!1}).bind("sorton"+e,function(a,c,d,e){a.stopPropagation(),b.sortOn(this.config,c,d,e)}).bind("appendCache"+e,function(c,d,e){c.stopPropagation(),b.appendCache(this.config,e),a.isFunction(d)&&d(this)}).bind("updateCache"+e,function(a,c,d){a.stopPropagation(),b.updateCache(this.config,c,d)}).bind("applyWidgetId"+e,function(a,c){a.stopPropagation(),b.applyWidgetId(this,c)}).bind("applyWidgets"+e,function(a,c){a.stopPropagation(),b.applyWidget(this,c)}).bind("refreshWidgets"+e,function(a,c,d){a.stopPropagation(),b.refreshWidgets(this,c,d)}).bind("removeWidget"+e,function(a,c,d){a.stopPropagation(),b.removeWidget(this,c,d)}).bind("destroy"+e,function(a,c,d){a.stopPropagation(),b.destroy(this,c,d)}).bind("resetToLoadState"+e,function(d){d.stopPropagation(),b.removeWidget(this,!0,!1),c=a.extend(!0,b.defaults,c.originalSettings),this.hasInitialized=!1,b.setup(this,c)})},bindEvents:function(c,d,e){c=a(c)[0];var f,g=c.config,h=g.namespace,i=null;e!==!0&&(d.addClass(h.slice(1)+"_extra_headers"),f=a.fn.closest?d.closest("table")[0]:d.parents("table")[0],f&&"TABLE"===f.nodeName&&f!==c&&a(f).addClass(h.slice(1)+"_extra_table")),f=(g.pointerDown+" "+g.pointerUp+" "+g.pointerClick+" sort keyup ").replace(b.regex.spaces," ").split(" ").join(h+" "),d.find(g.selectorSort).add(d.filter(g.selectorSort)).unbind(f).bind(f,function(c,e){var 
f,h,j,k=a(c.target),l=" "+c.type+" ";if(!(1!==(c.which||c.button)&&!l.match(" "+g.pointerClick+" | sort | keyup ")||" keyup "===l&&c.which!==b.keyCodes.enter||l.match(" "+g.pointerClick+" ")&&"undefined"!=typeof c.which||l.match(" "+g.pointerUp+" ")&&i!==c.target&&e!==!0)){if(l.match(" "+g.pointerDown+" "))return i=c.target,j=k.jquery.split("."),void("1"===j[0]&&j[1]<4&&c.preventDefault());if(i=null,b.regex.formElements.test(c.target.nodeName)||k.hasClass(g.cssNoSort)||k.parents("."+g.cssNoSort).length>0||k.parents("button").length>0)return!g.cancelSelection;g.delayInit&&b.isEmptyObject(g.cache)&&b.buildCache(g),f=a.fn.closest?a(this).closest("th, td"):/TH|TD/.test(this.nodeName)?a(this):a(this).parents("th, td"),j=d.index(f),g.last.clickedIndex=0>j?f.attr("data-column"):j,h=g.$headers[g.last.clickedIndex],h&&!h.sortDisabled&&b.initSort(g,h,c)}}),g.cancelSelection&&d.attr("unselectable","on").bind("selectstart",!1).css({"user-select":"none",MozUserSelect:"none"})},buildHeaders:function(c){var d,e,f,g;for(c.headerList=[],c.headerContent=[],c.sortVars=[],c.debug&&(f=new Date),c.columns=b.computeColumnIndex(c.$table.children("thead, tfoot").children("tr")),e=c.cssIcon?'<i class="'+(c.cssIcon===b.css.icon?b.css.icon:c.cssIcon+" "+b.css.icon)+'"></i>':"",c.$headers=a(a.map(c.$table.find(c.selectorHeaders),function(d,f){var g,h,i,j,k,l=a(d);if(!l.parent().hasClass(c.cssIgnoreRow))return g=b.getColumnData(c.table,c.headers,f,!0),c.headerContent[f]=l.html(),""===c.headerTemplate||l.find("."+b.css.headerIn).length||(j=c.headerTemplate.replace(b.regex.templateContent,l.html()).replace(b.regex.templateIcon,l.find("."+b.css.icon).length?"":e),c.onRenderTemplate&&(h=c.onRenderTemplate.apply(l,[f,j]),h&&"string"==typeof h&&(j=h)),l.html('<div 
class="'+b.css.headerIn+'">'+j+"</div>")),c.onRenderHeader&&c.onRenderHeader.apply(l,[f,c,c.$table]),i=parseInt(l.attr("data-column"),10),d.column=i,k=b.getData(l,g,"sortInitialOrder")||c.sortInitialOrder,c.sortVars[i]={count:-1,order:b.getOrder(k)?[1,0,2]:[0,1,2],lockedOrder:!1},k=b.getData(l,g,"lockedOrder")||!1,"undefined"!=typeof k&&k!==!1&&(c.sortVars[i].lockedOrder=!0,c.sortVars[i].order=b.getOrder(k)?[1,1,1]:[0,0,0]),c.headerList[f]=d,l.addClass(b.css.header+" "+c.cssHeader).parent().addClass(b.css.headerRow+" "+c.cssHeaderRow).attr("role","row"),c.tabIndex&&l.attr("tabindex",0),d})),c.$headerIndexed=[],g=0;g<c.columns;g++)b.isEmptyObject(c.sortVars[g])&&(c.sortVars[g]={}),d=c.$headers.filter('[data-column="'+g+'"]'),c.$headerIndexed[g]=d.length?d.not(".sorter-false").length?d.not(".sorter-false").filter(":last"):d.filter(":last"):a();c.$table.find(c.selectorHeaders).attr({scope:"col",role:"columnheader"}),b.updateHeader(c),c.debug&&(console.log("Built headers:"+b.benchmark(f)),console.log(c.$headers))},addInstanceMethods:function(c){a.extend(b.instanceMethods,c)},setupParsers:function(a,c){var d,e,f,g,h,i,j,k,l,m,n,o,p,q,r=a.table,s=0,t={};if(a.$tbodies=a.$table.children("tbody:not(."+a.cssInfoBlock+")"),p="undefined"==typeof c?a.$tbodies:c,q=p.length,0===q)return a.debug?console.warn("Warning: *Empty table!* Not building a parser cache"):"";for(a.debug&&(o=new Date,console[console.group?"group":"log"]("Detecting parsers for each 
column")),e={extractors:[],parsers:[]};q>s;){if(d=p[s].rows,d.length)for(h=0,g=a.columns,i=0;g>i;i++){if(j=a.$headerIndexed[h],j&&j.length&&(k=b.getColumnData(r,a.headers,h),n=b.getParserById(b.getData(j,k,"extractor")),m=b.getParserById(b.getData(j,k,"sorter")),l="false"===b.getData(j,k,"parser"),a.empties[h]=(b.getData(j,k,"empty")||a.emptyTo||(a.emptyToBottom?"bottom":"top")).toLowerCase(),a.strings[h]=(b.getData(j,k,"string")||a.stringTo||"max").toLowerCase(),l&&(m=b.getParserById("no-parser")),n||(n=!1),m||(m=b.detectParserForColumn(a,d,-1,h)),a.debug&&(t["("+h+") "+j.text()]={parser:m.id,extractor:n?n.id:"none",string:a.strings[h],empty:a.empties[h]}),e.parsers[h]=m,e.extractors[h]=n,f=j[0].colSpan-1,f>0))for(h+=f,g+=f;f+1>0;)e.parsers[h-f]=m,e.extractors[h-f]=n,f--;h++}s+=e.parsers.length?q:1}a.debug&&(b.isEmptyObject(t)?console.warn(" No parsers detected!"):console[console.table?"table":"log"](t),console.log("Completed detecting parsers"+b.benchmark(o)),console.groupEnd&&console.groupEnd()),a.parsers=e.parsers,a.extractors=e.extractors},addParser:function(a){var c,d=b.parsers.length,e=!0;for(c=0;d>c;c++)b.parsers[c].id.toLowerCase()===a.id.toLowerCase()&&(e=!1);e&&(b.parsers[b.parsers.length]=a)},getParserById:function(a){if("false"==a)return!1;var c,d=b.parsers.length;for(c=0;d>c;c++)if(b.parsers[c].id.toLowerCase()===a.toString().toLowerCase())return b.parsers[c];return!1},detectParserForColumn:function(c,d,e,f){for(var g,h,i,j=b.parsers.length,k=!1,l="",m=!0;""===l&&m;)e++,i=d[e],i&&50>e?i.className.indexOf(b.cssIgnoreRow)<0&&(k=d[e].cells[f],l=b.getElementText(c,k,f),h=a(k),c.debug&&console.log("Checking if value was empty on row "+e+", column: "+f+': "'+l+'"')):m=!1;for(;--j>=0;)if(g=b.parsers[j],g&&"text"!==g.id&&g.is&&g.is(l,c.table,k,h))return g;return b.getParserById("text")},getElementText:function(c,d,e){if(!d)return"";var f,g=c.textExtraction||"",h=d.jquery?d:a(d);return"string"==typeof 
g?"basic"===g&&"undefined"!=typeof(f=h.attr(c.textAttribute))?a.trim(f):a.trim(d.textContent||h.text()):"function"==typeof g?a.trim(g(h[0],c.table,e)):"function"==typeof(f=b.getColumnData(c.table,g,e))?a.trim(f(h[0],c.table,e)):a.trim(h[0].textContent||h.text())},getParsedText:function(a,c,d,e){"undefined"==typeof e&&(e=b.getElementText(a,c,d));var f=""+e,g=a.parsers[d],h=a.extractors[d];return g&&(h&&"function"==typeof h.format&&(e=h.format(e,a.table,c,d)),f="no-parser"===g.id?"":g.format(""+e,a.table,c,d),a.ignoreCase&&"string"==typeof f&&(f=f.toLowerCase())),f},buildCache:function(c,d,e){var f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,A,B=c.table,C=c.parsers;if(c.$tbodies=c.$table.children("tbody:not(."+c.cssInfoBlock+")"),l="undefined"==typeof e?c.$tbodies:e,c.cache={},c.totalRows=0,!C)return c.debug?console.warn("Warning: *Empty table!* Not building a cache"):"";for(c.debug&&(q=new Date),c.showProcessing&&b.isProcessing(B,!0),k=0;k<l.length;k++){for(u=[],f=c.cache[k]={normalized:[]},r=l[k]&&l[k].rows.length||0,i=0;r>i;++i)if(s={child:[],raw:[]},m=a(l[k].rows[i]),n=[],m.hasClass(c.cssChildRow)&&0!==i)for(z=f.normalized.length-1,t=f.normalized[z][c.columns],t.$row=t.$row.add(m),m.prev().hasClass(c.cssChildRow)||m.prev().addClass(b.css.cssHasChild),o=m.children("th, td"),z=t.child.length,t.child[z]=[],w=0,y=c.columns,j=0;y>j;j++)p=o[j],p&&(t.child[z][j]=b.getParsedText(c,p,j),v=o[j].colSpan-1,v>0&&(w+=v,y+=v)),w++;else{for(s.$row=m,s.order=i,w=0,y=c.columns,j=0;y>j;++j){if(p=m[0].cells[j],p&&w<c.columns&&(x="undefined"!=typeof C[w],!x&&c.debug&&console.warn("No parser found for row: "+i+", column: "+j+'; cell containing: "'+a(p).text()+'"; does it have a 
header?'),g=b.getElementText(c,p,w),s.raw[w]=g,h=b.getParsedText(c,p,w,g),n[w]=h,x&&"numeric"===(C[w].type||"").toLowerCase()&&(u[w]=Math.max(Math.abs(h)||0,u[w]||0)),v=p.colSpan-1,v>0)){for(A=0;v>=A;)s.raw[w+A]=c.duplicateSpan||0===A?g:"",n[w+A]=c.duplicateSpan||0===A?g:"",A++;w+=v,y+=v}w++}n[c.columns]=s,f.normalized[f.normalized.length]=n}f.colMax=u,c.totalRows+=f.normalized.length}if(c.showProcessing&&b.isProcessing(B),c.debug){for(z=Math.min(5,c.cache[0].normalized.length),console[console.group?"group":"log"]("Building cache for "+c.totalRows+" rows (showing "+z+" rows in log)"+b.benchmark(q)),g={},j=0;j<c.columns;j++)for(w=0;z>w;w++)g["row: "+w]||(g["row: "+w]={}),g["row: "+w][c.$headerIndexed[j].text()]=c.cache[0].normalized[w][j];console[console.table?"table":"log"](g),console.groupEnd&&console.groupEnd()}a.isFunction(d)&&d(B)},getColumnText:function(c,d,e,f){c=a(c)[0];var g,h,i,j,k,l,m,n,o,p,q="function"==typeof e,r="all"===d,s={raw:[],parsed:[],$cell:[]},t=c.config;if(!b.isEmptyObject(t)){for(k=t.$tbodies.length,g=0;k>g;g++)for(i=t.cache[g].normalized,l=i.length,h=0;l>h;h++)j=i[h],f&&!j[t.columns].$row.is(f)||(p=!0,n=r?j.slice(0,t.columns):j[d],j=j[t.columns],m=r?j.raw:j.raw[d],o=r?j.$row.children():j.$row.children().eq(d),q&&(p=e({tbodyIndex:g,rowIndex:h,parsed:n,raw:m,$row:j.$row,$cell:o})),p!==!1&&(s.parsed[s.parsed.length]=n,s.raw[s.raw.length]=m,s.$cell[s.$cell.length]=o));return s}t.debug&&console.warn("No cache found - aborting getColumnText function!")},setHeadersCss:function(c){var d,e,f,g=c.sortList,h=g.length,i=b.css.sortNone+" "+c.cssNone,j=[b.css.sortAsc+" "+c.cssAsc,b.css.sortDesc+" "+c.cssDesc],k=[c.cssIconAsc,c.cssIconDesc,c.cssIconNone],l=["ascending","descending"],m=c.$table.find("tfoot tr").children("td, th").add(a(c.namespace+"_extra_headers")).removeClass(j.join(" "));for(c.$headers.removeClass(j.join(" ")).addClass(i).attr("aria-sort","none").find("."+b.css.icon).removeClass(k.join(" 
")).addClass(k[2]),e=0;h>e;e++)if(2!==g[e][1]&&(d=c.$headers.filter(function(a){for(var d=!0,e=c.$headers.eq(a),f=parseInt(e.attr("data-column"),10),g=f+c.$headers[a].colSpan;g>f;f++)d=d?d||b.isValueInArray(f,c.sortList)>-1:!1;return d}),d=d.not(".sorter-false").filter('[data-column="'+g[e][0]+'"]'+(1===h?":last":"")),d.length)){for(f=0;f<d.length;f++)d[f].sortDisabled||d.eq(f).removeClass(i).addClass(j[g[e][1]]).attr("aria-sort",l[g[e][1]]).find("."+b.css.icon).removeClass(k[2]).addClass(k[g[e][1]]);m.length&&m.filter('[data-column="'+g[e][0]+'"]').removeClass(i).addClass(j[g[e][1]])}for(h=c.$headers.length,e=0;h>e;e++)b.setColumnAriaLabel(c,c.$headers.eq(e))},setColumnAriaLabel:function(c,d,e){if(d.length){var f=parseInt(d.attr("data-column"),10),g=d.hasClass(b.css.sortAsc)?"sortAsc":d.hasClass(b.css.sortDesc)?"sortDesc":"sortNone",h=a.trim(d.text())+": "+b.language[g];d.hasClass("sorter-false")||e===!1?h+=b.language.sortDisabled:(e=c.sortVars[f].order[(c.sortVars[f].count+1)%(c.sortReset?3:2)],h+=b.language[0===e?"nextAsc":1===e?"nextDesc":"nextNone"]),d.attr("aria-label",h)}},updateHeader:function(a){var c,d,e,f,g=a.table,h=a.$headers.length;for(c=0;h>c;c++)e=a.$headers.eq(c),f=b.getColumnData(g,a.headers,c,!0),d="false"===b.getData(e,f,"sorter")||"false"===b.getData(e,f,"parser"),b.setColumnSort(a,e,d)},setColumnSort:function(a,b,c){var d=a.table.id;b[0].sortDisabled=c,b[c?"addClass":"removeClass"]("sorter-false").attr("aria-disabled",""+c),a.tabIndex&&(c?b.removeAttr("tabindex"):b.attr("tabindex","0")),d&&(c?b.removeAttr("aria-controls"):b.attr("aria-controls",d))},updateHeaderSortCount:function(c,d){var 
e,f,g,h,i,j,k,l,m=d||c.sortList,n=m.length;for(c.sortList=[],h=0;n>h;h++)if(k=m[h],e=parseInt(k[0],10),e<c.columns){switch(c.sortVars[e].order||(l=c.sortVars[e].order=b.getOrder(c.sortInitialOrder)?[1,0,2]:[0,1,2],c.sortVars[e].count=0),l=c.sortVars[e].order,f=(""+k[1]).match(/^(1|d|s|o|n)/),f=f?f[0]:""){case"1":case"d":f=1;break;case"s":f=i||0;break;case"o":j=l[(i||0)%(c.sortReset?3:2)],f=0===j?1:1===j?0:2;break;case"n":f=l[++c.sortVars[e].count%(c.sortReset?3:2)];break;default:f=0}i=0===h?f:i,g=[e,parseInt(f,10)||0],c.sortList[c.sortList.length]=g,f=a.inArray(g[1],l),c.sortVars[e].count=f>=0?f:g[1]%(c.sortReset?3:2)}},updateAll:function(a,c,d){var e=a.table;e.isUpdating=!0,b.refreshWidgets(e,!0,!0),b.buildHeaders(a),b.bindEvents(e,a.$headers,!0),b.bindMethods(a),b.commonUpdate(a,c,d)},update:function(a,c,d){var e=a.table;e.isUpdating=!0,b.updateHeader(a),b.commonUpdate(a,c,d)},updateHeaders:function(a,c){a.table.isUpdating=!0,b.buildHeaders(a),b.bindEvents(a.table,a.$headers,!0),b.resortComplete(a,c)},updateCell:function(c,d,e,f){if(b.isEmptyObject(c.cache))return b.updateHeader(c),void b.commonUpdate(c,e,f);c.table.isUpdating=!0,c.$table.find(c.selectorRemove).remove();var g,h,i,j,k,l,m=c.$tbodies,n=a(d),o=m.index(a.fn.closest?n.closest("tbody"):n.parents("tbody").filter(":first")),p=c.cache[o],q=a.fn.closest?n.closest("tr"):n.parents("tr").filter(":first");if(d=n[0],m.length&&o>=0){if(i=m.eq(o).find("tr").index(q),k=p.normalized[i],l=q[0].cells.length,l!==c.columns)for(j=0,g=!1,h=0;l>h;h++)g||q[0].cells[h]===d?g=!0:j+=q[0].cells[h].colSpan;else j=n.index();g=b.getElementText(c,d,j),k[c.columns].raw[j]=g,g=b.getParsedText(c,d,j,g),k[j]=g,k[c.columns].$row=q,"numeric"===(c.parsers[j].type||"").toLowerCase()&&(p.colMax[j]=Math.max(Math.abs(g)||0,p.colMax[j]||0)),g="undefined"!==e?e:c.resort,g!==!1?b.checkResort(c,g,f):b.resortComplete(c,f)}else c.debug&&console.error("updateCell aborted, tbody missing or not within the indicated 
table"),c.table.isUpdating=!1},addRows:function(c,d,e,f){var g,h,i,j,k,l,m,n,o,p,q,r,s,t="string"==typeof d&&1===c.$tbodies.length&&/<tr/.test(d||""),u=c.table;if(t)d=a(d),c.$tbodies.append(d);else if(!(d&&d instanceof jQuery&&(a.fn.closest?d.closest("table")[0]:d.parents("table")[0])===c.table))return c.debug&&console.error("addRows method requires (1) a jQuery selector reference to rows that have already been added to the table, or (2) row HTML string to be added to a table with only one tbody"),!1;if(u.isUpdating=!0,b.isEmptyObject(c.cache))b.updateHeader(c),b.commonUpdate(c,e,f);else{for(k=d.filter("tr").attr("role","row").length,i=c.$tbodies.index(d.parents("tbody").filter(":first")),c.parsers&&c.parsers.length||b.setupParsers(c),j=0;k>j;j++){for(o=0,m=d[j].cells.length,n=c.cache[i].normalized.length,q=[],p={child:[],raw:[],$row:d.eq(j),order:n},l=0;m>l;l++)r=d[j].cells[l],g=b.getElementText(c,r,o),p.raw[o]=g,h=b.getParsedText(c,r,o,g),q[o]=h,"numeric"===(c.parsers[o].type||"").toLowerCase()&&(c.cache[i].colMax[o]=Math.max(Math.abs(h)||0,c.cache[i].colMax[o]||0)),s=r.colSpan-1,s>0&&(o+=s),o++;q[c.columns]=p,c.cache[i].normalized[n]=q}b.checkResort(c,e,f)}},updateCache:function(a,c,d){a.parsers&&a.parsers.length||b.setupParsers(a,d),b.buildCache(a,c,d)},appendCache:function(a,c){var d,e,f,g,h,i,j,k=a.table,l=a.widgetOptions,m=a.$tbodies,n=[],o=a.cache;if(b.isEmptyObject(o))return a.appender?a.appender(k,n):k.isUpdating?a.$table.triggerHandler("updateComplete",k):"";for(a.debug&&(j=new Date),i=0;i<m.length;i++)if(f=m.eq(i),f.length){for(g=b.processTbody(k,f,!0),d=o[i].normalized,e=d.length,h=0;e>h;h++)n[n.length]=d[h][a.columns].$row,a.appender&&(!a.pager||a.pager.removeRows&&l.pager_removeRows||a.pager.ajax)||g.append(d[h][a.columns].$row);b.processTbody(k,g,!1)}a.appender&&a.appender(k,n),a.debug&&console.log("Rebuilt 
table"+b.benchmark(j)),c||a.appender||b.applyWidget(k),k.isUpdating&&a.$table.triggerHandler("updateComplete",k)},commonUpdate:function(a,c,d){a.$table.find(a.selectorRemove).remove(),b.setupParsers(a),b.buildCache(a),b.checkResort(a,c,d)},initSort:function(c,d,e){if(c.table.isUpdating)return setTimeout(function(){b.initSort(c,d,e)},50);var f,g,h,i,j,k,l,m=!e[c.sortMultiSortKey],n=c.table,o=c.$headers.length,p=parseInt(a(d).attr("data-column"),10),q=c.sortVars[p].order;if(c.$table.triggerHandler("sortStart",n),c.sortVars[p].count=e[c.sortResetKey]?2:(c.sortVars[p].count+1)%(c.sortReset?3:2),c.sortRestart)for(h=0;o>h;h++)l=c.$headers.eq(h),k=parseInt(l.attr("data-column"),10),p!==k&&(m||l.hasClass(b.css.sortNone))&&(c.sortVars[k].count=-1);if(m){if(c.sortList=[],c.last.sortList=[],null!==c.sortForce)for(f=c.sortForce,g=0;g<f.length;g++)f[g][0]!==p&&(c.sortList[c.sortList.length]=f[g]);if(i=q[c.sortVars[p].count],2>i&&(c.sortList[c.sortList.length]=[p,i],d.colSpan>1))for(g=1;g<d.colSpan;g++)c.sortList[c.sortList.length]=[p+g,i],c.sortVars[p+g].count=a.inArray(i,q)}else if(c.sortList=a.extend([],c.last.sortList),b.isValueInArray(p,c.sortList)>=0)for(g=0;g<c.sortList.length;g++)k=c.sortList[g],k[0]===p&&(k[1]=q[c.sortVars[p].count],2===k[1]&&(c.sortList.splice(g,1),c.sortVars[p].count=-1));else 
if(i=q[c.sortVars[p].count],2>i&&(c.sortList[c.sortList.length]=[p,i],d.colSpan>1))for(g=1;g<d.colSpan;g++)c.sortList[c.sortList.length]=[p+g,i],c.sortVars[p+g].count=a.inArray(i,q);if(c.last.sortList=a.extend([],c.sortList),c.sortList.length&&c.sortAppend&&(f=a.isArray(c.sortAppend)?c.sortAppend:c.sortAppend[c.sortList[0][0]],!b.isEmptyObject(f)))for(g=0;g<f.length;g++)if(f[g][0]!==p&&b.isValueInArray(f[g][0],c.sortList)<0){if(i=f[g][1],j=(""+i).match(/^(a|d|s|o|n)/))switch(k=c.sortList[0][1],j[0]){case"d":i=1;break;case"s":i=k;break;case"o":i=0===k?1:0;break;case"n":i=(k+1)%(c.sortReset?3:2);break;default:i=0}c.sortList[c.sortList.length]=[f[g][0],i]}c.$table.triggerHandler("sortBegin",n),setTimeout(function(){b.setHeadersCss(c),b.multisort(c),b.appendCache(c),c.$table.triggerHandler("sortBeforeEnd",n),c.$table.triggerHandler("sortEnd",n)},1)},multisort:function(a){var c,d,e,f,g=a.table,h=0,i=a.textSorter||"",j=a.sortList,k=j.length,l=a.$tbodies.length;if(!a.serverSideSorting&&!b.isEmptyObject(a.cache)){for(a.debug&&(d=new Date),c=0;l>c;c++)e=a.cache[c].colMax,f=a.cache[c].normalized,f.sort(function(c,d){var f,l,m,n,o,p,q;for(f=0;k>f;f++){if(m=j[f][0],n=j[f][1],h=0===n,a.sortStable&&c[m]===d[m]&&1===k)return c[a.columns].order-d[a.columns].order;if(l=/n/i.test(b.getSortType(a.parsers,m)),l&&a.strings[m]?(l="boolean"==typeof b.string[a.strings[m]]?(h?1:-1)*(b.string[a.strings[m]]?-1:1):a.strings[m]?b.string[a.strings[m]]||0:0,o=a.numberSorter?a.numberSorter(c[m],d[m],h,e[m],g):b["sortNumeric"+(h?"Asc":"Desc")](c[m],d[m],l,e[m],m,a)):(p=h?c:d,q=h?d:c,o="function"==typeof i?i(p[m],q[m],h,m,g):"object"==typeof i&&i.hasOwnProperty(m)?i[m](p[m],q[m],h,m,g):b["sortNatural"+(h?"Asc":"Desc")](c[m],d[m],m,a)),o)return o}return c[a.columns].order-d[a.columns].order});a.debug&&console.log("Applying sort 
"+j.toString()+b.benchmark(d))}},resortComplete:function(b,c){b.table.isUpdating&&b.$table.triggerHandler("updateComplete",b.table),a.isFunction(c)&&c(b.table)},checkResort:function(c,d,e){var f=a.isArray(d)?d:c.sortList,g="undefined"==typeof d?c.resort:d;g===!1||c.serverSideSorting||c.table.isProcessing?(b.resortComplete(c,e),b.applyWidget(c.table,!1)):f.length?b.sortOn(c,f,function(){b.resortComplete(c,e)},!0):b.sortReset(c,function(){b.resortComplete(c,e),b.applyWidget(c.table,!1)})},sortOn:function(c,d,e,f){var g=c.table;c.$table.triggerHandler("sortStart",g),b.updateHeaderSortCount(c,d),b.setHeadersCss(c),c.delayInit&&b.isEmptyObject(c.cache)&&b.buildCache(c),c.$table.triggerHandler("sortBegin",g),b.multisort(c),b.appendCache(c,f),c.$table.triggerHandler("sortBeforeEnd",g),c.$table.triggerHandler("sortEnd",g),b.applyWidget(g),a.isFunction(e)&&e(g)},sortReset:function(c,d){c.sortList=[],b.setHeadersCss(c),b.multisort(c),b.appendCache(c),a.isFunction(d)&&d(c.table)},getSortType:function(a,b){return a&&a[b]?a[b].type||"":""},getOrder:function(a){return/^d/i.test(a)||1===a},sortNatural:function(a,c){if(a===c)return 0;var d,e,f,g,h,i,j=b.regex;if(j.hex.test(c)){if(d=parseInt(a.match(j.hex),16),e=parseInt(c.match(j.hex),16),e>d)return-1;if(d>e)return 1}for(d=a.replace(j.chunk,"\\0$1\\0").replace(j.chunks,"").split("\\0"),e=c.replace(j.chunk,"\\0$1\\0").replace(j.chunks,"").split("\\0"),i=Math.max(d.length,e.length),h=0;i>h;h++){if(f=isNaN(d[h])?d[h]||0:parseFloat(d[h])||0,g=isNaN(e[h])?e[h]||0:parseFloat(e[h])||0,isNaN(f)!==isNaN(g))return isNaN(f)?1:-1;if(typeof f!=typeof g&&(f+="",g+=""),g>f)return-1;if(f>g)return 1}return 0},sortNaturalAsc:function(a,c,d,e){if(a===c)return 0;var f=b.string[e.empties[d]||e.emptyTo];return""===a&&0!==f?"boolean"==typeof f?f?-1:1:-f||-1:""===c&&0!==f?"boolean"==typeof f?f?1:-1:f||1:b.sortNatural(a,c)},sortNaturalDesc:function(a,c,d,e){if(a===c)return 0;var f=b.string[e.empties[d]||e.emptyTo];return""===a&&0!==f?"boolean"==typeof 
f?f?-1:1:f||1:""===c&&0!==f?"boolean"==typeof f?f?1:-1:-f||-1:b.sortNatural(c,a)},sortText:function(a,b){return a>b?1:b>a?-1:0},getTextValue:function(a,b,c){if(c){var d,e=a?a.length:0,f=c+b;for(d=0;e>d;d++)f+=a.charCodeAt(d);return b*f}return 0},sortNumericAsc:function(a,c,d,e,f,g){if(a===c)return 0;var h=b.string[g.empties[f]||g.emptyTo];return""===a&&0!==h?"boolean"==typeof h?h?-1:1:-h||-1:""===c&&0!==h?"boolean"==typeof h?h?1:-1:h||1:(isNaN(a)&&(a=b.getTextValue(a,d,e)),isNaN(c)&&(c=b.getTextValue(c,d,e)),a-c)},sortNumericDesc:function(a,c,d,e,f,g){if(a===c)return 0;var h=b.string[g.empties[f]||g.emptyTo];return""===a&&0!==h?"boolean"==typeof h?h?-1:1:h||1:""===c&&0!==h?"boolean"==typeof h?h?1:-1:-h||-1:(isNaN(a)&&(a=b.getTextValue(a,d,e)),isNaN(c)&&(c=b.getTextValue(c,d,e)),c-a)},sortNumeric:function(a,b){return a-b},addWidget:function(a){a.id&&!b.isEmptyObject(b.getWidgetById(a.id))&&console.warn('"'+a.id+'" widget was loaded more than once!'),b.widgets[b.widgets.length]=a},hasWidget:function(b,c){return b=a(b),b.length&&b[0].config&&b[0].config.widgetInit[c]||!1},getWidgetById:function(a){var c,d,e=b.widgets.length;for(c=0;e>c;c++)if(d=b.widgets[c],d&&d.id&&d.id.toLowerCase()===a.toLowerCase())return d},applyWidgetOptions:function(c){var d,e,f=c.config,g=f.widgets.length;if(g)for(d=0;g>d;d++)e=b.getWidgetById(f.widgets[d]),e&&e.options&&(f.widgetOptions=a.extend(!0,{},e.options,f.widgetOptions))},addWidgetFromClass:function(a){var c,d,e=a.config,f="^"+e.widgetClass.replace(b.regex.templateName,"(\\S+)+")+"$",g=new RegExp(f,"g"),h=(a.className||"").split(b.regex.spaces);if(h.length)for(c=h.length,d=0;c>d;d++)h[d].match(g)&&(e.widgets[e.widgets.length]=h[d].replace(g,"$1"))},applyWidgetId:function(c,d,e){c=a(c)[0];var f,g,h,i=c.config,j=i.widgetOptions,k=b.getWidgetById(d);k&&(h=k.id,f=!1,a.inArray(h,i.widgets)<0&&(i.widgets[i.widgets.length]=h),i.debug&&(g=new 
Date),!e&&i.widgetInit[h]||(i.widgetInit[h]=!0,c.hasInitialized&&b.applyWidgetOptions(c),"function"==typeof k.init&&(f=!0,i.debug&&console[console.group?"group":"log"]("Initializing "+h+" widget"),k.init(c,k,i,j))),e||"function"!=typeof k.format||(f=!0,i.debug&&console[console.group?"group":"log"]("Updating "+h+" widget"),k.format(c,i,j,!1)),i.debug&&f&&(console.log("Completed "+(e?"initializing ":"applying ")+h+" widget"+b.benchmark(g)),console.groupEnd&&console.groupEnd()))},applyWidget:function(c,d,e){c=a(c)[0];var f,g,h,i,j,k=c.config,l=[];if(d===!1||!c.hasInitialized||!c.isApplyingWidgets&&!c.isUpdating){if(k.debug&&(j=new Date),b.addWidgetFromClass(c),clearTimeout(k.timerReady),k.widgets.length){for(c.isApplyingWidgets=!0,k.widgets=a.grep(k.widgets,function(b,c){return a.inArray(b,k.widgets)===c}),h=k.widgets||[],g=h.length,f=0;g>f;f++)i=b.getWidgetById(h[f]),i&&i.id?(i.priority||(i.priority=10),l[f]=i):k.debug&&console.warn('"'+h[f]+'" widget code does not exist!');for(l.sort(function(a,b){return a.priority<b.priority?-1:a.priority===b.priority?0:1}),g=l.length,k.debug&&console[console.group?"group":"log"]("Start "+(d?"initializing":"applying")+" widgets"),f=0;g>f;f++)i=l[f],i&&i.id&&b.applyWidgetId(c,i.id,d);k.debug&&console.groupEnd&&console.groupEnd(),d||"function"!=typeof e||e(c)}k.timerReady=setTimeout(function(){c.isApplyingWidgets=!1,a.data(c,"lastWidgetApplication",new Date),k.$table.triggerHandler("tablesorter-ready")},10),k.debug&&(i=k.widgets.length,console.log("Completed "+(d===!0?"initializing ":"applying ")+i+" widget"+(1!==i?"s":"")+b.benchmark(j)))}},removeWidget:function(c,d,e){c=a(c)[0];var f,g,h,i,j=c.config;if(d===!0)for(d=[],i=b.widgets.length,h=0;i>h;h++)g=b.widgets[h],g&&g.id&&(d[d.length]=g.id);else 
d=(a.isArray(d)?d.join(","):d||"").toLowerCase().split(/[\s,]+/);for(i=d.length,f=0;i>f;f++)g=b.getWidgetById(d[f]),h=a.inArray(d[f],j.widgets),h>=0&&e!==!0&&j.widgets.splice(h,1),g&&g.remove&&(j.debug&&console.log((e?"Refreshing":"Removing")+' "'+d[f]+'" widget'),g.remove(c,j,j.widgetOptions,e),j.widgetInit[d[f]]=!1)},refreshWidgets:function(c,d,e){c=a(c)[0];var f,g,h=c.config,i=h.widgets,j=b.widgets,k=j.length,l=[],m=function(b){a(b).triggerHandler("refreshComplete")};for(f=0;k>f;f++)g=j[f],g&&g.id&&(d||a.inArray(g.id,i)<0)&&(l[l.length]=g.id);b.removeWidget(c,l.join(","),!0),e!==!0?(b.applyWidget(c,d||!1,m),d&&b.applyWidget(c,!1,m)):m(c)},benchmark:function(a){return" ( "+((new Date).getTime()-a.getTime())+"ms )"},log:function(){console.log(arguments)},isEmptyObject:function(a){for(var b in a)return!1;return!0},isValueInArray:function(a,b){var c,d=b&&b.length||0;for(c=0;d>c;c++)if(b[c][0]===a)return c;return-1},formatFloat:function(c,d){if("string"!=typeof c||""===c)return c;var e,f=d&&d.config?d.config.usNumberFormat!==!1:"undefined"!=typeof d?d:!0;return c=f?c.replace(b.regex.comma,""):c.replace(b.regex.digitNonUS,"").replace(b.regex.comma,"."),b.regex.digitNegativeTest.test(c)&&(c=c.replace(b.regex.digitNegativeReplace,"-$1")),e=parseFloat(c),isNaN(e)?a.trim(c):e},isDigit:function(a){return isNaN(a)?b.regex.digitTest.test(a.toString().replace(b.regex.digitReplace,"")):""!==a},computeColumnIndex:function(b,c){var d,e,f,g,h,i,j,k,l,m,n=c&&c.columns||0,o=[],p=new Array(n);
2
+ for(d=0;d<b.length;d++)for(i=b[d].cells,e=0;e<i.length;e++){for(h=i[e],j=h.parentNode.rowIndex,k=h.rowSpan||1,l=h.colSpan||1,"undefined"==typeof o[j]&&(o[j]=[]),f=0;f<o[j].length+1;f++)if("undefined"==typeof o[j][f]){m=f;break}for(n&&h.cellIndex===m||(h.setAttribute?h.setAttribute("data-column",m):a(h).attr("data-column",m)),f=j;j+k>f;f++)for("undefined"==typeof o[f]&&(o[f]=[]),p=o[f],g=m;m+l>g;g++)p[g]="x"}return p.length},fixColumnWidth:function(c){c=a(c)[0];var d,e,f,g,h,i=c.config,j=i.$table.children("colgroup");if(j.length&&j.hasClass(b.css.colgroup)&&j.remove(),i.widthFixed&&0===i.$table.children("colgroup").length){for(j=a('<colgroup class="'+b.css.colgroup+'">'),d=i.$table.width(),f=i.$tbodies.find("tr:first").children(":visible"),g=f.length,h=0;g>h;h++)e=parseInt(f.eq(h).width()/d*1e3,10)/10+"%",j.append(a("<col>").css("width",e));i.$table.prepend(j)}},getData:function(b,c,d){var e,f,g="",h=a(b);return h.length?(e=a.metadata?h.metadata():!1,f=" "+(h.attr("class")||""),"undefined"!=typeof h.data(d)||"undefined"!=typeof h.data(d.toLowerCase())?g+=h.data(d)||h.data(d.toLowerCase()):e&&"undefined"!=typeof e[d]?g+=e[d]:c&&"undefined"!=typeof c[d]?g+=c[d]:" "!==f&&f.match(" "+d+"-")&&(g=f.match(new RegExp("\\s"+d+"-([\\w-]+)"))[1]||""),a.trim(g)):""},getColumnData:function(b,c,d,e,f){if("undefined"!=typeof c&&null!==c){b=a(b)[0];var g,h,i=b.config,j=f||i.$headers,k=i.$headerIndexed&&i.$headerIndexed[d]||j.filter('[data-column="'+d+'"]:last');if(c[d])return e?c[d]:c[j.index(k)];for(h in c)if("string"==typeof h&&(g=k.filter(h).add(k.find(h)),g.length))return c[h]}},isProcessing:function(c,d,e){c=a(c);var f=c[0].config,g=e||c.find("."+b.css.header);d?("undefined"!=typeof e&&f.sortList.length>0&&(g=g.filter(function(){return this.sortDisabled?!1:b.isValueInArray(parseFloat(a(this).attr("data-column")),f.sortList)>=0})),c.add(g).addClass(b.css.processing+" "+f.cssProcessing)):c.add(g).removeClass(b.css.processing+" 
"+f.cssProcessing)},processTbody:function(b,c,d){if(b=a(b)[0],d)return b.isProcessing=!0,c.before('<colgroup class="tablesorter-savemyplace"/>'),a.fn.detach?c.detach():c.remove();var e=a(b).find("colgroup.tablesorter-savemyplace");c.insertAfter(e),e.remove(),b.isProcessing=!1},clearTableBody:function(b){a(b)[0].config.$tbodies.children().detach()},characterEquivalents:{a:"áàâãäąå",A:"ÁÀÂÃÄĄÅ",c:"çćč",C:"ÇĆČ",e:"éèêëěę",E:"ÉÈÊËĚĘ",i:"íìİîïı",I:"ÍÌİÎÏ",o:"óòôõöō",O:"ÓÒÔÕÖŌ",ss:"ß",SS:"ẞ",u:"úùûüů",U:"ÚÙÛÜŮ"},replaceAccents:function(a){var c,d="[",e=b.characterEquivalents;if(!b.characterRegex){b.characterRegexArray={};for(c in e)"string"==typeof c&&(d+=e[c],b.characterRegexArray[c]=new RegExp("["+e[c]+"]","g"));b.characterRegex=new RegExp(d+"]")}if(b.characterRegex.test(a))for(c in e)"string"==typeof c&&(a=a.replace(b.characterRegexArray[c],c));return a},restoreHeaders:function(c){var d,e,f=a(c)[0].config,g=f.$table.find(f.selectorHeaders),h=g.length;for(d=0;h>d;d++)e=g.eq(d),e.find("."+b.css.headerIn).length&&e.html(f.headerContent[d])},destroy:function(c,d,e){if(c=a(c)[0],c.hasInitialized){b.removeWidget(c,!0,!1);var f,g=a(c),h=c.config,i=h.debug,j=g.find("thead:first"),k=j.find("tr."+b.css.headerRow).removeClass(b.css.headerRow+" "+h.cssHeaderRow),l=g.find("tfoot:first > tr").children("th, td");d===!1&&a.inArray("uitheme",h.widgets)>=0&&(g.triggerHandler("applyWidgetId",["uitheme"]),g.triggerHandler("applyWidgetId",["zebra"])),j.find("tr").not(k).remove(),f="sortReset update updateRows updateAll updateHeaders updateCell addRows updateComplete sorton appendCache updateCache applyWidgetId applyWidgets refreshWidgets removeWidget destroy mouseup mouseleave "+"keypress sortBegin sortEnd resetToLoadState ".split(" ").join(h.namespace+" "),g.removeData("tablesorter").unbind(f.replace(b.regex.spaces," ")),h.$headers.add(l).removeClass([b.css.header,h.cssHeader,h.cssAsc,h.cssDesc,b.css.sortAsc,b.css.sortDesc,b.css.sortNone].join(" 
")).removeAttr("data-column").removeAttr("aria-label").attr("aria-disabled","true"),k.find(h.selectorSort).unbind("mousedown mouseup keypress ".split(" ").join(h.namespace+" ").replace(b.regex.spaces," ")),b.restoreHeaders(c),g.toggleClass(b.css.table+" "+h.tableClass+" tablesorter-"+h.theme,d===!1),c.hasInitialized=!1,delete c.config.cache,"function"==typeof e&&e(c),i&&console.log("tablesorter has been removed")}}};a.fn.tablesorter=function(c){return this.each(function(){var d=this,e=a.extend(!0,{},b.defaults,c,b.instanceMethods);e.originalSettings=c,!d.hasInitialized&&b.buildTable&&"TABLE"!==this.nodeName?b.buildTable(d,e):b.setup(d,e)})},window.console&&window.console.log||(b.logs=[],console={},console.log=console.warn=console.error=console.table=function(){var a=arguments.length>1?arguments:arguments[0];b.logs[b.logs.length]={date:Date.now(),log:a}}),b.addParser({id:"no-parser",is:function(){return!1},format:function(){return""},type:"text"}),b.addParser({id:"text",is:function(){return!0},format:function(c,d){var e=d.config;return c&&(c=a.trim(e.ignoreCase?c.toLocaleLowerCase():c),c=e.sortLocaleCompare?b.replaceAccents(c):c),c},type:"text"}),b.regex.nondigit=/[^\w,. \-()]/g,b.addParser({id:"digit",is:function(a){return b.isDigit(a)},format:function(c,d){var e=b.formatFloat((c||"").replace(b.regex.nondigit,""),d);return c&&"number"==typeof e?e:c?a.trim(c&&d.config.ignoreCase?c.toLocaleLowerCase():c):c},type:"numeric"}),b.regex.currencyReplace=/[+\-,. 
]/g,b.regex.currencyTest=/^\(?\d+[\u00a3$\u20ac\u00a4\u00a5\u00a2?.]|[\u00a3$\u20ac\u00a4\u00a5\u00a2?.]\d+\)?$/,b.addParser({id:"currency",is:function(a){return a=(a||"").replace(b.regex.currencyReplace,""),b.regex.currencyTest.test(a)},format:function(c,d){var e=b.formatFloat((c||"").replace(b.regex.nondigit,""),d);return c&&"number"==typeof e?e:c?a.trim(c&&d.config.ignoreCase?c.toLocaleLowerCase():c):c},type:"numeric"}),b.regex.urlProtocolTest=/^(https?|ftp|file):\/\//,b.regex.urlProtocolReplace=/(https?|ftp|file):\/\//,b.addParser({id:"url",is:function(a){return b.regex.urlProtocolTest.test(a)},format:function(c){return c?a.trim(c.replace(b.regex.urlProtocolReplace,"")):c},parsed:!0,type:"text"}),b.regex.dash=/-/g,b.regex.isoDate=/^\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2}/,b.addParser({id:"isoDate",is:function(a){return b.regex.isoDate.test(a)},format:function(a,c){var d=a?new Date(a.replace(b.regex.dash,"/")):a;return d instanceof Date&&isFinite(d)?d.getTime():a},type:"numeric"}),b.regex.percent=/%/g,b.regex.percentTest=/(\d\s*?%|%\s*?\d)/,b.addParser({id:"percent",is:function(a){return b.regex.percentTest.test(a)&&a.length<15},format:function(a,c){return a?b.formatFloat(a.replace(b.regex.percent,""),c):a},type:"numeric"}),b.addParser({id:"image",is:function(a,b,c,d){return d.find("img").length>0},format:function(b,c,d){return a(d).find("img").attr(c.config.imgAttr||"alt")||b},parsed:!0,type:"text"}),b.regex.dateReplace=/(\S)([AP]M)$/i,b.regex.usLongDateTest1=/^[A-Z]{3,10}\.?\s+\d{1,2},?\s+(\d{4})(\s+\d{1,2}:\d{2}(:\d{2})?(\s+[AP]M)?)?$/i,b.regex.usLongDateTest2=/^\d{1,2}\s+[A-Z]{3,10}\s+\d{4}/i,b.addParser({id:"usLongDate",is:function(a){return b.regex.usLongDateTest1.test(a)||b.regex.usLongDateTest2.test(a)},format:function(a,c){var d=a?new Date(a.replace(b.regex.dateReplace,"$1 $2")):a;return d instanceof 
Date&&isFinite(d)?d.getTime():a},type:"numeric"}),b.regex.shortDateTest=/(^\d{1,2}[\/\s]\d{1,2}[\/\s]\d{4})|(^\d{4}[\/\s]\d{1,2}[\/\s]\d{1,2})/,b.regex.shortDateReplace=/[\-.,]/g,b.regex.shortDateXXY=/(\d{1,2})[\/\s](\d{1,2})[\/\s](\d{4})/,b.regex.shortDateYMD=/(\d{4})[\/\s](\d{1,2})[\/\s](\d{1,2})/,b.convertFormat=function(a,c){a=(a||"").replace(b.regex.spaces," ").replace(b.regex.shortDateReplace,"/"),"mmddyyyy"===c?a=a.replace(b.regex.shortDateXXY,"$3/$1/$2"):"ddmmyyyy"===c?a=a.replace(b.regex.shortDateXXY,"$3/$2/$1"):"yyyymmdd"===c&&(a=a.replace(b.regex.shortDateYMD,"$1/$2/$3"));var d=new Date(a);return d instanceof Date&&isFinite(d)?d.getTime():""},b.addParser({id:"shortDate",is:function(a){return a=(a||"").replace(b.regex.spaces," ").replace(b.regex.shortDateReplace,"/"),b.regex.shortDateTest.test(a)},format:function(a,c,d,e){if(a){var f=c.config,g=f.$headerIndexed[e],h=g.length&&g.data("dateFormat")||b.getData(g,b.getColumnData(c,f.headers,e),"dateFormat")||f.dateFormat;return g.length&&g.data("dateFormat",h),b.convertFormat(a,h)||a}return a},type:"numeric"}),b.regex.timeTest=/^([1-9]|1[0-2]):([0-5]\d)(\s[AP]M)$|^((?:[01]\d|[2][0-4]):[0-5]\d)$/i,b.regex.timeMatch=/([1-9]|1[0-2]):([0-5]\d)(\s[AP]M)|((?:[01]\d|[2][0-4]):[0-5]\d)/i,b.addParser({id:"time",is:function(a){return b.regex.timeTest.test(a)},format:function(a,c){var d,e=(a||"").match(b.regex.timeMatch),f=new Date(a),g=a&&(null!==e?e[0]:"00:00 AM"),h=g?new Date("2000/01/01 "+g.replace(b.regex.dateReplace,"$1 $2")):g;return h instanceof Date&&isFinite(h)?(d=f instanceof Date&&isFinite(f)?f.getTime():0,d?parseFloat(h.getTime()+"."+f.getTime()):h.getTime()):a},type:"numeric"}),b.addParser({id:"metadata",is:function(){return!1},format:function(b,c,d){var e=c.config,f=e.parserMetadataName?e.parserMetadataName:"sortValue";return a(d).metadata()[f]},type:"numeric"}),b.addWidget({id:"zebra",priority:90,format:function(b,c,d){var e,f,g,h,i,j,k,l=new 
RegExp(c.cssChildRow,"i"),m=c.$tbodies.add(a(c.namespace+"_extra_table").children("tbody:not(."+c.cssInfoBlock+")"));for(i=0;i<m.length;i++)for(g=0,e=m.eq(i).children("tr:visible").not(c.selectorRemove),k=e.length,j=0;k>j;j++)f=e.eq(j),l.test(f[0].className)||g++,h=g%2===0,f.removeClass(d.zebra[h?1:0]).addClass(d.zebra[h?0:1])},remove:function(a,c,d,e){if(!e){var f,g,h=c.$tbodies,i=(d.zebra||["even","odd"]).join(" ");for(f=0;f<h.length;f++)g=b.processTbody(a,h.eq(f),!0),g.children().removeClass(i),b.processTbody(a,g,!1)}}})}(jQuery),a.tablesorter});
.local/share/jupyter/nbextensions/varInspector/main.css ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ .varInspector {
3
+ max-height: 500px;
4
+ min-height: 100px;
5
+ font-size: 80%;
6
+ padding: 0px;
7
+ overflow-y: auto;
8
+ font-weight: normal;
9
+ color: #333333;
10
+ white-space: nowrap;
11
+ overflow-x: auto;
12
+ }
13
+
14
+ .varInspector-float-wrapper {
15
+ position: fixed !important;
16
+ top: 120px;
17
+ width:350px;
18
+ max-width:800px;
19
+ right: 20px;
20
+ border: thin solid rgba(0, 0, 0, 0.38);
21
+ border-radius: 5px;
22
+ padding:10px;
23
+ background-color: #fff;
24
+ opacity: .95;
25
+ z-index: 100;
26
+ overflow: hidden;
27
+ }
28
+
29
+ .hide-btn{
30
+ float: right;
31
+ }
32
+
33
+ .reload-btn{
34
+ float: right;
35
+ }
36
+
37
+ .kill-btn{
38
+ float: right;
39
+ }
40
+
41
+ .col-md-9 {
42
+ overflow:hidden;
43
+ margin-left: 14%;
44
+ width: 80%}
45
+
46
+ #varInspector-wrapper.closed {
47
+ min-width: 250px;
48
+ width: auto;
49
+ transition: width;
50
+ }
51
+ #varInspector-wrapper:hover{
52
+ opacity: 1;
53
+ }
54
+ #varInspector-wrapper .header {
55
+ font-size: 16px;
56
+ font-weight: bold;
57
+ }
58
+ #varInspector-wrapper .hide-btn {
59
+ font-size: 14px;
60
+ font-family: monospace;
61
+ }
62
+
63
+ #varInspector-wrapper .reload-btn {
64
+ font-size: 14px;
65
+ font-family: monospace;
66
+ }
67
+
68
+ #varInspector-wrapper .kill-btn {
69
+ font-size: 14px;
70
+ font-family: monospace;
71
+ }
72
+
73
+
74
+
75
+ /* don't waste so much screen space... */
76
+ #varInspector-wrapper .toc-item{
77
+ padding-left: 20px;
78
+ }
79
+
80
+ #varInspector-wrapper .toc-item .toc-item{
81
+ padding-left: 10px;
82
+ }
83
+
84
+
85
+
86
+ table.table, table.table tr, table.table td, table.table th {
87
+ border: 0;
88
+ }
89
+ table.table-nonfluid {
90
+ width: auto !important;
91
+ }
92
+ table.table {
93
+ margin-left: 0;
94
+ margin-right: 0;
95
+ }
96
+ /* stuff for tablesorter plugin */
97
+ .tablesorter-default .header,
98
+ .tablesorter-default .tablesorter-header {
99
+ background-image: url();
100
+ background-position: right center;
101
+ background-repeat: no-repeat;
102
+ cursor: pointer;
103
+ padding-right: 20px;
104
+ }
105
+ .tablesorter-default thead .headerSortUp,
106
+ .tablesorter-default thead .tablesorter-headerSortUp,
107
+ .tablesorter-default thead .tablesorter-headerAsc {
108
+ background-image: url();
109
+ }
110
+ .tablesorter-default thead .headerSortDown,
111
+ .tablesorter-default thead .tablesorter-headerSortDown,
112
+ .tablesorter-default thead .tablesorter-headerDesc {
113
+ background-image: url();
114
+ }
115
+ .tablesorter-default thead .sorter-false {
116
+ background-image: none;
117
+ cursor: default;
118
+ padding-right: 5px;
119
+ }
.local/share/jupyter/nbextensions/varInspector/main.js ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ define([
2
+ 'require',
3
+ 'jquery',
4
+ 'base/js/namespace',
5
+ 'base/js/events',
6
+ 'notebook/js/codecell'
7
+ ], function(
8
+ requirejs,
9
+ $,
10
+ Jupyter,
11
+ events,
12
+ codecell
13
+ ) {
14
+ "use strict";
15
+
16
// Extension identity; log_prefix tags every console message emitted by this module.
var mod_name = "varInspector";
var log_prefix = '[' + mod_name + '] ';

// ...........Parameters configuration......................
// Built-in defaults, used for any setting that is absent from the general
// settings (notebook.json).
var cfg = {
    window_display: false,
    // Maximum number of characters shown per column.
    cols: {
        lenName: 16,
        lenType: 16,
        lenVar: 40
    },
    // Per kernel language: helper library to load, command pieces used to
    // delete a variable, and the command that prints the variable list.
    kernels_config: {
        python: {
            library: 'var_list.py',
            delete_cmd_prefix: 'del ',
            delete_cmd_postfix: '',
            varRefreshCmd: 'print(var_dic_list())'
        },
        r: {
            library: 'var_list.r',
            delete_cmd_prefix: 'rm(',
            delete_cmd_postfix: ') ',
            varRefreshCmd: 'cat(var_dic_list()) '
        }
    },
    types_to_exclude: ['module', 'function', 'builtin_function_or_method', 'instance', '_Feature']
};

//.....................global variables....
// Mutable extension state.
var st = {
    config_loaded: false,
    extension_initialized: false,
    code_init: ""
};
55
+
56
/**
 * Merge server-level and notebook-level settings into `cfg`.
 *
 * The merge runs asynchronously, once `Jupyter.notebook.config.loaded`
 * resolves. `$.extend(true, cfg, ...)` mutates `cfg` in place, so the object
 * returned synchronously below is the same one that is filled in later.
 *
 * @param {Object} cfg        defaults to update in place
 * @param {Function} [callback] invoked (without arguments) after the merge
 * @returns {Object} the `cfg` object that was passed in
 */
function read_config(cfg, callback) { // read after nb is loaded
    var nbConfig = Jupyter.notebook.config;

    nbConfig.loaded.then(function() {
        var serverCfg = nbConfig.data.varInspector;
        var nbMeta = Jupyter.notebook.metadata;

        // Config may be specified at system level or at document level.
        // First, update the defaults with the config loaded from the server.
        cfg = $.extend(true, cfg, serverCfg);

        // window_display is taken from the notebook metadata when it is truthy there.
        if (nbMeta.varInspector && nbMeta.varInspector.window_display) {
            cfg.window_display = nbMeta.varInspector.window_display;
        }

        // Fold in the notebook metadata and store the result back in the
        // notebook; from here on cfg and the metadata entry are one object,
        // so settings can be modified (and saved) per document.
        cfg = nbMeta.varInspector = $.extend(true, cfg, nbMeta.varInspector);

        // cols and kernels_config are taken from the system config (if defined).
        if (serverCfg) {
            if (serverCfg.cols) {
                cfg.cols = $.extend(true, cfg.cols, serverCfg.cols);
            }
            if (serverCfg.kernels_config) {
                cfg.kernels_config = $.extend(true, cfg.kernels_config, serverCfg.kernels_config);
            }
        }

        // Notify the caller, then flag the configuration as loaded.
        if (callback) {
            callback();
        }
        st.config_loaded = true;
    });

    return cfg;
}
90
+
91
+ var sortable;
92
+
93
// Argument-free entry point (toolbar action, close button): toggles the
// inspector window using the module-level configuration and state.
function toggleVarInspector() {
    toggle_varInspector(cfg, st);
}
96
+
97
// Add the "Variable Inspector" toolbar button unless it is already present.
var varInspector_button = function() {
    // No toolbar yet: run again once the notebook app reports it is initialized.
    if (!Jupyter.toolbar) {
        events.on("app_initialized.NotebookApp", varInspector_button);
        return;
    }

    // The button already exists; nothing to do.
    if ($("#varInspector_button").length !== 0) {
        return;
    }

    var action = Jupyter.keyboard_manager.actions.register({
        'help': 'Variable Inspector',
        'icon': 'fa-crosshairs',
        'handler': toggleVarInspector
    }, 'toggle-variable-inspector', 'varInspector');

    var buttonGroup = Jupyter.toolbar.add_buttons_group([action]);
    // Tag the button with an id so it can be found (and removed) later.
    $(buttonGroup).find('.btn').attr('id', 'varInspector_button');
};
112
+
113
// Attach the extension stylesheet (main.css, resolved relative to this module)
// to the document head.
var load_css = function() {
    var styleLink = document.createElement("link");
    styleLink.rel = "stylesheet";
    styleLink.type = "text/css";
    styleLink.href = requirejs.toUrl("./main.css");

    var head = document.getElementsByTagName("head")[0];
    head.appendChild(styleLink);
};
120
+
121
+
122
/**
 * Build the HTML of the variable table.
 *
 * @param {string} jsonVars JSON text describing an array of objects with the
 *        fields varName, varType, varSize, varContent and optionally varShape.
 * @returns {string} markup of a <div class="inspector"> wrapping the table.
 * @throws {SyntaxError} when jsonVars is not valid JSON (from JSON.parse).
 *
 * Reads the module-level `cfg` (column widths, per-kernel delete command) and
 * the kernel language stored in the notebook metadata.
 */
function html_table(jsonVars) {
    // Shorten x to at most L characters; a cut is marked with a trailing '...'.
    // A string of exactly L characters already fits and is returned unchanged.
    function _trunc(x, L) {
        x = String(x);
        if (x.length <= L) return x;
        return x.substring(0, L - 3) + '...';
    }

    // Escape text for use in HTML content and in double-quoted attributes.
    // NOTE(review): assumes the kernel-side helper returns raw, unescaped text;
    // confirm against var_list.py / var_list.r. If it already escapes, entities
    // would be shown literally.
    function _esc(x) {
        return String(x)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    var kernelLanguage = Jupyter.notebook.metadata.kernelspec.language.toLowerCase();
    var kernel_config = cfg.kernels_config[kernelLanguage];
    var varList = JSON.parse(String(jsonVars));

    // The Shape column is shown only if at least one variable reports a shape.
    var has_shape = varList.some(listVar => "varShape" in listVar && listVar.varShape !== '');
    var shape_str = has_shape ? '<th >Shape</th>' : '';

    var beg_table = '<div class="inspector"><table class="table fixed table-condensed table-nonfluid "><col /> ' +
        '<col /><col /><thead><tr><th >X</th><th >Name</th><th >Type</th><th >Size</th>' + shape_str +
        '<th >Value</th></tr></thead><tr><td> ' +
        '</td></tr>';

    varList.forEach(listVar => {
        // Clicking the "x" link deletes the variable in the kernel, then asks
        // for a refresh. JSON.stringify yields a correctly quoted JS string
        // literal; _esc then makes the whole handler safe inside the attribute.
        var deleteCmd = kernel_config.delete_cmd_prefix + listVar.varName + kernel_config.delete_cmd_postfix;
        var onClick = 'Jupyter.notebook.kernel.execute(' + JSON.stringify(deleteCmd) + '); ' +
            "Jupyter.notebook.events.trigger('varRefresh'); ";

        var cells = [
            '<a href="#" onClick="' + _esc(onClick) + '">x</a>',
            _esc(_trunc(listVar.varName, cfg.cols.lenName)),
            _esc(_trunc(listVar.varType, cfg.cols.lenType)),
            _esc(listVar.varSize)
        ];
        if (has_shape) {
            // Variables without a shape get an empty cell rather than "undefined".
            cells.push(_esc(listVar.varShape === undefined ? '' : listVar.varShape));
        }
        cells.push(_esc(_trunc(listVar.varContent, cfg.cols.lenVar)));

        beg_table += '<tr><td>' + cells.join('</td><td>') + '</td></tr>';
    });

    return beg_table + '</table></div>';
}
157
+
158
+
159
+
160
/**
 * iopub output handler for the variable-list commands sent to the kernel.
 *
 * - If the message reports that var_dic_list is unknown, the helper library
 *   was cleared from the kernel: re-initialize the extension.
 * - If the message carries text, render it as the variable table.
 * - Any other message (no text, or text that is not the expected JSON) is
 *   ignored instead of raising inside the kernel callback.
 *
 * @param {Object} msg message passed to the `iopub.output` callback
 */
function code_exec_callback(msg) {
    var content = msg.content || {};
    var jsonVars = content['text'];

    var notWellDefined = false;
    if (content.evalue) {
        notWellDefined = content.evalue === "name 'var_dic_list' is not defined" ||
            content.evalue.startsWith("Error in cat(var_dic_list())");
    }

    if (notWellDefined) {
        // var_dic_list was cleared ==> need to restart the extension
        varInspector_init();
    } else if (typeof jsonVars !== 'string') {
        // Output without text (e.g. an unrelated error): nothing to render.
        return;
    } else {
        try {
            $('#varInspector').html(html_table(jsonVars));
        } catch (err) {
            // Typically text that is not the JSON list (warnings, partial output).
            console.warn(log_prefix + 'could not render the variable list', err);
            return;
        }
    }

    // Make the table sortable; the short delay leaves time for the new markup
    // to be in place.
    requirejs(['nbextensions/varInspector/jquery.tablesorter.min'],
        function() {
            setTimeout(function() {
                if ($('#varInspector').length > 0) {
                    $('#varInspector table').tablesorter();
                }
            }, 50);
        });
}
176
+
177
// Make the inspector table sortable. The tablesorter plugin is loaded
// asynchronously, so it is applied from the load callback rather than right
// after the requirejs() call, when the plugin may not be available yet.
function tableSort() {
    requirejs(['nbextensions/varInspector/jquery.tablesorter.min'], function() {
        $('#varInspector table').tablesorter();
    });
}
181
+
182
/**
 * Ask the kernel for the current list of variables; the reply is rendered by
 * code_exec_callback. Does nothing when no kernel is attached or when the
 * kernel language has no entry in cfg.kernels_config, so it is safe to call
 * from event handlers and from outside the module at any time.
 */
var varRefresh = function() {
    var kernelspec = Jupyter.notebook.metadata.kernelspec;
    if (!Jupyter.notebook.kernel || !kernelspec || !kernelspec.language) {
        return; // no kernel (yet) to query
    }

    var kernelLanguage = kernelspec.language.toLowerCase();
    var kernel_config = cfg.kernels_config[kernelLanguage];
    if (kernel_config === undefined) {
        return; // unsupported kernel language: no refresh command defined
    }

    // Load tablesorter first: the output callback relies on it.
    requirejs(['nbextensions/varInspector/jquery.tablesorter.min'],
        function() {
            Jupyter.notebook.kernel.execute(
                kernel_config.varRefreshCmd, { iopub: { output: code_exec_callback } }, { silent: false }
            );
        });
};
192
+
193
+
194
/**
 * (Re-)initialize the extension for the current kernel: read the
 * configuration, load the kernel-side helper library, create the window and
 * bind the refresh events. Safe to call repeatedly (it runs on every
 * kernel_ready and when the helper was cleared from the kernel): the refresh
 * handlers are unbound before being bound again, so they never stack up.
 */
var varInspector_init = function() {
    // Fetch the helper library for the kernel language, substitute the column
    // widths, and execute it in the kernel.
    function read_code_init(lib) {
        var libName = Jupyter.notebook.base_url + "nbextensions/varInspector/" + lib;
        $.get(libName).done(function(data) {
            st.code_init = data;
            // String patterns: only the first occurrence of each placeholder is
            // replaced. NOTE(review): confirm each appears once in the library.
            st.code_init = st.code_init.replace('lenName', cfg.cols.lenName)
                .replace('lenType', cfg.cols.lenType)
                .replace('lenVar', cfg.cols.lenVar);
            //.replace('types_to_exclude', JSON.stringify(cfg.types_to_exclude).replace(/\"/g, "'"))
            requirejs(
                [
                    'nbextensions/varInspector/jquery.tablesorter.min'
                    //'nbextensions/varInspector/colResizable-1.6.min'
                ],
                function() {
                    Jupyter.notebook.kernel.execute(st.code_init, { iopub: { output: code_exec_callback } }, { silent: false });
                });
            variable_inspector(cfg, st); // create window if not already present
            console.log(log_prefix + 'loaded library');
        }).fail(function() {
            console.log(log_prefix + 'failed to load ' + lib + ' library');
        });
    }

    // Remove this extension's refresh handlers (no-op if they are not bound).
    function unbind_refresh_events() {
        events.off('execute.CodeCell', varRefresh);
        events.off('varRefresh', varRefresh);
    }

    // read configuration
    cfg = read_config(cfg, function() {
        // Called when config is available
        if (typeof Jupyter.notebook.kernel !== "undefined" && Jupyter.notebook.kernel !== null) {
            var kernelLanguage = Jupyter.notebook.metadata.kernelspec.language.toLowerCase();
            var kernel_config = cfg.kernels_config[kernelLanguage];
            if (kernel_config === undefined) { // Kernel is not supported
                console.warn(log_prefix + " Sorry, can't use kernel language " + kernelLanguage + ".\n" +
                    "Configurations are currently only defined for the following languages:\n" +
                    Object.keys(cfg.kernels_config).join(', ') + "\n" +
                    "See readme for more details.");
                // Always stop refreshing for an unsupported kernel, whether or
                // not the toolbar button is still present.
                unbind_refresh_events();
                if ($("#varInspector_button").length > 0) { // extension was present
                    $("#varInspector_button").remove();
                    $('#varInspector-wrapper').remove();
                }
                return;
            }
            varInspector_button(); // In case button was removed
            // read and execute code_init (if kernel is supported)
            read_code_init(kernel_config.library);
        } else {
            console.warn(log_prefix + "Kernel not available?");
        }
    }); // called after config is stable

    // event: on cell execution, update the list of variables.
    // Unbind first so repeated initializations leave exactly one handler each.
    unbind_refresh_events();
    events.on('execute.CodeCell', varRefresh);
    events.on('varRefresh', varRefresh);
};
253
+
254
+
255
/**
 * Build the floating inspector window (header with close / collapse / reload
 * links plus the #varInspector content area), append it to <body>, make it
 * draggable and resizable, and restore the position and visibility stored in
 * the notebook metadata.
 *
 * @param {Object} cfg configuration; `cfg.oldHeight` remembers the window
 *        height from before it was collapsed
 * @param {Object} st  extension state (not used here)
 */
var create_varInspector_div = function(cfg, st) {
    var COLLAPSED_HEIGHT = 40; // wrapper height (px) while collapsed

    // Per-notebook settings, created on demand so no handler below can
    // dereference an undefined metadata entry.
    function nb_settings() {
        if (Jupyter.notebook.metadata.varInspector === undefined) {
            Jupyter.notebook.metadata.varInspector = {};
        }
        return Jupyter.notebook.metadata.varInspector;
    }

    // Store the window geometry in the notebook metadata.
    function save_position() {
        var $w = $('#varInspector-wrapper');
        nb_settings().position = {
            'left': $w.css('left'),
            'top': $w.css('top'),
            'width': $w.css('width'),
            'height': $w.css('height'),
            'right': $w.css('right')
        };
    }

    // True when a usable pre-collapse height has been recorded.
    function has_old_height() {
        return typeof cfg.oldHeight === 'number' && isFinite(cfg.oldHeight);
    }

    // Update label and tooltip of the collapse/expand link.
    function set_hide_btn(label, title) {
        $('#varInspector-wrapper .hide-btn').text(label).attr('title', title);
    }

    // [x]: close the window.
    var kill_btn = $("<a/>")
        .attr("href", "#")
        .text("[x]")
        .addClass("kill-btn")
        .attr('title', 'Close window')
        .click(function() {
            toggleVarInspector();
            return false;
        });

    // [-] / [+]: collapse or expand the content area.
    var hide_btn = $("<a/>")
        .attr("href", "#")
        .addClass("hide-btn")
        .attr('title', 'Hide Variable Inspector')
        .text("[-]")
        .click(function() {
            var $wrapper = $('#varInspector-wrapper');
            $wrapper.css('position', 'fixed');
            $('#varInspector').slideToggle({
                'complete': function() {
                    nb_settings()['varInspector_section_display'] = $('#varInspector').css('display');
                    save_position();
                    Jupyter.notebook.set_dirty();
                }
            });
            $wrapper.toggleClass('closed');
            if ($wrapper.hasClass('closed')) {
                cfg.oldHeight = $wrapper.height();
                $wrapper.css({ height: COLLAPSED_HEIGHT });
                set_hide_btn('[+]', 'Show Variable Inspector');
            } else {
                if (has_old_height()) {
                    $wrapper.height(cfg.oldHeight);
                    $('#varInspector').height(cfg.oldHeight - $('#varInspector-header').height() - 30);
                } else {
                    // No recorded height: drop the collapsed height so the
                    // window is not left stuck at COLLAPSED_HEIGHT.
                    $wrapper.css('height', '');
                }
                set_hide_btn('[-]', 'Hide Variable Inspector');
            }
            return false;
        });

    // Reload: query the kernel again.
    var reload_btn = $("<a/>")
        .attr("href", "#")
        .text(" \u21BB")
        .addClass("reload-btn")
        .attr('title', 'Reload Variable Inspector')
        .click(function() {
            varRefresh();
            return false;
        });

    var header = $('<div id="varInspector-header"/>')
        .addClass("header")
        .text("Variable Inspector ")
        .append(kill_btn)
        .append(hide_btn)
        .append(reload_btn)
        .append($("<span/>").html("&nbsp;&nbsp;"))
        .append($("<span/>").html("&nbsp;&nbsp;"));

    var varInspector_wrapper = $('<div id="varInspector-wrapper"/>')
        .append(header)
        .append($("<div/>").attr("id", "varInspector").addClass('varInspector'));

    $("body").append(varInspector_wrapper);
    // Ensure position is fixed
    $('#varInspector-wrapper').css('position', 'fixed');

    // enable dragging and save position on stop moving
    $('#varInspector-wrapper').draggable({
        drag: function(event, ui) {},
        start: function(event, ui) {
            // Pin the current width as an explicit value before the drag starts.
            $(this).width($(this).width());
        },
        stop: function(event, ui) { // on save, store window position
            save_position();
            Jupyter.notebook.set_dirty();
            // Ensure position is fixed (again)
            $('#varInspector-wrapper').css('position', 'fixed');
        }
    });

    $('#varInspector-wrapper').resizable({
        resize: function(event, ui) {
            $('#varInspector').height($('#varInspector-wrapper').height() - $('#varInspector-header').height());
        },
        start: function(event, ui) {
            $(this).css('position', 'fixed');
        },
        stop: function(event, ui) { // on save, store window position
            save_position();
            $('#varInspector').height($('#varInspector-wrapper').height() - $('#varInspector-header').height());
            Jupyter.notebook.set_dirty();
        }
    });

    var settings = nb_settings();

    // restore window position at startup
    if (settings.position !== undefined) {
        $('#varInspector-wrapper').css(settings.position);
    }
    // Ensure position is fixed
    $('#varInspector-wrapper').css('position', 'fixed');

    // Restore window display
    if (settings['varInspector_section_display'] !== undefined) {
        $('#varInspector').css('display', settings['varInspector_section_display']);
        if (settings['varInspector_section_display'] == 'none') {
            $('#varInspector-wrapper').addClass('closed');
            $('#varInspector-wrapper').css({ height: COLLAPSED_HEIGHT });
            set_hide_btn('[+]', 'Show Variable Inspector');
        }
    }
    if (settings['window_display'] !== undefined) {
        console.log(log_prefix + "Restoring Variable Inspector window");
        $('#varInspector-wrapper').css('display', settings['window_display'] ? 'block' : 'none');
        if ($('#varInspector-wrapper').hasClass('closed')) {
            // Only meaningful when a pre-collapse height is known.
            if (has_old_height()) {
                $('#varInspector').height(cfg.oldHeight - $('#varInspector-header').height());
            }
        } else {
            $('#varInspector').height($('#varInspector-wrapper').height() - $('#varInspector-header').height() - 30);
        }
    }

    // if varInspector-wrapper is undefined (first run(?), then hide it)
    if ($('#varInspector-wrapper').css('display') == undefined) $('#varInspector-wrapper').css('display', "none"); //block

    varInspector_wrapper.addClass('varInspector-float-wrapper');
};
408
+
409
/**
 * Make sure the inspector window exists, keep its maximum height tied to the
 * browser window, and refresh the variable list.
 *
 * This function runs on every toggle and every initialization, so the resize
 * handler is bound under the `.varInspector` event namespace and replaced on
 * each call instead of being added again.
 *
 * @param {Object} cfg configuration, forwarded to create_varInspector_div
 * @param {Object} st  extension state, forwarded to create_varInspector_div
 */
var variable_inspector = function(cfg, st) {
    if ($("#varInspector-wrapper").length === 0) {
        create_varInspector_div(cfg, st);
    }

    $(window)
        .off('resize.varInspector')
        .on('resize.varInspector', function() {
            $('#varInspector').css({ maxHeight: $(window).height() - 30 });
            $('#varInspector-wrapper').css({ maxHeight: $(window).height() - 10 });
        });

    // Apply the limits once right away.
    $(window).trigger('resize');
    varRefresh();
};
424
+
425
// Show or hide the inspector window. When the toggle completes, the new
// visibility is recorded in the notebook metadata and the window is refreshed.
var toggle_varInspector = function(cfg, st) {
    var on_toggled = function() {
        var is_shown = $('#varInspector-wrapper').css('display') == 'block';
        Jupyter.notebook.metadata.varInspector['window_display'] = is_shown;
        Jupyter.notebook.set_dirty();
        // recompute:
        variable_inspector(cfg, st);
    };

    // toggle draw (first because of first-click behavior)
    $("#varInspector-wrapper").toggle({
        'progress': function() {},
        'complete': on_toggled
    });
};
437
+
438
+
439
// Extension entry point: load the stylesheet, add the toolbar button, and
// initialize now (if a kernel is attached) and again whenever a kernel
// becomes ready.
var load_jupyter_extension = function() {
    load_css();
    varInspector_button();

    var kernel = Jupyter.notebook.kernel;
    var kernel_present = typeof kernel !== "undefined" && kernel !== null;
    if (kernel_present) {
        console.log(log_prefix + "Kernel is available -- varInspector initializing ");
        varInspector_init();
    }

    // If no kernel was available we still wait for one; in any case this runs
    // for every new kernel: on kernel_ready.Kernel a kernel has been started
    // and the extension is initialized for it.
    events.on("kernel_ready.Kernel", function(evt, data) {
        console.log(log_prefix + "Kernel is available -- reading configuration");
        varInspector_init();
    });
};
456
+
457
+ return {
458
+ load_ipython_extension: load_jupyter_extension,
459
+ varRefresh: varRefresh
460
+ };
461
+
462
+ });
.local/share/jupyter/nbextensions/varInspector/varInspector.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Type: Jupyter Notebook Extension
2
+ Name: Variable Inspector
3
+ Description: The Variable Inspector extension collects all defined variables and displays them in a floating window. The window is also draggable, resizable, and collapsible.
4
+ Link: README.md
5
+ Icon: icon.png
6
+ Main: main.js
7
+ Compatibility: 4.x, 5.x
8
+ Parameters:
9
+ - name: varInspector.window_display
10
+ description: Display window at startup
11
+ input_type: checkbox
12
+ default: false
13
+ - name: varInspector.cols.lenName
14
+ description: Variable name - Maximum number of characters to display
15
+ input_type: number
16
+ default: 16
17
+ - name: varInspector.cols.lenType
18
+ description: Variable type - Maximum number of characters to display
19
+ input_type: number
20
+ default: 16
21
+ - name: varInspector.cols.lenVar
22
+ description: Variable value/content - Maximum number of characters to display
23
+ input_type: number
24
+ default: 40
25
+ - name: varInspector.kernels_config
26
+ description: |
27
+ json object defining the libraries to load, the delete/remove
28
+ kernel commands to delete a variable, and
29
+ finally the commands to refresh the list of variables.
30
+ input_type: json_object
31
+ default: |
32
+ {
33
+ 'python': {
34
+ library: 'var_list.py',
35
+ delete_cmd_prefix: 'del ',
36
+ delete_cmd_postfix: '',
37
+ varRefreshCmd: 'print(var_dic_list())'
38
+ },
39
+ 'r': {
40
+ library: 'var_list.r',
41
+ delete_cmd_prefix: 'rm(',
42
+ delete_cmd_postfix: ') ',
43
+ varRefreshCmd: 'cat(var_dic_list()) '
44
+ }
45
+ }
.local/share/jupyter/nbextensions/varInspector/var_list.r ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ library(jsonlite)
2
+ var_dic_list = function(){
3
+ ll = ls(.GlobalEnv, all.names = FALSE)
4
+ varList=list()
5
+ iter = 1
6
+ for (k in ll){
7
+ if (!is.function(get(k))){ # is.function() yields a single TRUE/FALSE; class(x) != 'function' has length > 1 for multi-class objects (e.g. matrices) and is an error in if() on R >= 4.2
8
+ class = class(get(k)); rk = capture.output(str(get(k))); size = object.size(get(k)); sk = substr(get(k),0, 200);
9
+ # [{'varName':v, 'varType': type(eval(v)).__name__, 'varSize': _getsizeof(eval(v)), 'varContent': str(eval(v))[:200]}
10
+ l = list(varName = k, varType = class, varSize = size, varContent = sk)
11
+ varList[[iter]] = l
12
+ # print(l)
13
+ iter = iter + 1}
14
+ }
15
+ return(toJSON(varList, simplifyVector = FALSE, force=TRUE))
16
+ }
17
+ cat(var_dic_list())
.local/share/jupyter/nbextensions/zenmode/README.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Zenmode
2
+ =======
3
+
4
+ A little extension to give Zenmode functionality to the IPython notebook
.local/share/jupyter/nbextensions/zenmode/images/back3.jpg ADDED
.local/share/jupyter/nbextensions/zenmode/main.css ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .navbar-inner {
2
+ opacity: 0.5;
3
+ -webkit-transition: opacity 0.3s ease-in-out;
4
+ -moz-transition: opacity 0.3s ease-in-out;
5
+ -o-transition: opacity 0.3s ease-in-out;
6
+ transition: opacity 0.3s ease-in-out;
7
+ }
8
+
9
+ .navbar-inner:hover {
10
+ opacity: 1.0;
11
+ }
12
+
13
+ #maintoolbar .navbar-text {
14
+ display: none !important;
15
+ }
16
+
17
+ #notebook-container {
18
+ background-color: rgba(255, 255, 255, 0);
19
+ }
20
+
21
+ /*
22
+ .cell {
23
+ background-color: rgb(255, 255, 255);
24
+ }
25
+
26
+ .CodeMirror {
27
+ background: #F8FCCF;
28
+ }
29
+
30
+ div.input_area {
31
+ margin: 2px;
32
+ border: none;
33
+ }
34
+ */
.local/share/jupyter/nbextensions/zenmode/main.js ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * ----------------------------------------------------------------------------
3
+ * Copyright (c) 2013 - Damián Avila
4
+ * Copyright (c) 2015 - Joshua Cooke Barnes (jcb91)
5
+ *
6
+ * Distributed under the terms of the Modified BSD License.
7
+ *
8
+ * A little extension to give Zenmode functionality to the IPython notebook.
9
+ * ----------------------------------------------------------------------------
10
+ */
11
+
12
+ define([
13
+ "require",
14
+ "jquery",
15
+ "base/js/namespace",
16
+ "base/js/events"
17
+ ], function(
18
+ requirejs,
19
+ $,
20
+ IPython,
21
+ events
22
+ ) {
23
+ "use strict"; // strict-mode directive; the previous "use_strict" was an ordinary string with no effect
24
+
25
+ var backgrounds = [
26
+ 'back11.jpg', 'back12.jpg', 'back2.jpg', 'back21.jpg', 'back22.jpg',
27
+ 'back3.jpg', 'ipynblogo0.png', 'ipynblogo1.png'
28
+ ];
29
+
30
+ var hide_header = true;
31
+ var hide_menubar = true
32
+
33
+ var getZenModeActive = function() {
34
+ return ($('link#zenmodecss')[0] !== undefined);
35
+ };
36
+
37
+ // not sure when this changed, so maybe this is the wrong comparison to make
38
+ var use_layout_manager = (Number(IPython.version.split(".")[0]) < 3);
39
+ var header_pattern = use_layout_manager ? '#header' : '#header-container';
40
+ if (use_layout_manager) {
41
+ // We need to redefine this function because in the IPython codebase
42
+ // the app_height function does not take into account the possibility
43
+ // to hide the header and 'menubar' bar.
44
+ IPython.layout_manager.app_height = function() {
45
+ var get_height = function(pattern) {
46
+ var el = $(pattern);
47
+ return getZenModeActive() ? 0 : el.outerHeight(true);
48
+ };
49
+ var h = $(window).height();
50
+ // content height
51
+ return h - get_height(header_pattern) - get_height('#menubar') - get_height('#maintoolbar');
52
+ };
53
+ }
54
+
55
+ var menu_pattern = '#menubar';
56
+ var oldBgAttrName = "zenmode-old-bg";
57
+ var toggleZenMode = function (background) {
58
+ if (getZenModeActive()) {
59
+ console.log('toggling zenmode off');
60
+ $('#zenmode-toggle-btn .fa').removeClass("fa-rebel").addClass("fa-empire");
61
+ $('#zenmodecss').remove();
62
+
63
+
64
+ // Remove zenmode css settings only when changes were made.
65
+ if (backgrounds.length != 0) {
66
+ $('body').css({
67
+ 'background-image': 'none'
68
+ })
69
+ }
70
+
71
+ // This should be changed at some point in the future to preserve non-zenmode visibility settings
72
+ $(menu_pattern).toggle(true);
73
+ $(header_pattern).toggle(true);
74
+ }
75
+ else {
76
+ console.log('toggling zenmode on');
77
+ $('#zenmode-toggle-btn .fa').removeClass("fa-empire").addClass("fa-rebel");
78
+ $('head').append(
79
+ $('<link id="zenmodecss" rel="stylesheet" type="text/css"/>').attr(
80
+ 'href', requirejs.toUrl("./main.css"))
81
+ );
82
+
83
+ if (background === undefined){
84
+ background = backgrounds[Math.floor(Math.random() * backgrounds.length)];
85
+ }
86
+ var absolute_url_pat = /^https?:\/\/|^\/\//i;
87
+ if (!absolute_url_pat.test(background)) {
88
+ background = requirejs.toUrl("./images/" + background);
89
+ }
90
+
91
+ // Apply zenmode css when there are images to be used.
92
+ if (backgrounds.length != 0) {
93
+ $('body').css({
94
+ 'background-image': 'url(' + background + ')',
95
+ 'background-repeat': 'no-repeat',
96
+ 'background-position': 'center center',
97
+ 'background-attachment': 'fixed',
98
+ '-webkit-background-size': 'cover',
99
+ '-moz-background-size': 'cover',
100
+ '-o-background-size': 'cover',
101
+ 'background-size': 'cover'
102
+ });
103
+ }
104
+
105
+ if (hide_menubar)
106
+ {$(menu_pattern).toggle(false);}
107
+ if (hide_header)
108
+ {$(header_pattern).toggle(false);}
109
+ }
110
+
111
+ // Lastly get notebook to do a resize
112
+ if (use_layout_manager) {
113
+ IPython.layout_manager.app_height();
114
+ IPython.layout_manager.do_resize();
115
+ }
116
+ else {
117
+ events.trigger("resize-header.Page");
118
+ }
119
+ };
120
+
121
+ var setZenModeActive = function(active, background) {
122
+ if (active === undefined) { active = true; }
123
+ console.log("zenmode ->", active);
124
+ if (getZenModeActive() != active) { toggleZenMode(background); }
125
+ };
126
+
127
+ var initialize = function () {
128
+ var config = IPython.notebook.config;
129
+ if (config.data.hasOwnProperty('zenmode_hide_header')) {
130
+ if (!config.data.zenmode_hide_header) {
131
+ console.log("not hiding notebook header");
132
+ hide_header = false;
133
+ }
134
+ }
135
+
136
+ if (config.data.hasOwnProperty('zenmode_hide_menubar')) {
137
+ if (!config.data.zenmode_hide_menubar) {
138
+ console.log("not hiding notebook menubar");
139
+ hide_menubar = false;
140
+ }
141
+ }
142
+
143
+ if (config.data.hasOwnProperty('zenmode_use_builtin_backgrounds')) {
144
+ if (!config.data.zenmode_use_builtin_backgrounds) {
145
+ console.log("not using builtin zenmode_backgrounds");
146
+ backgrounds.length = 0;
147
+ }
148
+ }
149
+
150
+ if (config.data.hasOwnProperty('zenmode_backgrounds')) {
151
+ if (config.data.zenmode_backgrounds.length > 0) {
152
+ var new_bg_urls = config.data.zenmode_backgrounds;
153
+ for (var ii=0; ii < new_bg_urls.length; ii++) {
154
+ var bg_url = new_bg_urls[ii].replace(/^\s+|\s+$/g, '');
155
+ if (bg_url.length > 0 && bg_url[0] != '#') {
156
+ backgrounds.push(bg_url);
157
+ }
158
+ }
159
+ console.log("additional zenmode backgrounds added");
160
+ }
161
+ }
162
+ console.log("zenmode_backgrounds = ", backgrounds);
163
+
164
+ if (config.data.hasOwnProperty('zenmode_set_zenmode_on_load')) {
165
+ setZenModeActive(
166
+ config.data.zenmode_set_zenmode_on_load ? true : false
167
+ );
168
+ }
169
+ };
170
+
171
+ var load_ipython_extension = function(background) {
172
+ $(IPython.toolbar.add_buttons_group([
173
+ IPython.keyboard_manager.actions.register({
174
+ 'help' : 'Enter/Exit Zenmode',
175
+ 'icon' : 'fa-empire',
176
+ 'handler': function() {
177
+ toggleZenMode(background);
178
+ setTimeout(function () {
179
+ $('#zenmode-toggle-btn').blur();
180
+ }, 500);
181
+ },
182
+ }, 'toggle-zenmode', 'zenmode'),
183
+ ], 'zenmode-btn-grp')).find('.btn').attr('id', 'zenmode-toggle-btn');
184
+ $("#maintoolbar-container").prepend($('#zenmode-btn-grp'));
185
+ return IPython.notebook.config.loaded.then(initialize);
186
+ };
187
+
188
+ var extension = {
189
+ load_ipython_extension : load_ipython_extension,
190
+ backgrounds : backgrounds,
191
+ toggleZenMode : toggleZenMode,
192
+ getZenModeActive : getZenModeActive,
193
+ setZenModeActive : setZenModeActive
194
+ };
195
+ return extension;
196
+ });
.local/share/jupyter/nbextensions/zenmode/zenmode.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Type: IPython Notebook Extension
2
+ Name: zenmode
3
+ Link: README.md
4
+ Description: A little extension to give Zenmode functionality to the IPython notebook
5
+ Main: main.js
6
+ Compatibility: 4.x, 5.x
7
+ Parameters:
8
+ - name: zenmode_set_zenmode_on_load
9
+ description: Set zenmode on when a notebook opens
10
+ input_type: checkbox
11
+ default: true
12
+ - name: zenmode_use_builtin_backgrounds
13
+ description: Use builtin backgrounds in addition to any specified by URL
14
+ input_type: checkbox
15
+ default: true
16
+ - name: zenmode_hide_header
17
+ description: Hide the header in zenmode
18
+ input_type: checkbox
19
+ default: true
20
+ - name: zenmode_hide_menubar
21
+ description: Hide the menubar in zenmode
22
+ input_type: checkbox
23
+ default: true
24
+ - name: zenmode_backgrounds
25
+ description: "Urls to use as backgrounds. Any beginning with # are ignored."
26
+ input_type: list
27
+ list_element:
28
+ input_type: url
.triton/dump/0db70b0f0846c3c6c38c4ccb3ef979e3/triton_.cubin ADDED
Binary file (60 kB). View file
 
.triton/dump/0db70b0f0846c3c6c38c4ccb3ef979e3/triton_.ttir ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module {
2
+ tt.func public @triton__0d1d2d3d4d5d6de7de(%arg0: !tt.ptr<i64, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg4: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg5: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg6: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}, %arg7: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
3
+ %cst = arith.constant dense<0.000000e+00> : tensor<16x256xbf16>
4
+ %cst_0 = arith.constant dense<1.000000e+00> : tensor<1x256xf32>
5
+ %cst_1 = arith.constant dense<0.000000e+00> : tensor<1x256xf32>
6
+ %cst_2 = arith.constant 0.000000e+00 : f32
7
+ %cst_3 = arith.constant dense<256> : tensor<16x1xi64>
8
+ %cst_4 = arith.constant dense<50257> : tensor<16x1xi64>
9
+ %cst_5 = arith.constant dense<0> : tensor<16x1xi64>
10
+ %cst_6 = arith.constant dense<9.99999974E-6> : tensor<16x1xf32>
11
+ %cst_7 = arith.constant dense<2.560000e+02> : tensor<16x1xf32>
12
+ %cst_8 = arith.constant dense<0.000000e+00> : tensor<16x256xf32>
13
+ %cst_9 = arith.constant dense<256> : tensor<16x1xi32>
14
+ %cst_10 = arith.constant dense<256> : tensor<1x256xi32>
15
+ %cst_11 = arith.constant dense<512> : tensor<16x1xi32>
16
+ %c16_i32 = arith.constant 16 : i32
17
+ %0 = tt.get_program_id x : i32
18
+ %1 = arith.muli %0, %c16_i32 : i32
19
+ %2 = tt.make_range {end = 16 : i32, start = 0 : i32} : tensor<16xi32>
20
+ %3 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<16xi32>) -> tensor<16x1xi32>
21
+ %4 = tt.splat %1 : (i32) -> tensor<16x1xi32>
22
+ %5 = arith.addi %4, %3 : tensor<16x1xi32>
23
+ %6 = tt.make_range {end = 256 : i32, start = 0 : i32} : tensor<256xi32>
24
+ %7 = tt.expand_dims %6 {axis = 0 : i32} : (tensor<256xi32>) -> tensor<1x256xi32>
25
+ %8 = tt.splat %arg0 : (!tt.ptr<i64, 1>) -> tensor<16x1x!tt.ptr<i64, 1>>
26
+ %9 = tt.addptr %8, %5 : tensor<16x1x!tt.ptr<i64, 1>>, tensor<16x1xi32>
27
+ %10 = tt.load %9 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x1xi64>
28
+ %11 = arith.remsi %5, %cst_11 : tensor<16x1xi32>
29
+ %12 = arith.cmpi slt, %7, %cst_10 : tensor<1x256xi32>
30
+ %13 = arith.muli %11, %cst_9 : tensor<16x1xi32>
31
+ %14 = tt.broadcast %7 : (tensor<1x256xi32>) -> tensor<16x256xi32>
32
+ %15 = tt.broadcast %13 : (tensor<16x1xi32>) -> tensor<16x256xi32>
33
+ %16 = arith.addi %14, %15 : tensor<16x256xi32>
34
+ %17 = tt.splat %arg2 : (!tt.ptr<f32, 1>) -> tensor<16x256x!tt.ptr<f32, 1>>
35
+ %18 = tt.addptr %17, %16 : tensor<16x256x!tt.ptr<f32, 1>>, tensor<16x256xi32>
36
+ %19 = tt.broadcast %12 : (tensor<1x256xi1>) -> tensor<16x256xi1>
37
+ %20 = tt.load %18, %19, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x256xf32>
38
+ %21 = arith.muli %5, %cst_9 : tensor<16x1xi32>
39
+ %22 = tt.broadcast %21 : (tensor<16x1xi32>) -> tensor<16x256xi32>
40
+ %23 = arith.addi %14, %22 : tensor<16x256xi32>
41
+ %24 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<16x256x!tt.ptr<bf16, 1>>
42
+ %25 = tt.addptr %24, %23 : tensor<16x256x!tt.ptr<bf16, 1>>, tensor<16x256xi32>
43
+ %26 = tt.load %25, %19, %cst {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x256xbf16>
44
+ %27 = arith.extf %26 : tensor<16x256xbf16> to tensor<16x256xf32>
45
+ %28 = arith.addi %10, %cst_4 : tensor<16x1xi64>
46
+ %29 = arith.cmpi slt, %10, %cst_5 : tensor<16x1xi64>
47
+ %30 = arith.select %29, %28, %10 : tensor<16x1xi1>, tensor<16x1xi64>
48
+ %31 = arith.cmpi sge, %30, %cst_5 : tensor<16x1xi64>
49
+ %32 = arith.cmpi slt, %30, %cst_4 : tensor<16x1xi64>
50
+ %33 = arith.andi %31, %32 : tensor<16x1xi1>
51
+ tt.assert %33, "index out of bounds: 0 <= tmp3 < 50257", "<frozen importlib._bootstrap_external>", "_call_with_frames_removed", 883 : tensor<16x1xi1>
52
+ %34 = arith.muli %30, %cst_3 : tensor<16x1xi64>
53
+ %35 = tt.broadcast %34 : (tensor<16x1xi64>) -> tensor<16x256xi64>
54
+ %36 = arith.extsi %7 : tensor<1x256xi32> to tensor<1x256xi64>
55
+ %37 = tt.broadcast %36 : (tensor<1x256xi64>) -> tensor<16x256xi64>
56
+ %38 = arith.addi %37, %35 : tensor<16x256xi64>
57
+ %39 = tt.splat %arg1 : (!tt.ptr<f32, 1>) -> tensor<16x256x!tt.ptr<f32, 1>>
58
+ %40 = tt.addptr %39, %38 : tensor<16x256x!tt.ptr<f32, 1>>, tensor<16x256xi64>
59
+ %41 = tt.load %40, %19, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x256xf32>
60
+ %42 = arith.addf %41, %20 : tensor<16x256xf32>
61
+ %43 = arith.addf %42, %27 : tensor<16x256xf32>
62
+ %44 = arith.addf %43, %cst_8 : tensor<16x256xf32>
63
+ %45 = arith.subf %43, %44 : tensor<16x256xf32>
64
+ %46 = arith.mulf %43, %45 : tensor<16x256xf32>
65
+ %47 = arith.addf %46, %cst_8 : tensor<16x256xf32>
66
+ %48 = arith.select %19, %44, %cst_8 : tensor<16x256xi1>, tensor<16x256xf32>
67
+ %49 = arith.select %19, %47, %cst_8 : tensor<16x256xi1>, tensor<16x256xf32>
68
+ %50 = arith.select %12, %cst_0, %cst_1 : tensor<1x256xi1>, tensor<1x256xf32>
69
+ %51 = tt.broadcast %50 : (tensor<1x256xf32>) -> tensor<16x256xf32>
70
+ %52:3 = "tt.reduce"(%48, %49, %51) <{axis = 1 : i32}> ({
71
+ ^bb0(%arg8: f32, %arg9: f32, %arg10: f32, %arg11: f32, %arg12: f32, %arg13: f32):
72
+ %76 = arith.subf %arg11, %arg8 : f32
73
+ %77 = arith.addf %arg10, %arg13 : f32
74
+ %78 = arith.cmpf oeq, %77, %cst_2 : f32
75
+ %79 = arith.divf %arg13, %77 : f32
76
+ %80 = arith.select %78, %cst_2, %79 : f32
77
+ %81 = arith.mulf %76, %80 : f32
78
+ %82 = arith.addf %arg8, %81 : f32
79
+ %83 = arith.addf %arg9, %arg12 : f32
80
+ %84 = arith.mulf %76, %76 : f32
81
+ %85 = arith.mulf %84, %arg10 : f32
82
+ %86 = arith.mulf %85, %80 : f32
83
+ %87 = arith.addf %83, %86 : f32
84
+ tt.reduce.return %82, %87, %77 : f32, f32, f32
85
+ }) : (tensor<16x256xf32>, tensor<16x256xf32>, tensor<16x256xf32>) -> (tensor<16xf32>, tensor<16xf32>, tensor<16xf32>)
86
+ %53 = tt.expand_dims %52#0 {axis = 1 : i32} : (tensor<16xf32>) -> tensor<16x1xf32>
87
+ %54 = tt.expand_dims %52#1 {axis = 1 : i32} : (tensor<16xf32>) -> tensor<16x1xf32>
88
+ %55 = tt.load %18, %19, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x256xf32>
89
+ %56 = tt.load %25, %19, %cst {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<16x256xbf16>
90
+ %57 = arith.extf %56 : tensor<16x256xbf16> to tensor<16x256xf32>
91
+ %58 = tt.splat %arg4 : (!tt.ptr<f32, 1>) -> tensor<1x256x!tt.ptr<f32, 1>>
92
+ %59 = tt.addptr %58, %7 : tensor<1x256x!tt.ptr<f32, 1>>, tensor<1x256xi32>
93
+ %60 = tt.load %59, %12, %cst_1 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x256xf32>
94
+ tt.assert %33, "index out of bounds: 0 <= tmp16 < 50257", "<frozen importlib._bootstrap_external>", "_call_with_frames_removed", 883 : tensor<16x1xi1>
95
+ %61 = tt.load %40, %19, %cst_8 {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<16x256xf32>
96
+ %62 = arith.addf %61, %55 : tensor<16x256xf32>
97
+ %63 = arith.addf %62, %57 : tensor<16x256xf32>
98
+ %64 = tt.broadcast %53 : (tensor<16x1xf32>) -> tensor<16x256xf32>
99
+ %65 = arith.subf %63, %64 : tensor<16x256xf32>
100
+ %66 = arith.divf %54, %cst_7 : tensor<16x1xf32>
101
+ %67 = arith.addf %66, %cst_6 : tensor<16x1xf32>
102
+ %68 = tt.extern_elementwise %67 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_rsqrtf"} : (tensor<16x1xf32>) -> tensor<16x1xf32>
103
+ %69 = tt.broadcast %68 : (tensor<16x1xf32>) -> tensor<16x256xf32>
104
+ %70 = arith.mulf %65, %69 : tensor<16x256xf32>
105
+ %71 = tt.broadcast %60 : (tensor<1x256xf32>) -> tensor<16x256xf32>
106
+ %72 = arith.mulf %70, %71 : tensor<16x256xf32>
107
+ %73 = tt.splat %arg5 : (!tt.ptr<bf16, 1>) -> tensor<16x256x!tt.ptr<bf16, 1>>
108
+ %74 = tt.addptr %73, %23 : tensor<16x256x!tt.ptr<bf16, 1>>, tensor<16x256xi32>
109
+ %75 = arith.truncf %72 : tensor<16x256xf32> to tensor<16x256xbf16>
110
+ tt.store %74, %75, %19 {cache = 1 : i32, evict = 1 : i32} : tensor<16x256xbf16>
111
+ tt.return
112
+ }
113
+ }
.triton/dump/174400122b6dbc99e086544aa1856b9f/triton_.cubin ADDED
Binary file (32 kB). View file
 
.triton/dump/199215289adb100508718a5a762ba4d7/triton_.cubin ADDED
Binary file (13 kB). View file
 
.triton/dump/1c14bdb6903aa6825e214bbdf57fd077/triton_.cubin ADDED
Binary file (5.54 kB). View file
 
.triton/dump/1c14bdb6903aa6825e214bbdf57fd077/triton_.ptx ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Generated by LLVM NVPTX Back-End
3
+ //
4
+
5
+ .version 8.2
6
+ .target sm_89
7
+ .address_size 64
8
+
9
+ // .globl triton__0d1d2de
10
+
11
+ .visible .entry triton__0d1d2de(
12
+ .param .u64 triton__0d1d2de_param_0,
13
+ .param .u64 triton__0d1d2de_param_1,
14
+ .param .u32 triton__0d1d2de_param_2
15
+ )
16
+ .maxntid 128, 1, 1
17
+ {
18
+ .reg .pred %p<4>;
19
+ .reg .b16 %rs<9>;
20
+ .reg .b32 %r<31>;
21
+ .reg .b64 %rd<8>;
22
+ .loc 1 18 0
23
+ $L__func_begin0:
24
+ .loc 1 18 0
25
+
26
+ ld.param.u64 %rd4, [triton__0d1d2de_param_0];
27
+ ld.param.u64 %rd5, [triton__0d1d2de_param_1];
28
+ $L__tmp0:
29
+ .loc 1 21 36
30
+ mov.u32 %r22, %tid.x;
31
+ shl.b32 %r23, %r22, 3;
32
+ and.b32 %r24, %r23, 1016;
33
+ .loc 1 20 28
34
+ mov.u32 %r1, %ctaid.x;
35
+ .loc 1 20 33
36
+ shl.b32 %r25, %r1, 10;
37
+ .loc 1 21 23
38
+ or.b32 %r26, %r25, %r24;
39
+ .loc 1 24 30
40
+ mul.wide.s32 %rd6, %r26, 4;
41
+ add.s64 %rd1, %rd4, %rd6;
42
+ add.s64 %rd2, %rd1, 16;
43
+ mov.pred %p1, -1;
44
+ .loc 1 24 35
45
+ mov.u32 %r10, 0x0;
46
+ mov.u32 %r11, 0x0;
47
+ mov.u32 %r12, 0x0;
48
+ mov.u32 %r13, 0x0;
49
+ @%p1 ld.global.v4.b32 { %r10, %r11, %r12, %r13 }, [ %rd1 + 0 ];
50
+ mov.u32 %r14, 0x0;
51
+ mov.u32 %r15, 0x0;
52
+ mov.u32 %r16, 0x0;
53
+ mov.u32 %r17, 0x0;
54
+ @%p1 ld.global.v4.b32 { %r14, %r15, %r16, %r17 }, [ %rd2 + 0 ];
55
+ .loc 1 26 25
56
+ mul.wide.s32 %rd7, %r26, 2;
57
+ add.s64 %rd3, %rd5, %rd7;
58
+ .loc 1 26 36
59
+ cvt.rn.bf16.f32 %rs1, %r10;
60
+ cvt.rn.bf16.f32 %rs2, %r11;
61
+ cvt.rn.bf16.f32 %rs3, %r12;
62
+ cvt.rn.bf16.f32 %rs4, %r13;
63
+ cvt.rn.bf16.f32 %rs5, %r14;
64
+ cvt.rn.bf16.f32 %rs6, %r15;
65
+ cvt.rn.bf16.f32 %rs7, %r16;
66
+ cvt.rn.bf16.f32 %rs8, %r17;
67
+ mov.b32 %r27, {%rs1, %rs2};
68
+ mov.b32 %r28, {%rs3, %rs4};
69
+ mov.b32 %r29, {%rs5, %rs6};
70
+ mov.b32 %r30, {%rs7, %rs8};
71
+ @%p1 st.global.v4.b32 [ %rd3 + 0 ], { %r27, %r28, %r29, %r30 };
72
+ .loc 1 26 4
73
+ ret;
74
+ $L__tmp1:
75
+ $L__func_end0:
76
+
77
+ }
78
+ .file 1 "/tmp/torchinductor_root/5t/c5tryp5qwkhreijk7s5x327wofz54lwj4kvctuqdzv2vrf2xyons.py"
79
+ .section .debug_abbrev
80
+ {
81
+ .b8 1
82
+ .b8 17
83
+ .b8 1
84
+ .b8 37
85
+ .b8 8
86
+ .b8 19
87
+ .b8 5
88
+ .b8 3
89
+ .b8 8
90
+ .b8 16
91
+ .b8 6
92
+ .b8 27
93
+ .b8 8
94
+ .b8 180
95
+ .b8 66
96
+ .b8 12
97
+ .b8 17
98
+ .b8 1
99
+ .b8 18
100
+ .b8 1
101
+ .b8 0
102
+ .b8 0
103
+ .b8 2
104
+ .b8 46
105
+ .b8 0
106
+ .b8 17
107
+ .b8 1
108
+ .b8 18
109
+ .b8 1
110
+ .b8 64
111
+ .b8 10
112
+ .b8 135
113
+ .b8 64
114
+ .b8 8
115
+ .b8 3
116
+ .b8 8
117
+ .b8 58
118
+ .b8 11
119
+ .b8 59
120
+ .b8 11
121
+ .b8 63
122
+ .b8 12
123
+ .b8 0
124
+ .b8 0
125
+ .b8 0
126
+ }
127
+ .section .debug_info
128
+ {
129
+ .b32 176
130
+ .b8 2
131
+ .b8 0
132
+ .b32 .debug_abbrev
133
+ .b8 8
134
+ .b8 1
135
+ .b8 116
136
+ .b8 114
137
+ .b8 105
138
+ .b8 116
139
+ .b8 111
140
+ .b8 110
141
+ .b8 0
142
+ .b8 2
143
+ .b8 0
144
+ .b8 99
145
+ .b8 53
146
+ .b8 116
147
+ .b8 114
148
+ .b8 121
149
+ .b8 112
150
+ .b8 53
151
+ .b8 113
152
+ .b8 119
153
+ .b8 107
154
+ .b8 104
155
+ .b8 114
156
+ .b8 101
157
+ .b8 105
158
+ .b8 106
159
+ .b8 107
160
+ .b8 55
161
+ .b8 115
162
+ .b8 53
163
+ .b8 120
164
+ .b8 51
165
+ .b8 50
166
+ .b8 55
167
+ .b8 119
168
+ .b8 111
169
+ .b8 102
170
+ .b8 122
171
+ .b8 53
172
+ .b8 52
173
+ .b8 108
174
+ .b8 119
175
+ .b8 106
176
+ .b8 52
177
+ .b8 107
178
+ .b8 118
179
+ .b8 99
180
+ .b8 116
181
+ .b8 117
182
+ .b8 113
183
+ .b8 100
184
+ .b8 122
185
+ .b8 118
186
+ .b8 50
187
+ .b8 118
188
+ .b8 114
189
+ .b8 102
190
+ .b8 50
191
+ .b8 120
192
+ .b8 121
193
+ .b8 111
194
+ .b8 110
195
+ .b8 115
196
+ .b8 46
197
+ .b8 112
198
+ .b8 121
199
+ .b8 0
200
+ .b32 .debug_line
201
+ .b8 47
202
+ .b8 116
203
+ .b8 109
204
+ .b8 112
205
+ .b8 47
206
+ .b8 116
207
+ .b8 111
208
+ .b8 114
209
+ .b8 99
210
+ .b8 104
211
+ .b8 105
212
+ .b8 110
213
+ .b8 100
214
+ .b8 117
215
+ .b8 99
216
+ .b8 116
217
+ .b8 111
218
+ .b8 114
219
+ .b8 95
220
+ .b8 114
221
+ .b8 111
222
+ .b8 111
223
+ .b8 116
224
+ .b8 47
225
+ .b8 53
226
+ .b8 116
227
+ .b8 0
228
+ .b8 1
229
+ .b64 $L__func_begin0
230
+ .b64 $L__func_end0
231
+ .b8 2
232
+ .b64 $L__func_begin0
233
+ .b64 $L__func_end0
234
+ .b8 1
235
+ .b8 156
236
+ .b8 116
237
+ .b8 114
238
+ .b8 105
239
+ .b8 116
240
+ .b8 111
241
+ .b8 110
242
+ .b8 95
243
+ .b8 95
244
+ .b8 48
245
+ .b8 100
246
+ .b8 49
247
+ .b8 100
248
+ .b8 50
249
+ .b8 100
250
+ .b8 101
251
+ .b8 0
252
+ .b8 116
253
+ .b8 114
254
+ .b8 105
255
+ .b8 116
256
+ .b8 111
257
+ .b8 110
258
+ .b8 95
259
+ .b8 95
260
+ .b8 48
261
+ .b8 100
262
+ .b8 49
263
+ .b8 100
264
+ .b8 50
265
+ .b8 100
266
+ .b8 101
267
+ .b8 0
268
+ .b8 1
269
+ .b8 18
270
+ .b8 1
271
+ .b8 0
272
+ }
273
+ .section .debug_pubnames
274
+ {
275
+ .b32 $L__pubNames_end0-$L__pubNames_start0
276
+ $L__pubNames_start0:
277
+ .b8 2
278
+ .b8 0
279
+ .b32 .debug_info
280
+ .b32 180
281
+ .b32 125
282
+ .b8 116
283
+ .b8 114
284
+ .b8 105
285
+ .b8 116
286
+ .b8 111
287
+ .b8 110
288
+ .b8 95
289
+ .b8 95
290
+ .b8 48
291
+ .b8 100
292
+ .b8 49
293
+ .b8 100
294
+ .b8 50
295
+ .b8 100
296
+ .b8 101
297
+ .b8 0
298
+ .b32 0
299
+ $L__pubNames_end0:
300
+ }
301
+ .section .debug_pubtypes
302
+ {
303
+ .b32 $L__pubTypes_end0-$L__pubTypes_start0
304
+ $L__pubTypes_start0:
305
+ .b8 2
306
+ .b8 0
307
+ .b32 .debug_info
308
+ .b32 180
309
+ .b32 0
310
+ $L__pubTypes_end0:
311
+ }
312
+ .section .debug_loc { }
.triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.cubin ADDED
Binary file (10.5 kB). View file
 
.triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.llir ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ; ModuleID = 'LLVMDialectModule'
2
+ source_filename = "LLVMDialectModule"
3
+
4
+ @.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
5
+
6
+ define void @triton__0d1de(ptr addrspace(1) %0, i32 %1) local_unnamed_addr !dbg !7 {
7
+ %3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
8
+ %4 = shl i32 %3, 1, !dbg !10
9
+ %5 = and i32 %4, 510, !dbg !10
10
+ %6 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #4, !dbg !11
11
+ %7 = shl i32 %6, 9, !dbg !12
12
+ %8 = or i32 %7, %5, !dbg !13
13
+ %9 = sext i32 %8 to i64, !dbg !14
14
+ %10 = getelementptr i16, ptr addrspace(1) %0, i64 %9, !dbg !14
15
+ %11 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %10, i1 true) #4, !dbg !15
16
+ %12 = trunc i32 %11 to i16, !dbg !15
17
+ %extelt.offset = lshr i32 %11, 16, !dbg !15
18
+ %13 = trunc i32 %extelt.offset to i16, !dbg !15
19
+ %14 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %12) #4, !dbg !16
20
+ %15 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %13) #4, !dbg !16
21
+ %16 = fmul float %14, 0x3FE6A09E60000000, !dbg !17
22
+ %17 = fmul float %15, 0x3FE6A09E60000000, !dbg !17
23
+ %18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
24
+ %.not.i = icmp eq i32 %18, 0, !dbg !18
25
+ %19 = tail call float @llvm.nvvm.fabs.ftz.f(float %16) #4, !dbg !18
26
+ %20 = tail call float @llvm.nvvm.fabs.f(float %16) #4, !dbg !18
27
+ %.0.i = select i1 %.not.i, float %20, float %19, !dbg !18
28
+ %21 = fcmp oge float %.0.i, 0x3FF00C1FC0000000, !dbg !18
29
+ br i1 %21, label %__nv_fabsf.exit1.i, label %23, !dbg !18
30
+
31
+ __nv_fabsf.exit1.i: ; preds = %2
32
+ %22 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
33
+ %.not1.i = icmp eq i32 %22, 0, !dbg !18
34
+ %.01.i = select i1 %.not1.i, float %20, float %19, !dbg !18
35
+ br label %__internal_fmad.exit.i, !dbg !18
36
+
37
+ 23: ; preds = %2
38
+ %24 = fmul float %16, %16, !dbg !18
39
+ br label %__internal_fmad.exit.i, !dbg !18
40
+
41
+ __internal_fmad.exit.i: ; preds = %23, %__nv_fabsf.exit1.i
42
+ %25 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1.i ], [ 0x3FC06EBA60000000, %23 ], !dbg !18
43
+ %26 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1.i ], [ 0xBFD8127580000000, %23 ], !dbg !18
44
+ %27 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1.i ], [ 0x3FBCE315E0000000, %23 ], !dbg !18
45
+ %28 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1.i ], [ 0xBF9B837CE0000000, %23 ], !dbg !18
46
+ %29 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1.i ], [ 0x3F755ABD40000000, %23 ], !dbg !18
47
+ %30 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1.i ], [ 0xBF4AE9A400000000, %23 ], !dbg !18
48
+ %31 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1.i ], [ 0x3F163D2D40000000, %23 ], !dbg !18
49
+ %32 = phi float [ %.01.i, %__nv_fabsf.exit1.i ], [ %24, %23 ], !dbg !18
50
+ %33 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
51
+ %.not2.i = icmp eq i32 %33, 0, !dbg !18
52
+ %34 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %31, float %32, float %30) #4, !dbg !18
53
+ %35 = tail call float @llvm.nvvm.fma.rn.f(float %31, float %32, float %30) #4, !dbg !18
54
+ %.02.i = select i1 %.not2.i, float %35, float %34, !dbg !18
55
+ %36 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
56
+ %.not3.i = icmp eq i32 %36, 0, !dbg !18
57
+ %37 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i, float %32, float %29) #4, !dbg !18
58
+ %38 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i, float %32, float %29) #4, !dbg !18
59
+ %.03.i = select i1 %.not3.i, float %38, float %37, !dbg !18
60
+ %39 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
61
+ %.not4.i = icmp eq i32 %39, 0, !dbg !18
62
+ %40 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03.i, float %32, float %28) #4, !dbg !18
63
+ %41 = tail call float @llvm.nvvm.fma.rn.f(float %.03.i, float %32, float %28) #4, !dbg !18
64
+ %.04.i = select i1 %.not4.i, float %41, float %40, !dbg !18
65
+ %42 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
66
+ %.not5.i = icmp eq i32 %42, 0, !dbg !18
67
+ %43 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04.i, float %32, float %27) #4, !dbg !18
68
+ %44 = tail call float @llvm.nvvm.fma.rn.f(float %.04.i, float %32, float %27) #4, !dbg !18
69
+ %.05.i = select i1 %.not5.i, float %44, float %43, !dbg !18
70
+ %45 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
71
+ %.not6.i = icmp eq i32 %45, 0, !dbg !18
72
+ %46 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i, float %32, float %26) #4, !dbg !18
73
+ %47 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i, float %32, float %26) #4, !dbg !18
74
+ %.06.i = select i1 %.not6.i, float %47, float %46, !dbg !18
75
+ %48 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
76
+ %.not7.i = icmp eq i32 %48, 0, !dbg !18
77
+ %49 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06.i, float %32, float %25) #4, !dbg !18
78
+ %50 = tail call float @llvm.nvvm.fma.rn.f(float %.06.i, float %32, float %25) #4, !dbg !18
79
+ %.07.i = select i1 %.not7.i, float %50, float %49, !dbg !18
80
+ %51 = fneg float %32, !dbg !18
81
+ %52 = select i1 %21, float %51, float %16, !dbg !18
82
+ %53 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
83
+ %.not8.i = icmp eq i32 %53, 0, !dbg !18
84
+ %54 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07.i, float %52, float %52) #4, !dbg !18
85
+ %55 = tail call float @llvm.nvvm.fma.rn.f(float %.07.i, float %52, float %52) #4, !dbg !18
86
+ %.08.i = select i1 %.not8.i, float %55, float %54, !dbg !18
87
+ br i1 %21, label %56, label %__nv_erff.exit, !dbg !18
88
+
89
+ 56: ; preds = %__internal_fmad.exit.i
90
+ %57 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08.i) #4, !dbg !18
91
+ %58 = fsub float 1.000000e+00, %57, !dbg !18
92
+ %59 = bitcast float %58 to i32, !dbg !18
93
+ %60 = bitcast float %16 to i32, !dbg !18
94
+ %61 = and i32 %60, -2147483648, !dbg !18
95
+ %62 = or i32 %61, %59, !dbg !18
96
+ %63 = bitcast i32 %62 to float, !dbg !18
97
+ br label %__nv_erff.exit, !dbg !18
98
+
99
+ __nv_erff.exit: ; preds = %__internal_fmad.exit.i, %56
100
+ %r.0.i = phi float [ %63, %56 ], [ %.08.i, %__internal_fmad.exit.i ], !dbg !18
101
+ %64 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
102
+ %.not.i1 = icmp eq i32 %64, 0, !dbg !18
103
+ %65 = tail call float @llvm.nvvm.fabs.ftz.f(float %17) #4, !dbg !18
104
+ %66 = tail call float @llvm.nvvm.fabs.f(float %17) #4, !dbg !18
105
+ %.0.i2 = select i1 %.not.i1, float %66, float %65, !dbg !18
106
+ %67 = fcmp oge float %.0.i2, 0x3FF00C1FC0000000, !dbg !18
107
+ br i1 %67, label %__nv_fabsf.exit1.i19, label %69, !dbg !18
108
+
109
+ __nv_fabsf.exit1.i19: ; preds = %__nv_erff.exit
110
+ %68 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
111
+ %.not1.i20 = icmp eq i32 %68, 0, !dbg !18
112
+ %.01.i21 = select i1 %.not1.i20, float %66, float %65, !dbg !18
113
+ br label %__internal_fmad.exit.i3, !dbg !18
114
+
115
+ 69: ; preds = %__nv_erff.exit
116
+ %70 = fmul float %17, %17, !dbg !18
117
+ br label %__internal_fmad.exit.i3, !dbg !18
118
+
119
+ __internal_fmad.exit.i3: ; preds = %69, %__nv_fabsf.exit1.i19
120
+ %71 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1.i19 ], [ 0x3FC06EBA60000000, %69 ], !dbg !18
121
+ %72 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1.i19 ], [ 0xBFD8127580000000, %69 ], !dbg !18
122
+ %73 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1.i19 ], [ 0x3FBCE315E0000000, %69 ], !dbg !18
123
+ %74 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1.i19 ], [ 0xBF9B837CE0000000, %69 ], !dbg !18
124
+ %75 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1.i19 ], [ 0x3F755ABD40000000, %69 ], !dbg !18
125
+ %76 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1.i19 ], [ 0xBF4AE9A400000000, %69 ], !dbg !18
126
+ %77 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1.i19 ], [ 0x3F163D2D40000000, %69 ], !dbg !18
127
+ %78 = phi float [ %.01.i21, %__nv_fabsf.exit1.i19 ], [ %70, %69 ], !dbg !18
128
+ %79 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
129
+ %.not2.i4 = icmp eq i32 %79, 0, !dbg !18
130
+ %80 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %77, float %78, float %76) #4, !dbg !18
131
+ %81 = tail call float @llvm.nvvm.fma.rn.f(float %77, float %78, float %76) #4, !dbg !18
132
+ %.02.i5 = select i1 %.not2.i4, float %81, float %80, !dbg !18
133
+ %82 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
134
+ %.not3.i6 = icmp eq i32 %82, 0, !dbg !18
135
+ %83 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i5, float %78, float %75) #4, !dbg !18
136
+ %84 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i5, float %78, float %75) #4, !dbg !18
137
+ %.03.i7 = select i1 %.not3.i6, float %84, float %83, !dbg !18
138
+ %85 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
139
+ %.not4.i8 = icmp eq i32 %85, 0, !dbg !18
140
+ %86 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03.i7, float %78, float %74) #4, !dbg !18
141
+ %87 = tail call float @llvm.nvvm.fma.rn.f(float %.03.i7, float %78, float %74) #4, !dbg !18
142
+ %.04.i9 = select i1 %.not4.i8, float %87, float %86, !dbg !18
143
+ %88 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
144
+ %.not5.i10 = icmp eq i32 %88, 0, !dbg !18
145
+ %89 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04.i9, float %78, float %73) #4, !dbg !18
146
+ %90 = tail call float @llvm.nvvm.fma.rn.f(float %.04.i9, float %78, float %73) #4, !dbg !18
147
+ %.05.i11 = select i1 %.not5.i10, float %90, float %89, !dbg !18
148
+ %91 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
149
+ %.not6.i12 = icmp eq i32 %91, 0, !dbg !18
150
+ %92 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i11, float %78, float %72) #4, !dbg !18
151
+ %93 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i11, float %78, float %72) #4, !dbg !18
152
+ %.06.i13 = select i1 %.not6.i12, float %93, float %92, !dbg !18
153
+ %94 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
154
+ %.not7.i14 = icmp eq i32 %94, 0, !dbg !18
155
+ %95 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06.i13, float %78, float %71) #4, !dbg !18
156
+ %96 = tail call float @llvm.nvvm.fma.rn.f(float %.06.i13, float %78, float %71) #4, !dbg !18
157
+ %.07.i15 = select i1 %.not7.i14, float %96, float %95, !dbg !18
158
+ %97 = fneg float %78, !dbg !18
159
+ %98 = select i1 %67, float %97, float %17, !dbg !18
160
+ %99 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
161
+ %.not8.i16 = icmp eq i32 %99, 0, !dbg !18
162
+ %100 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07.i15, float %98, float %98) #4, !dbg !18
163
+ %101 = tail call float @llvm.nvvm.fma.rn.f(float %.07.i15, float %98, float %98) #4, !dbg !18
164
+ %.08.i17 = select i1 %.not8.i16, float %101, float %100, !dbg !18
165
+ br i1 %67, label %102, label %__nv_erff.exit22, !dbg !18
166
+
167
+ 102: ; preds = %__internal_fmad.exit.i3
168
+ %103 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08.i17) #4, !dbg !18
169
+ %104 = fsub float 1.000000e+00, %103, !dbg !18
170
+ %105 = bitcast float %104 to i32, !dbg !18
171
+ %106 = bitcast float %17 to i32, !dbg !18
172
+ %107 = and i32 %106, -2147483648, !dbg !18
173
+ %108 = or i32 %107, %105, !dbg !18
174
+ %109 = bitcast i32 %108 to float, !dbg !18
175
+ br label %__nv_erff.exit22, !dbg !18
176
+
177
+ __nv_erff.exit22: ; preds = %__internal_fmad.exit.i3, %102
178
+ %r.0.i18 = phi float [ %109, %102 ], [ %.08.i17, %__internal_fmad.exit.i3 ], !dbg !18
179
+ %110 = fmul float %15, 5.000000e-01, !dbg !19
180
+ %111 = fmul float %14, 5.000000e-01, !dbg !19
181
+ %112 = fadd float %r.0.i, 1.000000e+00, !dbg !20
182
+ %113 = fadd float %r.0.i18, 1.000000e+00, !dbg !20
183
+ %114 = fmul float %111, %112, !dbg !21
184
+ %115 = fmul float %110, %113, !dbg !21
185
+ %116 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %114) #4, !dbg !22
186
+ %117 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %115) #4, !dbg !22
187
+ %118 = insertelement <2 x i16> undef, i16 %116, i64 0, !dbg !22
188
+ %119 = insertelement <2 x i16> %118, i16 %117, i64 1, !dbg !22
189
+ %120 = bitcast <2 x i16> %119 to i32, !dbg !22
190
+ tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %120, ptr addrspace(1) %10, i1 true) #4, !dbg !22
191
+ ret void, !dbg !23
192
+ }
193
+
194
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
195
+ declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
196
+
197
+ ; Function Attrs: alwaysinline nounwind
198
+ define float @__nv_erff(float %a) local_unnamed_addr #1 {
199
+ __nv_fabsf.exit:
200
+ %0 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
201
+ %.not = icmp eq i32 %0, 0
202
+ %1 = tail call float @llvm.nvvm.fabs.ftz.f(float %a) #4
203
+ %2 = tail call float @llvm.nvvm.fabs.f(float %a) #4
204
+ %.0 = select i1 %.not, float %2, float %1
205
+ %3 = fcmp oge float %.0, 0x3FF00C1FC0000000
206
+ br i1 %3, label %__nv_fabsf.exit1, label %5
207
+
208
+ __nv_fabsf.exit1: ; preds = %__nv_fabsf.exit
209
+ %4 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
210
+ %.not1 = icmp eq i32 %4, 0
211
+ %.01 = select i1 %.not1, float %2, float %1
212
+ br label %__internal_fmad.exit
213
+
214
+ 5: ; preds = %__nv_fabsf.exit
215
+ %6 = fmul float %a, %a
216
+ br label %__internal_fmad.exit
217
+
218
+ __internal_fmad.exit: ; preds = %5, %__nv_fabsf.exit1
219
+ %7 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1 ], [ 0x3FC06EBA60000000, %5 ]
220
+ %8 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1 ], [ 0xBFD8127580000000, %5 ]
221
+ %9 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1 ], [ 0x3FBCE315E0000000, %5 ]
222
+ %10 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1 ], [ 0xBF9B837CE0000000, %5 ]
223
+ %11 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1 ], [ 0x3F755ABD40000000, %5 ]
224
+ %12 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1 ], [ 0xBF4AE9A400000000, %5 ]
225
+ %13 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1 ], [ 0x3F163D2D40000000, %5 ]
226
+ %14 = phi float [ %.01, %__nv_fabsf.exit1 ], [ %6, %5 ]
227
+ %15 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
228
+ %.not2 = icmp eq i32 %15, 0
229
+ %16 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %13, float %14, float %12) #4
230
+ %17 = tail call float @llvm.nvvm.fma.rn.f(float %13, float %14, float %12) #4
231
+ %.02 = select i1 %.not2, float %17, float %16
232
+ %18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
233
+ %.not3 = icmp eq i32 %18, 0
234
+ %19 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02, float %14, float %11) #4
235
+ %20 = tail call float @llvm.nvvm.fma.rn.f(float %.02, float %14, float %11) #4
236
+ %.03 = select i1 %.not3, float %20, float %19
237
+ %21 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
238
+ %.not4 = icmp eq i32 %21, 0
239
+ %22 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03, float %14, float %10) #4
240
+ %23 = tail call float @llvm.nvvm.fma.rn.f(float %.03, float %14, float %10) #4
241
+ %.04 = select i1 %.not4, float %23, float %22
242
+ %24 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
243
+ %.not5 = icmp eq i32 %24, 0
244
+ %25 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04, float %14, float %9) #4
245
+ %26 = tail call float @llvm.nvvm.fma.rn.f(float %.04, float %14, float %9) #4
246
+ %.05 = select i1 %.not5, float %26, float %25
247
+ %27 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
248
+ %.not6 = icmp eq i32 %27, 0
249
+ %28 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05, float %14, float %8) #4
250
+ %29 = tail call float @llvm.nvvm.fma.rn.f(float %.05, float %14, float %8) #4
251
+ %.06 = select i1 %.not6, float %29, float %28
252
+ %30 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
253
+ %.not7 = icmp eq i32 %30, 0
254
+ %31 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06, float %14, float %7) #4
255
+ %32 = tail call float @llvm.nvvm.fma.rn.f(float %.06, float %14, float %7) #4
256
+ %.07 = select i1 %.not7, float %32, float %31
257
+ %33 = fneg float %14
258
+ %34 = select i1 %3, float %33, float %a
259
+ %35 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
260
+ %.not8 = icmp eq i32 %35, 0
261
+ %36 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07, float %34, float %34) #4
262
+ %37 = tail call float @llvm.nvvm.fma.rn.f(float %.07, float %34, float %34) #4
263
+ %.08 = select i1 %.not8, float %37, float %36
264
+ br i1 %3, label %38, label %46
265
+
266
+ 38: ; preds = %__internal_fmad.exit
267
+ %39 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08) #4
268
+ %40 = fsub float 1.000000e+00, %39
269
+ %41 = bitcast float %40 to i32
270
+ %42 = bitcast float %a to i32
271
+ %43 = and i32 %42, -2147483648
272
+ %44 = or i32 %43, %41
273
+ %45 = bitcast i32 %44 to float
274
+ br label %46
275
+
276
+ 46: ; preds = %38, %__internal_fmad.exit
277
+ %r.0 = phi float [ %45, %38 ], [ %.08, %__internal_fmad.exit ]
278
+ ret float %r.0
279
+ }
280
+
281
+ declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #2
282
+
283
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
284
+ declare float @llvm.nvvm.fabs.ftz.f(float) #0
285
+
286
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
287
+ declare float @llvm.nvvm.fabs.f(float) #0
288
+
289
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
290
+ declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float) #0
291
+
292
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
293
+ declare float @llvm.nvvm.fma.rn.f(float, float, float) #0
294
+
295
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
296
+ declare float @llvm.nvvm.ex2.approx.ftz.f(float) #3
297
+
298
+ attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
299
+ attributes #1 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
300
+ attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
301
+ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
302
+ attributes #4 = { nounwind }
303
+
304
+ !llvm.module.flags = !{!0, !1}
305
+ !llvm.dbg.cu = !{!2}
306
+ !nvvm.annotations = !{!4, !5, !5, !4}
307
+ !llvm.ident = !{!6}
308
+
309
+ !0 = !{i32 2, !"Debug Info Version", i32 3}
310
+ !1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
311
+ !2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
312
+ !3 = !DIFile(filename: "cafucwnmq4o436kwzkmrinerrnocxll7q6wsadcl726g6cradipo.py", directory: "/tmp/torchinductor_root/af")
313
+ !4 = !{ptr @triton__0d1de, !"kernel", i32 1}
314
+ !5 = !{ptr @triton__0d1de, !"maxntidx", i32 256}
315
+ !6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
316
+ !7 = distinct !DISubprogram(name: "triton__0d1de", linkageName: "triton__0d1de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
317
+ !8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
318
+ !9 = !{}
319
+ !10 = !DILocation(line: 21, column: 36, scope: !7)
320
+ !11 = !DILocation(line: 20, column: 28, scope: !7)
321
+ !12 = !DILocation(line: 20, column: 33, scope: !7)
322
+ !13 = !DILocation(line: 21, column: 23, scope: !7)
323
+ !14 = !DILocation(line: 24, column: 34, scope: !7)
324
+ !15 = !DILocation(line: 24, column: 39, scope: !7)
325
+ !16 = !DILocation(line: 24, column: 48, scope: !7)
326
+ !17 = !DILocation(line: 29, column: 18, scope: !7)
327
+ !18 = !DILocation(line: 30, column: 23, scope: !7)
328
+ !19 = !DILocation(line: 27, column: 18, scope: !7)
329
+ !20 = !DILocation(line: 32, column: 18, scope: !7)
330
+ !21 = !DILocation(line: 33, column: 18, scope: !7)
331
+ !22 = !DILocation(line: 35, column: 40, scope: !7)
332
+ !23 = !DILocation(line: 35, column: 4, scope: !7)
.triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.ptx ADDED
@@ -0,0 +1,446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Generated by LLVM NVPTX Back-End
3
+ //
4
+
5
+ .version 8.2
6
+ .target sm_89
7
+ .address_size 64
8
+
9
+ // .globl triton__0d1de
10
+ .global .align 1 .b8 _$_str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};
11
+
12
+ .visible .entry triton__0d1de(
13
+ .param .u64 triton__0d1de_param_0,
14
+ .param .u32 triton__0d1de_param_1
15
+ )
16
+ .maxntid 256, 1, 1
17
+ {
18
+ .reg .pred %p<9>;
19
+ .reg .b16 %rs<5>;
20
+ .reg .b32 %r<22>;
21
+ .reg .f32 %f<113>;
22
+ .reg .b64 %rd<6>;
23
+ .loc 1 18 0
24
+ $L__func_begin0:
25
+ .loc 1 18 0
26
+
27
+ ld.param.u64 %rd3, [triton__0d1de_param_0];
28
+ $L__tmp0:
29
+ .loc 1 21 36
30
+ mov.u32 %r5, %tid.x;
31
+ shl.b32 %r6, %r5, 1;
32
+ and.b32 %r7, %r6, 510;
33
+ .loc 1 20 28
34
+ mov.u32 %r1, %ctaid.x;
35
+ .loc 1 20 33
36
+ shl.b32 %r8, %r1, 9;
37
+ .loc 1 21 23
38
+ or.b32 %r9, %r8, %r7;
39
+ .loc 1 24 34
40
+ mul.wide.s32 %rd4, %r9, 2;
41
+ add.s64 %rd5, %rd3, %rd4;
42
+ mov.pred %p1, -1;
43
+ .loc 1 24 39
44
+ mov.u32 %r2, 0x0;
45
+ @%p1 ld.global.b32 { %r2 }, [ %rd5 + 0 ];
46
+ cvt.u16.u32 %rs1, %r2;
47
+ { .reg .b16 tmp; mov.b32 {tmp, %rs2}, %r2; }
48
+ .loc 1 24 48
49
+ cvt.f32.bf16 %r3, %rs1;
50
+ mov.b32 %f1, %r3;
51
+ cvt.f32.bf16 %r4, %rs2;
52
+ mov.b32 %f2, %r4;
53
+ .loc 1 29 18
54
+ mul.f32 %f3, %f1, 0f3F3504F3;
55
+ .loc 1 30 23
56
+ abs.ftz.f32 %f5, %f3;
57
+ setp.ge.f32 %p2, %f5, 0f3F8060FE;
58
+ mov.f32 %f101, 0f3789CA3C;
59
+ mov.f32 %f100, 0fB9F560B9;
60
+ mov.f32 %f99, 0f3BAC840B;
61
+ mov.f32 %f98, 0fBD0C8162;
62
+ mov.f32 %f97, 0f3E1CF906;
63
+ mov.f32 %f96, 0f3F6A937E;
64
+ mov.f32 %f95, 0f3F20D842;
65
+ mov.f32 %f102, %f5;
66
+ @%p2 bra $L__BB0_2;
67
+ .loc 1 0 23
68
+ mov.f32 %f101, 0f38B1E96A;
69
+ mov.f32 %f100, 0fBA574D20;
70
+ mov.f32 %f99, 0f3BAAD5EA;
71
+ mov.f32 %f98, 0fBCDC1BE7;
72
+ mov.f32 %f97, 0f3DE718AF;
73
+ mov.f32 %f96, 0fBEC093AC;
74
+ mov.f32 %f95, 0f3E0375D3;
75
+ .loc 1 30 23
76
+ mul.f32 %f102, %f3, %f3;
77
+ $L__BB0_2:
78
+ .loc 1 0 0
79
+ mul.f32 %f4, %f2, 0f3F3504F3;
80
+ .loc 1 30 23
81
+ setp.ltu.f32 %p3, %f5, 0f3F8060FE;
82
+ fma.rn.ftz.f32 %f45, %f101, %f102, %f100;
83
+ fma.rn.ftz.f32 %f46, %f45, %f102, %f99;
84
+ fma.rn.ftz.f32 %f47, %f46, %f102, %f98;
85
+ fma.rn.ftz.f32 %f48, %f47, %f102, %f97;
86
+ fma.rn.ftz.f32 %f49, %f48, %f102, %f96;
87
+ fma.rn.ftz.f32 %f50, %f49, %f102, %f95;
88
+ neg.f32 %f51, %f102;
89
+ selp.f32 %f52, %f51, %f3, %p2;
90
+ fma.rn.ftz.f32 %f103, %f50, %f52, %f52;
91
+ mov.f32 %f94, 0f3F800000;
92
+ @%p3 bra $L__BB0_4;
93
+ ex2.approx.ftz.f32 %f53, %f103;
94
+ sub.f32 %f55, %f94, %f53;
95
+ mov.b32 %r10, %f55;
96
+ mov.b32 %r11, %f3;
97
+ and.b32 %r12, %r11, -2147483648;
98
+ or.b32 %r13, %r12, %r10;
99
+ mov.b32 %f103, %r13;
100
+ $L__BB0_4:
101
+ abs.ftz.f32 %f18, %f4;
102
+ setp.ge.f32 %p5, %f18, 0f3F8060FE;
103
+ mov.f32 %f110, 0f3789CA3C;
104
+ mov.f32 %f109, 0fB9F560B9;
105
+ mov.f32 %f108, 0f3BAC840B;
106
+ mov.f32 %f107, 0fBD0C8162;
107
+ mov.f32 %f106, 0f3E1CF906;
108
+ mov.f32 %f105, 0f3F6A937E;
109
+ mov.f32 %f104, 0f3F20D842;
110
+ mov.f32 %f111, %f18;
111
+ @%p5 bra $L__BB0_6;
112
+ mul.f32 %f111, %f4, %f4;
113
+ mov.f32 %f110, 0f38B1E96A;
114
+ mov.f32 %f109, 0fBA574D20;
115
+ mov.f32 %f108, 0f3BAAD5EA;
116
+ mov.f32 %f107, 0fBCDC1BE7;
117
+ mov.f32 %f106, 0f3DE718AF;
118
+ mov.f32 %f105, 0fBEC093AC;
119
+ mov.f32 %f104, 0f3E0375D3;
120
+ $L__BB0_6:
121
+ setp.ltu.f32 %p6, %f18, 0f3F8060FE;
122
+ fma.rn.ftz.f32 %f70, %f110, %f111, %f109;
123
+ fma.rn.ftz.f32 %f71, %f70, %f111, %f108;
124
+ fma.rn.ftz.f32 %f72, %f71, %f111, %f107;
125
+ fma.rn.ftz.f32 %f73, %f72, %f111, %f106;
126
+ fma.rn.ftz.f32 %f74, %f73, %f111, %f105;
127
+ fma.rn.ftz.f32 %f75, %f74, %f111, %f104;
128
+ neg.f32 %f76, %f111;
129
+ selp.f32 %f77, %f76, %f4, %p5;
130
+ fma.rn.ftz.f32 %f112, %f75, %f77, %f77;
131
+ @%p6 bra $L__BB0_8;
132
+ ex2.approx.ftz.f32 %f78, %f112;
133
+ sub.f32 %f80, %f94, %f78;
134
+ mov.b32 %r14, %f80;
135
+ mov.b32 %r15, %f4;
136
+ and.b32 %r16, %r15, -2147483648;
137
+ or.b32 %r17, %r16, %r14;
138
+ mov.b32 %f112, %r17;
139
+ $L__BB0_8:
140
+ .loc 1 27 18
141
+ mul.f32 %f81, %f2, 0f3F000000;
142
+ mul.f32 %f82, %f1, 0f3F000000;
143
+ .loc 1 32 18
144
+ add.f32 %f83, %f103, 0f3F800000;
145
+ add.f32 %f84, %f112, 0f3F800000;
146
+ .loc 1 33 18
147
+ mul.f32 %f85, %f82, %f83;
148
+ mul.f32 %f86, %f81, %f84;
149
+ .loc 1 35 40
150
+ mov.b32 %r18, %f85;
151
+ cvt.rn.bf16.f32 %rs3, %r18;
152
+ mov.b32 %r19, %f86;
153
+ cvt.rn.bf16.f32 %rs4, %r19;
154
+ mov.b32 %r21, {%rs3, %rs4};
155
+ @%p1 st.global.b32 [ %rd5 + 0 ], { %r21 };
156
+ .loc 1 35 4
157
+ ret;
158
+ $L__tmp1:
159
+ $L__func_end0:
160
+
161
+ }
162
+ // .globl __nv_erff
163
+ .visible .func (.param .b32 func_retval0) __nv_erff(
164
+ .param .b32 __nv_erff_param_0
165
+ )
166
+ {
167
+ .reg .pred %p<4>;
168
+ .reg .b32 %r<5>;
169
+ .reg .f32 %f<49>;
170
+ $L__func_begin1:
171
+
172
+ ld.param.f32 %f14, [__nv_erff_param_0];
173
+ abs.ftz.f32 %f1, %f14;
174
+ setp.ge.f32 %p1, %f1, 0f3F8060FE;
175
+ mov.f32 %f46, 0f3789CA3C;
176
+ mov.f32 %f45, 0fB9F560B9;
177
+ mov.f32 %f44, 0f3BAC840B;
178
+ mov.f32 %f43, 0fBD0C8162;
179
+ mov.f32 %f42, 0f3E1CF906;
180
+ mov.f32 %f41, 0f3F6A937E;
181
+ mov.f32 %f40, 0f3F20D842;
182
+ mov.f32 %f47, %f1;
183
+ @%p1 bra $L__BB1_2;
184
+ mul.f32 %f47, %f14, %f14;
185
+ mov.f32 %f46, 0f38B1E96A;
186
+ mov.f32 %f45, 0fBA574D20;
187
+ mov.f32 %f44, 0f3BAAD5EA;
188
+ mov.f32 %f43, 0fBCDC1BE7;
189
+ mov.f32 %f42, 0f3DE718AF;
190
+ mov.f32 %f41, 0fBEC093AC;
191
+ mov.f32 %f40, 0f3E0375D3;
192
+ $L__BB1_2:
193
+ setp.ltu.f32 %p2, %f1, 0f3F8060FE;
194
+ fma.rn.ftz.f32 %f29, %f46, %f47, %f45;
195
+ fma.rn.ftz.f32 %f30, %f29, %f47, %f44;
196
+ fma.rn.ftz.f32 %f31, %f30, %f47, %f43;
197
+ fma.rn.ftz.f32 %f32, %f31, %f47, %f42;
198
+ fma.rn.ftz.f32 %f33, %f32, %f47, %f41;
199
+ fma.rn.ftz.f32 %f34, %f33, %f47, %f40;
200
+ neg.f32 %f35, %f47;
201
+ selp.f32 %f36, %f35, %f14, %p1;
202
+ fma.rn.ftz.f32 %f48, %f34, %f36, %f36;
203
+ @%p2 bra $L__BB1_4;
204
+ ex2.approx.ftz.f32 %f37, %f48;
205
+ mov.f32 %f38, 0f3F800000;
206
+ sub.f32 %f39, %f38, %f37;
207
+ mov.b32 %r1, %f39;
208
+ mov.b32 %r2, %f14;
209
+ and.b32 %r3, %r2, -2147483648;
210
+ or.b32 %r4, %r3, %r1;
211
+ mov.b32 %f48, %r4;
212
+ $L__BB1_4:
213
+ st.param.f32 [func_retval0+0], %f48;
214
+ ret;
215
+ $L__func_end1:
216
+
217
+ }
218
+ .file 1 "/tmp/torchinductor_root/af/cafucwnmq4o436kwzkmrinerrnocxll7q6wsadcl726g6cradipo.py"
219
+ .section .debug_abbrev
220
+ {
221
+ .b8 1
222
+ .b8 17
223
+ .b8 1
224
+ .b8 37
225
+ .b8 8
226
+ .b8 19
227
+ .b8 5
228
+ .b8 3
229
+ .b8 8
230
+ .b8 16
231
+ .b8 6
232
+ .b8 27
233
+ .b8 8
234
+ .b8 180
235
+ .b8 66
236
+ .b8 12
237
+ .b8 17
238
+ .b8 1
239
+ .b8 18
240
+ .b8 1
241
+ .b8 0
242
+ .b8 0
243
+ .b8 2
244
+ .b8 46
245
+ .b8 0
246
+ .b8 17
247
+ .b8 1
248
+ .b8 18
249
+ .b8 1
250
+ .b8 64
251
+ .b8 10
252
+ .b8 135
253
+ .b8 64
254
+ .b8 8
255
+ .b8 3
256
+ .b8 8
257
+ .b8 58
258
+ .b8 11
259
+ .b8 59
260
+ .b8 11
261
+ .b8 63
262
+ .b8 12
263
+ .b8 0
264
+ .b8 0
265
+ .b8 0
266
+ }
267
+ .section .debug_info
268
+ {
269
+ .b32 172
270
+ .b8 2
271
+ .b8 0
272
+ .b32 .debug_abbrev
273
+ .b8 8
274
+ .b8 1
275
+ .b8 116
276
+ .b8 114
277
+ .b8 105
278
+ .b8 116
279
+ .b8 111
280
+ .b8 110
281
+ .b8 0
282
+ .b8 2
283
+ .b8 0
284
+ .b8 99
285
+ .b8 97
286
+ .b8 102
287
+ .b8 117
288
+ .b8 99
289
+ .b8 119
290
+ .b8 110
291
+ .b8 109
292
+ .b8 113
293
+ .b8 52
294
+ .b8 111
295
+ .b8 52
296
+ .b8 51
297
+ .b8 54
298
+ .b8 107
299
+ .b8 119
300
+ .b8 122
301
+ .b8 107
302
+ .b8 109
303
+ .b8 114
304
+ .b8 105
305
+ .b8 110
306
+ .b8 101
307
+ .b8 114
308
+ .b8 114
309
+ .b8 110
310
+ .b8 111
311
+ .b8 99
312
+ .b8 120
313
+ .b8 108
314
+ .b8 108
315
+ .b8 55
316
+ .b8 113
317
+ .b8 54
318
+ .b8 119
319
+ .b8 115
320
+ .b8 97
321
+ .b8 100
322
+ .b8 99
323
+ .b8 108
324
+ .b8 55
325
+ .b8 50
326
+ .b8 54
327
+ .b8 103
328
+ .b8 54
329
+ .b8 99
330
+ .b8 114
331
+ .b8 97
332
+ .b8 100
333
+ .b8 105
334
+ .b8 112
335
+ .b8 111
336
+ .b8 46
337
+ .b8 112
338
+ .b8 121
339
+ .b8 0
340
+ .b32 .debug_line
341
+ .b8 47
342
+ .b8 116
343
+ .b8 109
344
+ .b8 112
345
+ .b8 47
346
+ .b8 116
347
+ .b8 111
348
+ .b8 114
349
+ .b8 99
350
+ .b8 104
351
+ .b8 105
352
+ .b8 110
353
+ .b8 100
354
+ .b8 117
355
+ .b8 99
356
+ .b8 116
357
+ .b8 111
358
+ .b8 114
359
+ .b8 95
360
+ .b8 114
361
+ .b8 111
362
+ .b8 111
363
+ .b8 116
364
+ .b8 47
365
+ .b8 97
366
+ .b8 102
367
+ .b8 0
368
+ .b8 1
369
+ .b64 $L__func_begin0
370
+ .b64 $L__func_end0
371
+ .b8 2
372
+ .b64 $L__func_begin0
373
+ .b64 $L__func_end0
374
+ .b8 1
375
+ .b8 156
376
+ .b8 116
377
+ .b8 114
378
+ .b8 105
379
+ .b8 116
380
+ .b8 111
381
+ .b8 110
382
+ .b8 95
383
+ .b8 95
384
+ .b8 48
385
+ .b8 100
386
+ .b8 49
387
+ .b8 100
388
+ .b8 101
389
+ .b8 0
390
+ .b8 116
391
+ .b8 114
392
+ .b8 105
393
+ .b8 116
394
+ .b8 111
395
+ .b8 110
396
+ .b8 95
397
+ .b8 95
398
+ .b8 48
399
+ .b8 100
400
+ .b8 49
401
+ .b8 100
402
+ .b8 101
403
+ .b8 0
404
+ .b8 1
405
+ .b8 18
406
+ .b8 1
407
+ .b8 0
408
+ }
409
+ .section .debug_pubnames
410
+ {
411
+ .b32 $L__pubNames_end0-$L__pubNames_start0
412
+ $L__pubNames_start0:
413
+ .b8 2
414
+ .b8 0
415
+ .b32 .debug_info
416
+ .b32 176
417
+ .b32 125
418
+ .b8 116
419
+ .b8 114
420
+ .b8 105
421
+ .b8 116
422
+ .b8 111
423
+ .b8 110
424
+ .b8 95
425
+ .b8 95
426
+ .b8 48
427
+ .b8 100
428
+ .b8 49
429
+ .b8 100
430
+ .b8 101
431
+ .b8 0
432
+ .b32 0
433
+ $L__pubNames_end0:
434
+ }
435
+ .section .debug_pubtypes
436
+ {
437
+ .b32 $L__pubTypes_end0-$L__pubTypes_start0
438
+ $L__pubTypes_start0:
439
+ .b8 2
440
+ .b8 0
441
+ .b32 .debug_info
442
+ .b32 176
443
+ .b32 0
444
+ $L__pubTypes_end0:
445
+ }
446
+ .section .debug_loc { }
.triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.ttgir ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #blocked = #triton_gpu.blocked<{sizePerThread = [2], threadsPerWarp = [32], warpsPerCTA = [8], order = [0], CTAsPerCGA = [1], CTASplitNum = [1], CTAOrder = [0]}>
2
+ module attributes {"triton_gpu.compute-capability" = 89 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 8 : i32, "triton_gpu.threads-per-warp" = 32 : i32} {
3
+ tt.func public @triton__0d1de(%arg0: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
4
+ %cst = arith.constant dense<1.000000e+00> : tensor<512xf32, #blocked>
5
+ %cst_0 = arith.constant dense<0.707106769> : tensor<512xf32, #blocked>
6
+ %cst_1 = arith.constant dense<5.000000e-01> : tensor<512xf32, #blocked>
7
+ %c512_i32 = arith.constant 512 : i32
8
+ %0 = tt.get_program_id x : i32
9
+ %1 = arith.muli %0, %c512_i32 : i32
10
+ %2 = tt.make_range {end = 512 : i32, start = 0 : i32} : tensor<512xi32, #blocked>
11
+ %3 = tt.splat %1 : (i32) -> tensor<512xi32, #blocked>
12
+ %4 = arith.addi %3, %2 : tensor<512xi32, #blocked>
13
+ %5 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>, #blocked>
14
+ %6 = tt.addptr %5, %4 : tensor<512x!tt.ptr<bf16, 1>, #blocked>, tensor<512xi32, #blocked>
15
+ %7 = tt.load %6 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<512xbf16, #blocked>
16
+ %8 = arith.extf %7 : tensor<512xbf16, #blocked> to tensor<512xf32, #blocked>
17
+ %9 = arith.mulf %8, %cst_1 : tensor<512xf32, #blocked>
18
+ %10 = arith.mulf %8, %cst_0 : tensor<512xf32, #blocked>
19
+ %11 = tt.extern_elementwise %10 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_erff"} : (tensor<512xf32, #blocked>) -> tensor<512xf32, #blocked>
20
+ %12 = arith.addf %11, %cst : tensor<512xf32, #blocked>
21
+ %13 = arith.mulf %9, %12 : tensor<512xf32, #blocked>
22
+ %14 = arith.truncf %13 : tensor<512xf32, #blocked> to tensor<512xbf16, #blocked>
23
+ tt.store %6, %14 {cache = 1 : i32, evict = 1 : i32} : tensor<512xbf16, #blocked>
24
+ tt.return
25
+ }
26
+ }
.triton/dump/305a9479aab997a3a16bfe46bb303a50/triton_.cubin ADDED
Binary file (30.4 kB). View file
 
.triton/dump/345a87a492fd703c73ab83265a21fcb6/triton_.cubin ADDED
Binary file (52.2 kB). View file
 
.triton/dump/3cd3b6d7993c56f7d0340d40c84f737c/triton_.ptx ADDED
@@ -0,0 +1,809 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Generated by LLVM NVPTX Back-End
3
+ //
4
+
5
+ .version 8.2
6
+ .target sm_89
7
+ .address_size 64
8
+
9
+ // .globl triton__0d1d2d3d4d5d6de7de
10
+ .extern .func __assertfail
11
+ (
12
+ .param .b64 __assertfail_param_0,
13
+ .param .b64 __assertfail_param_1,
14
+ .param .b32 __assertfail_param_2,
15
+ .param .b64 __assertfail_param_3,
16
+ .param .b64 __assertfail_param_4
17
+ )
18
+ ;
19
+ .global .align 1 .b8 assertFunc_1[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100};
20
+ .global .align 1 .b8 assertFile_1[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62};
21
+ .global .align 1 .b8 assertMessage_1[39] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 49, 54, 32, 60, 32, 53, 48, 50, 53, 55};
22
+ .global .align 1 .b8 assertFunc_0[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100};
23
+ .global .align 1 .b8 assertFile_0[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62};
24
+ .global .align 1 .b8 assertMessage_0[38] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 51, 32, 60, 32, 53, 48, 50, 53, 55};
25
+ .global .align 1 .b8 _$_str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};
26
+
27
+ .visible .entry triton__0d1d2d3d4d5d6de7de(
28
+ .param .u64 triton__0d1d2d3d4d5d6de7de_param_0,
29
+ .param .u64 triton__0d1d2d3d4d5d6de7de_param_1,
30
+ .param .u64 triton__0d1d2d3d4d5d6de7de_param_2,
31
+ .param .u64 triton__0d1d2d3d4d5d6de7de_param_3,
32
+ .param .u64 triton__0d1d2d3d4d5d6de7de_param_4,
33
+ .param .u64 triton__0d1d2d3d4d5d6de7de_param_5,
34
+ .param .u32 triton__0d1d2d3d4d5d6de7de_param_6,
35
+ .param .u32 triton__0d1d2d3d4d5d6de7de_param_7
36
+ )
37
+ .maxntid 256, 1, 1
38
+ {
39
+ .reg .pred %p<33>;
40
+ .reg .b16 %rs<13>;
41
+ .reg .b32 %r<93>;
42
+ .reg .f32 %f<79>;
43
+ .reg .b64 %rd<92>;
44
+ .loc 1 18 0
45
+ $L__func_begin0:
46
+ .loc 1 18 0
47
+
48
+ ld.param.u64 %rd37, [triton__0d1d2d3d4d5d6de7de_param_4];
49
+ ld.param.u64 %rd36, [triton__0d1d2d3d4d5d6de7de_param_3];
50
+ ld.param.u64 %rd35, [triton__0d1d2d3d4d5d6de7de_param_2];
51
+ ld.param.u64 %rd34, [triton__0d1d2d3d4d5d6de7de_param_1];
52
+ ld.param.u64 %rd43, [triton__0d1d2d3d4d5d6de7de_param_0];
53
+ $L__tmp0:
54
+ .loc 1 22 44
55
+ mov.u32 %r1, %tid.x;
56
+ bfe.u32 %r2, %r1, 2, 6;
57
+ and.b32 %r16, %r1, 63;
58
+ .loc 1 24 33
59
+ and.b32 %r3, %r1, 3;
60
+ .loc 1 21 28
61
+ mov.u32 %r15, %ctaid.x;
62
+ .loc 1 21 33
63
+ shl.b32 %r17, %r15, 6;
64
+ .loc 1 22 23
65
+ or.b32 %r18, %r17, %r2;
66
+ or.b32 %r19, %r17, %r16;
67
+ .loc 1 26 30
68
+ mul.wide.s32 %rd44, %r18, 8;
69
+ add.s64 %rd40, %rd43, %rd44;
70
+ mul.wide.s32 %rd45, %r19, 8;
71
+ add.s64 %rd42, %rd43, %rd45;
72
+ mov.pred %p13, -1;
73
+ .loc 1 26 35
74
+ mov.u64 %rd39, 0x0;
75
+ @%p13 ld.global.L1::evict_last.b64 { %rd39 }, [ %rd40 + 0 ];
76
+ mov.u64 %rd41, 0x0;
77
+ @%p13 ld.global.L1::evict_last.b64 { %rd41 }, [ %rd42 + 0 ];
78
+ .loc 1 27 18
79
+ bfe.s32 %r20, %r15, 25, 1;
80
+ shr.u32 %r21, %r20, 23;
81
+ add.s32 %r22, %r18, %r21;
82
+ and.b32 %r23, %r22, 16776704;
83
+ sub.s32 %r24, %r18, %r23;
84
+ .loc 1 35 44
85
+ shl.b32 %r5, %r24, 8;
86
+ .loc 1 37 22
87
+ add.s64 %rd46, %rd41, 50257;
88
+ .loc 1 38 22
89
+ setp.lt.s64 %p3, %rd39, 0;
90
+ setp.lt.s64 %p4, %rd41, 0;
91
+ .loc 1 39 36
92
+ selp.b64 %rd47, %rd46, %rd41, %p4;
93
+ .loc 1 40 40
94
+ setp.gt.u64 %p5, %rd47, 50256;
95
+ .loc 1 41 44
96
+ shl.b64 %rd48, %rd39, 8;
97
+ add.s64 %rd49, %rd48, 12865792;
98
+ selp.b64 %rd2, %rd49, %rd48, %p3;
99
+ mov.u16 %rs12, 0;
100
+ mov.b32 %r76, 0;
101
+ mov.b32 %r88, 883;
102
+ mov.u64 %rd81, 1;
103
+ .loc 1 40 55
104
+ @%p5 bra $L__BB0_3;
105
+ bra.uni $L__BB0_1;
106
+ $L__BB0_3:
107
+ .loc 1 31 36
108
+ shl.b64 %rd55, %rd2, 2;
109
+ mul.wide.u32 %rd88, %r3, 4;
110
+ add.s64 %rd87, %rd55, %rd88;
111
+ add.s64 %rd83, %rd34, %rd87;
112
+ shl.b32 %r42, %r15, 14;
113
+ shl.b32 %r43, %r2, 8;
114
+ or.b32 %r44, %r42, %r43;
115
+ or.b32 %r91, %r44, %r3;
116
+ add.s32 %r45, %r5, %r3;
117
+ mul.wide.s32 %rd86, %r45, 4;
118
+ add.s64 %rd82, %rd35, %rd86;
119
+ mov.f32 %f78, 0f00000000;
120
+ mov.b32 %r89, -4;
121
+ mov.f32 %f77, %f78;
122
+ mov.f32 %f76, %f78;
123
+ $L__BB0_4:
124
+ .loc 1 35 50
125
+ mov.u32 %r46, 0x0;
126
+ @%p13 ld.global.L1::evict_last.b32 { %r46 }, [ %rd82 + 0 ];
127
+ @!%p13 mov.u32 %r46, %r76;
128
+ mov.b32 %f31, %r46;
129
+ .loc 1 31 36
130
+ add.s32 %r89, %r89, 4;
131
+ .loc 1 36 34
132
+ add.s32 %r54, %r89, %r91;
133
+ mul.wide.s32 %rd59, %r54, 2;
134
+ add.s64 %rd57, %rd36, %rd59;
135
+ .loc 1 36 50
136
+ mov.u16 %rs4, 0x0;
137
+ @%p13 ld.global.L1::evict_last.b16 { %rs4 }, [ %rd57 + 0 ];
138
+ @!%p13 mov.u16 %rs4, %rs12;
139
+ .loc 1 36 101
140
+ cvt.f32.bf16 %r48, %rs4;
141
+ mov.b32 %f32, %r48;
142
+ .loc 1 40 55
143
+ mov.u64 %rd60, assertMessage_0;
144
+ cvta.global.u64 %rd61, %rd60;
145
+ mov.u64 %rd62, assertFile_0;
146
+ cvta.global.u64 %rd63, %rd62;
147
+ mov.u64 %rd64, assertFunc_0;
148
+ cvta.global.u64 %rd65, %rd64;
149
+ { // callseq 10, 0
150
+ .reg .b32 temp_param_reg;
151
+ .param .b64 param0;
152
+ st.param.b64 [param0+0], %rd61;
153
+ .param .b64 param1;
154
+ st.param.b64 [param1+0], %rd63;
155
+ .param .b32 param2;
156
+ st.param.b32 [param2+0], %r88;
157
+ .param .b64 param3;
158
+ st.param.b64 [param3+0], %rd65;
159
+ .param .b64 param4;
160
+ st.param.b64 [param4+0], %rd81;
161
+ call.uni
162
+ __assertfail,
163
+ (
164
+ param0,
165
+ param1,
166
+ param2,
167
+ param3,
168
+ param4
169
+ );
170
+ } // callseq 10
171
+ .loc 1 41 52
172
+ mov.u32 %r49, 0x0;
173
+ @%p13 ld.global.L1::evict_last.b32 { %r49 }, [ %rd83 + 0 ];
174
+ @!%p13 mov.u32 %r49, %r76;
175
+ mov.b32 %f33, %r49;
176
+ .loc 1 42 22
177
+ add.f32 %f34, %f31, %f33;
178
+ .loc 1 44 22
179
+ add.f32 %f35, %f32, %f34;
180
+ $L__tmp1:
181
+ .loc 2 96 20
182
+ sub.f32 %f36, %f35, %f76;
183
+ .loc 2 97 26
184
+ add.f32 %f78, %f78, 0f3F800000;
185
+ .loc 2 98 30
186
+ mov.b32 %r52, %f36;
187
+ mov.b32 %r53, %f78;
188
+ div.full.f32 %r51, %r52, %r53;
189
+ mov.b32 %f37, %r51;
190
+ .loc 2 98 22
191
+ add.f32 %f76, %f76, %f37;
192
+ .loc 2 101 30
193
+ sub.f32 %f38, %f35, %f76;
194
+ $L__tmp2:
195
+ .loc 1 50 50
196
+ fma.rn.f32 %f77, %f36, %f38, %f77;
197
+ .loc 1 31 36
198
+ add.s64 %rd83, %rd83, 16;
199
+ add.s64 %rd82, %rd82, 16;
200
+ setp.lt.u32 %p19, %r89, 252;
201
+ @%p19 bra $L__BB0_4;
202
+ bra.uni $L__BB0_5;
203
+ $L__BB0_1:
204
+ .loc 1 0 36
205
+ mov.b32 %r90, -4;
206
+ .loc 1 31 36
207
+ shl.b64 %rd50, %rd2, 2;
208
+ mul.wide.u32 %rd88, %r3, 4;
209
+ add.s64 %rd87, %rd50, %rd88;
210
+ add.s64 %rd85, %rd34, %rd87;
211
+ shl.b32 %r27, %r15, 14;
212
+ shl.b32 %r28, %r2, 8;
213
+ or.b32 %r29, %r27, %r28;
214
+ or.b32 %r91, %r29, %r3;
215
+ add.s32 %r30, %r5, %r3;
216
+ mul.wide.s32 %rd86, %r30, 4;
217
+ add.s64 %rd84, %rd35, %rd86;
218
+ mov.f32 %f78, 0f00000000;
219
+ mov.f32 %f77, %f78;
220
+ mov.f32 %f76, %f78;
221
+ $L__BB0_2:
222
+ .loc 1 35 50
223
+ mov.u32 %r31, 0x0;
224
+ @%p13 ld.global.L1::evict_last.b32 { %r31 }, [ %rd84 + 0 ];
225
+ @!%p13 mov.u32 %r31, %r76;
226
+ mov.b32 %f22, %r31;
227
+ .loc 1 31 36
228
+ add.s32 %r90, %r90, 4;
229
+ .loc 1 36 34
230
+ add.s32 %r39, %r90, %r91;
231
+ mul.wide.s32 %rd54, %r39, 2;
232
+ add.s64 %rd52, %rd36, %rd54;
233
+ .loc 1 36 50
234
+ mov.u16 %rs1, 0x0;
235
+ @%p13 ld.global.L1::evict_last.b16 { %rs1 }, [ %rd52 + 0 ];
236
+ @!%p13 mov.u16 %rs1, %rs12;
237
+ .loc 1 36 101
238
+ cvt.f32.bf16 %r33, %rs1;
239
+ mov.b32 %f23, %r33;
240
+ .loc 1 41 52
241
+ mov.u32 %r34, 0x0;
242
+ @%p13 ld.global.L1::evict_last.b32 { %r34 }, [ %rd85 + 0 ];
243
+ @!%p13 mov.u32 %r34, %r76;
244
+ mov.b32 %f24, %r34;
245
+ .loc 1 42 22
246
+ add.f32 %f25, %f22, %f24;
247
+ .loc 1 44 22
248
+ add.f32 %f26, %f23, %f25;
249
+ $L__tmp3:
250
+ .loc 2 96 20
251
+ sub.f32 %f27, %f26, %f76;
252
+ .loc 2 97 26
253
+ add.f32 %f78, %f78, 0f3F800000;
254
+ .loc 2 98 30
255
+ mov.b32 %r37, %f27;
256
+ mov.b32 %r38, %f78;
257
+ div.full.f32 %r36, %r37, %r38;
258
+ mov.b32 %f28, %r36;
259
+ .loc 2 98 22
260
+ add.f32 %f76, %f76, %f28;
261
+ .loc 2 101 30
262
+ sub.f32 %f29, %f26, %f76;
263
+ $L__tmp4:
264
+ .loc 1 50 50
265
+ fma.rn.f32 %f77, %f27, %f29, %f77;
266
+ .loc 1 31 36
267
+ add.s64 %rd85, %rd85, 16;
268
+ add.s64 %rd84, %rd84, 16;
269
+ setp.lt.u32 %p12, %r90, 252;
270
+ @%p12 bra $L__BB0_2;
271
+ $L__BB0_5:
272
+ .loc 1 0 36
273
+ ld.param.u64 %rd38, [triton__0d1d2d3d4d5d6de7de_param_5];
274
+ $L__tmp5:
275
+ .loc 2 120 46
276
+ mov.b32 %r66, %f76;
277
+ shfl.sync.bfly.b32 %r67, %r66, 2, 31, -1;
278
+ mov.b32 %f39, %r67;
279
+ mov.b32 %r68, %f77;
280
+ shfl.sync.bfly.b32 %r69, %r68, 2, 31, -1;
281
+ mov.b32 %f40, %r69;
282
+ mov.b32 %r70, %f78;
283
+ shfl.sync.bfly.b32 %r57, %r70, 2, 31, -1;
284
+ mov.b32 %f41, %r57;
285
+ $L__tmp6:
286
+ .loc 2 108 21
287
+ sub.f32 %f42, %f39, %f76;
288
+ .loc 2 109 28
289
+ add.f32 %f43, %f78, %f41;
290
+ .loc 2 110 39
291
+ setp.eq.f32 %p20, %f43, 0f00000000;
292
+ .loc 2 110 60
293
+ mov.b32 %r58, %f43;
294
+ div.full.f32 %r56, %r57, %r58;
295
+ mov.b32 %f44, %r56;
296
+ .loc 2 110 49
297
+ selp.f32 %f45, 0f00000000, %f44, %p20;
298
+ .loc 2 112 17
299
+ fma.rn.f32 %f46, %f42, %f45, %f76;
300
+ .loc 2 113 15
301
+ add.f32 %f47, %f77, %f40;
302
+ .loc 2 113 30
303
+ mul.f32 %f48, %f42, %f42;
304
+ .loc 2 113 38
305
+ mul.f32 %f49, %f78, %f48;
306
+ .loc 2 113 22
307
+ fma.rn.f32 %f50, %f49, %f45, %f47;
308
+ $L__tmp7:
309
+ .loc 2 120 46
310
+ mov.b32 %r71, %f46;
311
+ shfl.sync.bfly.b32 %r72, %r71, 1, 31, -1;
312
+ mov.b32 %f51, %r72;
313
+ mov.b32 %r73, %f50;
314
+ shfl.sync.bfly.b32 %r74, %r73, 1, 31, -1;
315
+ mov.b32 %f52, %r74;
316
+ shfl.sync.bfly.b32 %r60, %r58, 1, 31, -1;
317
+ mov.b32 %f53, %r60;
318
+ $L__tmp8:
319
+ .loc 2 108 21
320
+ sub.f32 %f54, %f51, %f46;
321
+ .loc 2 109 28
322
+ add.f32 %f55, %f43, %f53;
323
+ .loc 2 110 39
324
+ setp.eq.f32 %p21, %f55, 0f00000000;
325
+ .loc 2 110 60
326
+ mov.b32 %r61, %f55;
327
+ div.full.f32 %r59, %r60, %r61;
328
+ mov.b32 %f56, %r59;
329
+ .loc 2 110 49
330
+ selp.f32 %f57, 0f00000000, %f56, %p21;
331
+ .loc 2 112 17
332
+ fma.rn.f32 %f16, %f54, %f57, %f46;
333
+ .loc 2 113 15
334
+ add.f32 %f58, %f50, %f52;
335
+ .loc 2 113 30
336
+ mul.f32 %f59, %f54, %f54;
337
+ .loc 2 113 38
338
+ mul.f32 %f60, %f43, %f59;
339
+ .loc 2 113 22
340
+ fma.rn.f32 %f61, %f57, %f60, %f58;
341
+ $L__tmp9:
342
+ .loc 1 75 24
343
+ mov.b32 %r63, %f61;
344
+ mov.b32 %r64, 1132462080;
345
+ div.full.f32 %r62, %r63, %r64;
346
+ mov.b32 %f62, %r62;
347
+ .loc 1 77 24
348
+ add.f32 %f17, %f62, 0f3727C5AC;
349
+ .loc 1 58 36
350
+ add.s64 %rd91, %rd34, %rd87;
351
+ add.s64 %rd90, %rd37, %rd88;
352
+ add.s64 %rd89, %rd35, %rd86;
353
+ mov.b32 %r92, -4;
354
+ setp.lt.u64 %p28, %rd47, 50257;
355
+ rsqrt.approx.ftz.f32 %f67, %f17;
356
+ bra.uni $L__BB0_6;
357
+ $L__BB0_8:
358
+ .loc 1 0 0
359
+ mov.b32 %f18, %r75;
360
+ cvt.s64.s32 %rd30, %r81;
361
+ cvt.f32.bf16 %r77, %rs7;
362
+ mov.b32 %f19, %r77;
363
+ mov.b32 %f20, %r78;
364
+ .loc 1 69 54
365
+ mov.u32 %r83, 0x0;
366
+ @%p13 ld.global.L1::evict_first.b32 { %r83 }, [ %rd91 + 0 ];
367
+ @!%p13 mov.u32 %r83, %r76;
368
+ mov.b32 %f63, %r83;
369
+ .loc 1 70 24
370
+ add.f32 %f64, %f18, %f63;
371
+ .loc 1 72 24
372
+ add.f32 %f65, %f19, %f64;
373
+ .loc 1 73 24
374
+ sub.f32 %f66, %f65, %f16;
375
+ .loc 1 79 24
376
+ mul.f32 %f68, %f66, %f67;
377
+ .loc 1 80 24
378
+ mul.f32 %f69, %f68, %f20;
379
+ .loc 1 82 29
380
+ shl.b64 %rd80, %rd30, 1;
381
+ add.s64 %rd79, %rd38, %rd80;
382
+ .loc 1 82 52
383
+ mov.b32 %r85, %f69;
384
+ cvt.rn.bf16.f32 %rs10, %r85;
385
+ @%p13 st.global.b16 [ %rd79 + 0 ], { %rs10 };
386
+ .loc 1 58 36
387
+ add.s32 %r92, %r92, 4;
388
+ add.s64 %rd91, %rd91, 16;
389
+ add.s64 %rd90, %rd90, 16;
390
+ add.s64 %rd89, %rd89, 16;
391
+ setp.lt.u32 %p32, %r92, 252;
392
+ @%p32 bra $L__BB0_6;
393
+ bra.uni $L__BB0_9;
394
+ $L__BB0_6:
395
+ .loc 1 62 51
396
+ mov.u32 %r75, 0x0;
397
+ @%p13 ld.global.L1::evict_last.b32 { %r75 }, [ %rd89 + 0 ];
398
+ @!%p13 mov.u32 %r75, %r76;
399
+ .loc 1 63 35
400
+ add.s32 %r80, %r91, %r92;
401
+ add.s32 %r81, %r80, 4;
402
+ mul.wide.s32 %rd70, %r81, 2;
403
+ add.s64 %rd68, %rd36, %rd70;
404
+ .loc 1 63 51
405
+ mov.u16 %rs7, 0x0;
406
+ @%p13 ld.global.L1::evict_first.b16 { %rs7 }, [ %rd68 + 0 ];
407
+ @!%p13 mov.u16 %rs7, %rs12;
408
+ .loc 1 64 40
409
+ mov.u32 %r78, 0x0;
410
+ @%p13 ld.global.L1::evict_last.b32 { %r78 }, [ %rd90 + 0 ];
411
+ @!%p13 mov.u32 %r78, %r76;
412
+ .loc 1 68 57
413
+ @%p28 bra $L__BB0_8;
414
+ mov.u64 %rd71, assertMessage_1;
415
+ cvta.global.u64 %rd72, %rd71;
416
+ mov.u64 %rd73, assertFile_1;
417
+ cvta.global.u64 %rd74, %rd73;
418
+ mov.u64 %rd75, assertFunc_1;
419
+ cvta.global.u64 %rd76, %rd75;
420
+ { // callseq 11, 0
421
+ .reg .b32 temp_param_reg;
422
+ .param .b64 param0;
423
+ st.param.b64 [param0+0], %rd72;
424
+ .param .b64 param1;
425
+ st.param.b64 [param1+0], %rd74;
426
+ .param .b32 param2;
427
+ st.param.b32 [param2+0], %r88;
428
+ .param .b64 param3;
429
+ st.param.b64 [param3+0], %rd76;
430
+ .param .b64 param4;
431
+ st.param.b64 [param4+0], %rd81;
432
+ call.uni
433
+ __assertfail,
434
+ (
435
+ param0,
436
+ param1,
437
+ param2,
438
+ param3,
439
+ param4
440
+ );
441
+ } // callseq 11
442
+ bra.uni $L__BB0_8;
443
+ $L__BB0_9:
444
+ .loc 1 58 4
445
+ ret;
446
+ $L__tmp10:
447
+ $L__func_end0:
448
+
449
+ }
450
+ // .globl __nv_rsqrtf
451
+ .visible .func (.param .b32 func_retval0) __nv_rsqrtf(
452
+ .param .b32 __nv_rsqrtf_param_0
453
+ )
454
+ {
455
+ .reg .f32 %f<3>;
456
+ $L__func_begin1:
457
+
458
+ ld.param.f32 %f1, [__nv_rsqrtf_param_0];
459
+ rsqrt.approx.ftz.f32 %f2, %f1;
460
+ st.param.f32 [func_retval0+0], %f2;
461
+ ret;
462
+ $L__func_end1:
463
+
464
+ }
465
+ .file 1 "/tmp/torchinductor_root/ci/ccig6fki6p4lxrdmgg6eudahiexcvueeol2p4qp532pvve2y463y.py"
466
+ .file 2 "/usr/local/lib/python3.10/dist-packages/torch/_inductor/triton_helpers.py"
467
+ .section .debug_abbrev
468
+ {
469
+ .b8 1
470
+ .b8 17
471
+ .b8 1
472
+ .b8 37
473
+ .b8 8
474
+ .b8 19
475
+ .b8 5
476
+ .b8 3
477
+ .b8 8
478
+ .b8 16
479
+ .b8 6
480
+ .b8 27
481
+ .b8 8
482
+ .b8 180
483
+ .b8 66
484
+ .b8 12
485
+ .b8 17
486
+ .b8 1
487
+ .b8 18
488
+ .b8 1
489
+ .b8 0
490
+ .b8 0
491
+ .b8 2
492
+ .b8 46
493
+ .b8 0
494
+ .b8 135
495
+ .b8 64
496
+ .b8 8
497
+ .b8 3
498
+ .b8 8
499
+ .b8 58
500
+ .b8 11
501
+ .b8 59
502
+ .b8 11
503
+ .b8 63
504
+ .b8 12
505
+ .b8 32
506
+ .b8 11
507
+ .b8 0
508
+ .b8 0
509
+ .b8 3
510
+ .b8 46
511
+ .b8 1
512
+ .b8 17
513
+ .b8 1
514
+ .b8 18
515
+ .b8 1
516
+ .b8 64
517
+ .b8 10
518
+ .b8 49
519
+ .b8 19
520
+ .b8 0
521
+ .b8 0
522
+ .b8 4
523
+ .b8 29
524
+ .b8 0
525
+ .b8 49
526
+ .b8 19
527
+ .b8 17
528
+ .b8 1
529
+ .b8 18
530
+ .b8 1
531
+ .b8 88
532
+ .b8 11
533
+ .b8 89
534
+ .b8 11
535
+ .b8 87
536
+ .b8 11
537
+ .b8 0
538
+ .b8 0
539
+ .b8 5
540
+ .b8 29
541
+ .b8 1
542
+ .b8 49
543
+ .b8 19
544
+ .b8 17
545
+ .b8 1
546
+ .b8 18
547
+ .b8 1
548
+ .b8 88
549
+ .b8 11
550
+ .b8 89
551
+ .b8 11
552
+ .b8 87
553
+ .b8 11
554
+ .b8 0
555
+ .b8 0
556
+ .b8 0
557
+ }
558
+ .section .debug_info
559
+ {
560
+ .b32 302
561
+ .b8 2
562
+ .b8 0
563
+ .b32 .debug_abbrev
564
+ .b8 8
565
+ .b8 1
566
+ .b8 116
567
+ .b8 114
568
+ .b8 105
569
+ .b8 116
570
+ .b8 111
571
+ .b8 110
572
+ .b8 0
573
+ .b8 2
574
+ .b8 0
575
+ .b8 99
576
+ .b8 99
577
+ .b8 105
578
+ .b8 103
579
+ .b8 54
580
+ .b8 102
581
+ .b8 107
582
+ .b8 105
583
+ .b8 54
584
+ .b8 112
585
+ .b8 52
586
+ .b8 108
587
+ .b8 120
588
+ .b8 114
589
+ .b8 100
590
+ .b8 109
591
+ .b8 103
592
+ .b8 103
593
+ .b8 54
594
+ .b8 101
595
+ .b8 117
596
+ .b8 100
597
+ .b8 97
598
+ .b8 104
599
+ .b8 105
600
+ .b8 101
601
+ .b8 120
602
+ .b8 99
603
+ .b8 118
604
+ .b8 117
605
+ .b8 101
606
+ .b8 101
607
+ .b8 111
608
+ .b8 108
609
+ .b8 50
610
+ .b8 112
611
+ .b8 52
612
+ .b8 113
613
+ .b8 112
614
+ .b8 53
615
+ .b8 51
616
+ .b8 50
617
+ .b8 112
618
+ .b8 118
619
+ .b8 118
620
+ .b8 101
621
+ .b8 50
622
+ .b8 121
623
+ .b8 52
624
+ .b8 54
625
+ .b8 51
626
+ .b8 121
627
+ .b8 46
628
+ .b8 112
629
+ .b8 121
630
+ .b8 0
631
+ .b32 .debug_line
632
+ .b8 47
633
+ .b8 116
634
+ .b8 109
635
+ .b8 112
636
+ .b8 47
637
+ .b8 116
638
+ .b8 111
639
+ .b8 114
640
+ .b8 99
641
+ .b8 104
642
+ .b8 105
643
+ .b8 110
644
+ .b8 100
645
+ .b8 117
646
+ .b8 99
647
+ .b8 116
648
+ .b8 111
649
+ .b8 114
650
+ .b8 95
651
+ .b8 114
652
+ .b8 111
653
+ .b8 111
654
+ .b8 116
655
+ .b8 47
656
+ .b8 99
657
+ .b8 105
658
+ .b8 0
659
+ .b8 1
660
+ .b64 $L__func_begin0
661
+ .b64 $L__func_end0
662
+ .b8 2
663
+ .b8 116
664
+ .b8 114
665
+ .b8 105
666
+ .b8 116
667
+ .b8 111
668
+ .b8 110
669
+ .b8 95
670
+ .b8 95
671
+ .b8 48
672
+ .b8 100
673
+ .b8 49
674
+ .b8 100
675
+ .b8 50
676
+ .b8 100
677
+ .b8 51
678
+ .b8 100
679
+ .b8 52
680
+ .b8 100
681
+ .b8 53
682
+ .b8 100
683
+ .b8 54
684
+ .b8 100
685
+ .b8 101
686
+ .b8 55
687
+ .b8 100
688
+ .b8 101
689
+ .b8 0
690
+ .b8 116
691
+ .b8 114
692
+ .b8 105
693
+ .b8 116
694
+ .b8 111
695
+ .b8 110
696
+ .b8 95
697
+ .b8 95
698
+ .b8 48
699
+ .b8 100
700
+ .b8 49
701
+ .b8 100
702
+ .b8 50
703
+ .b8 100
704
+ .b8 51
705
+ .b8 100
706
+ .b8 52
707
+ .b8 100
708
+ .b8 53
709
+ .b8 100
710
+ .b8 54
711
+ .b8 100
712
+ .b8 101
713
+ .b8 55
714
+ .b8 100
715
+ .b8 101
716
+ .b8 0
717
+ .b8 1
718
+ .b8 18
719
+ .b8 1
720
+ .b8 1
721
+ .b8 3
722
+ .b64 $L__func_begin0
723
+ .b64 $L__func_end0
724
+ .b8 1
725
+ .b8 156
726
+ .b32 125
727
+ .b8 4
728
+ .b32 125
729
+ .b64 $L__tmp1
730
+ .b64 $L__tmp4
731
+ .b8 2
732
+ .b8 47
733
+ .b8 41
734
+ .b8 4
735
+ .b32 125
736
+ .b64 $L__tmp5
737
+ .b64 $L__tmp8
738
+ .b8 2
739
+ .b8 53
740
+ .b8 44
741
+ .b8 5
742
+ .b32 125
743
+ .b64 $L__tmp6
744
+ .b64 $L__tmp9
745
+ .b8 2
746
+ .b8 53
747
+ .b8 44
748
+ .b8 4
749
+ .b32 125
750
+ .b64 $L__tmp6
751
+ .b64 $L__tmp9
752
+ .b8 2
753
+ .b8 120
754
+ .b8 46
755
+ .b8 0
756
+ .b8 0
757
+ .b8 0
758
+ }
759
+ .section .debug_pubnames
760
+ {
761
+ .b32 $L__pubNames_end0-$L__pubNames_start0
762
+ $L__pubNames_start0:
763
+ .b8 2
764
+ .b8 0
765
+ .b32 .debug_info
766
+ .b32 306
767
+ .b32 125
768
+ .b8 116
769
+ .b8 114
770
+ .b8 105
771
+ .b8 116
772
+ .b8 111
773
+ .b8 110
774
+ .b8 95
775
+ .b8 95
776
+ .b8 48
777
+ .b8 100
778
+ .b8 49
779
+ .b8 100
780
+ .b8 50
781
+ .b8 100
782
+ .b8 51
783
+ .b8 100
784
+ .b8 52
785
+ .b8 100
786
+ .b8 53
787
+ .b8 100
788
+ .b8 54
789
+ .b8 100
790
+ .b8 101
791
+ .b8 55
792
+ .b8 100
793
+ .b8 101
794
+ .b8 0
795
+ .b32 0
796
+ $L__pubNames_end0:
797
+ }
798
+ .section .debug_pubtypes
799
+ {
800
+ .b32 $L__pubTypes_end0-$L__pubTypes_start0
801
+ $L__pubTypes_start0:
802
+ .b8 2
803
+ .b8 0
804
+ .b32 .debug_info
805
+ .b32 306
806
+ .b32 0
807
+ $L__pubTypes_end0:
808
+ }
809
+ .section .debug_loc { }
.triton/dump/3cd3b6d7993c56f7d0340d40c84f737c/triton_.ttgir ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #blocked = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [8, 4], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [1, 0]}>
2
+ #blocked1 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [1, 0]}>
3
+ module attributes {"triton_gpu.compute-capability" = 89 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 8 : i32, "triton_gpu.threads-per-warp" = 32 : i32} {
4
+ tt.func public @triton__0d1d2d3d4d5d6de7de(%arg0: !tt.ptr<i64, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg4: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg5: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg6: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}, %arg7: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
5
+ %cst = arith.constant dense<512> : tensor<64x1xi32, #blocked>
6
+ %cst_0 = arith.constant dense<256> : tensor<64x1xi32, #blocked>
7
+ %cst_1 = arith.constant dense<256> : tensor<64x1xi64, #blocked>
8
+ %cst_2 = arith.constant dense<0> : tensor<64x1xi64, #blocked>
9
+ %cst_3 = arith.constant dense<50257> : tensor<64x1xi64, #blocked>
10
+ %cst_4 = arith.constant dense<50257> : tensor<64x1xi64, #blocked1>
11
+ %cst_5 = arith.constant dense<0> : tensor<64x1xi64, #blocked1>
12
+ %c0_i32 = arith.constant 0 : i32
13
+ %c4_i32 = arith.constant 4 : i32
14
+ %c256_i32 = arith.constant 256 : i32
15
+ %cst_6 = arith.constant dense<1.000000e+00> : tensor<64x4xf32, #blocked>
16
+ %cst_7 = arith.constant 0.000000e+00 : f32
17
+ %cst_8 = arith.constant dense<0.000000e+00> : tensor<64x4xbf16, #blocked>
18
+ %cst_9 = arith.constant dense<0.000000e+00> : tensor<1x4xf32, #blocked>
19
+ %cst_10 = arith.constant dense<0.000000e+00> : tensor<64x4xf32, #blocked>
20
+ %cst_11 = arith.constant dense<256> : tensor<1x4xi32, #blocked>
21
+ %cst_12 = arith.constant dense<9.99999974E-6> : tensor<64x1xf32, #blocked>
22
+ %cst_13 = arith.constant dense<2.560000e+02> : tensor<64x1xf32, #blocked>
23
+ %c64_i32 = arith.constant 64 : i32
24
+ %0 = tt.get_program_id x : i32
25
+ %1 = arith.muli %0, %c64_i32 : i32
26
+ %2 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>
27
+ %3 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32, #triton_gpu.slice<{dim = 1, parent = #blocked1}>>
28
+ %4 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<64xi32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>) -> tensor<64x1xi32, #blocked>
29
+ %5 = tt.expand_dims %3 {axis = 1 : i32} : (tensor<64xi32, #triton_gpu.slice<{dim = 1, parent = #blocked1}>>) -> tensor<64x1xi32, #blocked1>
30
+ %6 = tt.splat %1 : (i32) -> tensor<64x1xi32, #blocked>
31
+ %7 = tt.splat %1 : (i32) -> tensor<64x1xi32, #blocked1>
32
+ %8 = arith.addi %6, %4 : tensor<64x1xi32, #blocked>
33
+ %9 = arith.addi %7, %5 : tensor<64x1xi32, #blocked1>
34
+ %10 = tt.make_range {end = 4 : i32, start = 0 : i32} : tensor<4xi32, #triton_gpu.slice<{dim = 0, parent = #blocked}>>
35
+ %11 = tt.expand_dims %10 {axis = 0 : i32} : (tensor<4xi32, #triton_gpu.slice<{dim = 0, parent = #blocked}>>) -> tensor<1x4xi32, #blocked>
36
+ %12 = tt.splat %arg0 : (!tt.ptr<i64, 1>) -> tensor<64x1x!tt.ptr<i64, 1>, #blocked>
37
+ %13 = tt.splat %arg0 : (!tt.ptr<i64, 1>) -> tensor<64x1x!tt.ptr<i64, 1>, #blocked1>
38
+ %14 = tt.addptr %12, %8 : tensor<64x1x!tt.ptr<i64, 1>, #blocked>, tensor<64x1xi32, #blocked>
39
+ %15 = tt.addptr %13, %9 : tensor<64x1x!tt.ptr<i64, 1>, #blocked1>, tensor<64x1xi32, #blocked1>
40
+ %16 = tt.load %14 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x1xi64, #blocked>
41
+ %17 = tt.load %15 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x1xi64, #blocked1>
42
+ %18 = arith.remsi %8, %cst : tensor<64x1xi32, #blocked>
43
+ %19 = arith.muli %18, %cst_0 : tensor<64x1xi32, #blocked>
44
+ %20 = tt.broadcast %19 : (tensor<64x1xi32, #blocked>) -> tensor<64x4xi32, #blocked>
45
+ %21 = tt.splat %arg2 : (!tt.ptr<f32, 1>) -> tensor<64x4x!tt.ptr<f32, 1>, #blocked>
46
+ %22 = arith.muli %8, %cst_0 : tensor<64x1xi32, #blocked>
47
+ %23 = tt.broadcast %22 : (tensor<64x1xi32, #blocked>) -> tensor<64x4xi32, #blocked>
48
+ %24 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<64x4x!tt.ptr<bf16, 1>, #blocked>
49
+ %25 = arith.addi %16, %cst_3 : tensor<64x1xi64, #blocked>
50
+ %26 = arith.addi %17, %cst_4 : tensor<64x1xi64, #blocked1>
51
+ %27 = arith.cmpi slt, %16, %cst_2 : tensor<64x1xi64, #blocked>
52
+ %28 = arith.cmpi slt, %17, %cst_5 : tensor<64x1xi64, #blocked1>
53
+ %29 = arith.select %27, %25, %16 : tensor<64x1xi1, #blocked>, tensor<64x1xi64, #blocked>
54
+ %30 = arith.select %28, %26, %17 : tensor<64x1xi1, #blocked1>, tensor<64x1xi64, #blocked1>
55
+ %31 = arith.cmpi sge, %30, %cst_5 : tensor<64x1xi64, #blocked1>
56
+ %32 = arith.cmpi slt, %30, %cst_4 : tensor<64x1xi64, #blocked1>
57
+ %33 = arith.andi %31, %32 : tensor<64x1xi1, #blocked1>
58
+ %34 = arith.muli %29, %cst_1 : tensor<64x1xi64, #blocked>
59
+ %35 = tt.broadcast %34 : (tensor<64x1xi64, #blocked>) -> tensor<64x4xi64, #blocked>
60
+ %36 = tt.splat %arg1 : (!tt.ptr<f32, 1>) -> tensor<64x4x!tt.ptr<f32, 1>, #blocked>
61
+ %37:3 = scf.for %arg8 = %c0_i32 to %c256_i32 step %c4_i32 iter_args(%arg9 = %cst_10, %arg10 = %cst_10, %arg11 = %cst_10) -> (tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>) : i32 {
62
+ %46 = tt.splat %arg8 : (i32) -> tensor<1x4xi32, #blocked>
63
+ %47 = arith.addi %46, %11 : tensor<1x4xi32, #blocked>
64
+ %48 = arith.cmpi slt, %47, %cst_11 : tensor<1x4xi32, #blocked>
65
+ %49 = tt.broadcast %47 : (tensor<1x4xi32, #blocked>) -> tensor<64x4xi32, #blocked>
66
+ %50 = arith.addi %49, %20 : tensor<64x4xi32, #blocked>
67
+ %51 = tt.addptr %21, %50 : tensor<64x4x!tt.ptr<f32, 1>, #blocked>, tensor<64x4xi32, #blocked>
68
+ %52 = tt.broadcast %48 : (tensor<1x4xi1, #blocked>) -> tensor<64x4xi1, #blocked>
69
+ %53 = tt.load %51, %52, %cst_10 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x4xf32, #blocked>
70
+ %54 = arith.addi %49, %23 : tensor<64x4xi32, #blocked>
71
+ %55 = tt.addptr %24, %54 : tensor<64x4x!tt.ptr<bf16, 1>, #blocked>, tensor<64x4xi32, #blocked>
72
+ %56 = tt.load %55, %52, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x4xbf16, #blocked>
73
+ %57 = arith.extf %56 : tensor<64x4xbf16, #blocked> to tensor<64x4xf32, #blocked>
74
+ tt.assert %33, "index out of bounds: 0 <= tmp3 < 50257", "<frozen importlib._bootstrap_external>", "_call_with_frames_removed", 883 : tensor<64x1xi1, #blocked1>
75
+ %58 = arith.extsi %47 : tensor<1x4xi32, #blocked> to tensor<1x4xi64, #blocked>
76
+ %59 = tt.broadcast %58 : (tensor<1x4xi64, #blocked>) -> tensor<64x4xi64, #blocked>
77
+ %60 = arith.addi %59, %35 : tensor<64x4xi64, #blocked>
78
+ %61 = tt.addptr %36, %60 : tensor<64x4x!tt.ptr<f32, 1>, #blocked>, tensor<64x4xi64, #blocked>
79
+ %62 = tt.load %61, %52, %cst_10 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x4xf32, #blocked>
80
+ %63 = arith.addf %62, %53 : tensor<64x4xf32, #blocked>
81
+ %64 = arith.addf %63, %57 : tensor<64x4xf32, #blocked>
82
+ %65 = arith.subf %64, %arg9 : tensor<64x4xf32, #blocked>
83
+ %66 = arith.addf %arg11, %cst_6 : tensor<64x4xf32, #blocked>
84
+ %67 = arith.divf %65, %66 : tensor<64x4xf32, #blocked>
85
+ %68 = arith.addf %arg9, %67 : tensor<64x4xf32, #blocked>
86
+ %69 = arith.subf %64, %68 : tensor<64x4xf32, #blocked>
87
+ %70 = arith.mulf %65, %69 : tensor<64x4xf32, #blocked>
88
+ %71 = arith.addf %arg10, %70 : tensor<64x4xf32, #blocked>
89
+ %72 = arith.select %52, %68, %arg9 : tensor<64x4xi1, #blocked>, tensor<64x4xf32, #blocked>
90
+ %73 = arith.select %52, %71, %arg10 : tensor<64x4xi1, #blocked>, tensor<64x4xf32, #blocked>
91
+ %74 = arith.select %52, %66, %arg11 : tensor<64x4xi1, #blocked>, tensor<64x4xf32, #blocked>
92
+ scf.yield %72, %73, %74 : tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>
93
+ }
94
+ %38:3 = "tt.reduce"(%37#0, %37#1, %37#2) <{axis = 1 : i32}> ({
95
+ ^bb0(%arg8: f32, %arg9: f32, %arg10: f32, %arg11: f32, %arg12: f32, %arg13: f32):
96
+ %46 = arith.subf %arg11, %arg8 : f32
97
+ %47 = arith.addf %arg10, %arg13 : f32
98
+ %48 = arith.cmpf oeq, %47, %cst_7 : f32
99
+ %49 = arith.divf %arg13, %47 : f32
100
+ %50 = arith.select %48, %cst_7, %49 : f32
101
+ %51 = arith.mulf %46, %50 : f32
102
+ %52 = arith.addf %arg8, %51 : f32
103
+ %53 = arith.addf %arg9, %arg12 : f32
104
+ %54 = arith.mulf %46, %46 : f32
105
+ %55 = arith.mulf %54, %arg10 : f32
106
+ %56 = arith.mulf %55, %50 : f32
107
+ %57 = arith.addf %53, %56 : f32
108
+ tt.reduce.return %52, %57, %47 : f32, f32, f32
109
+ }) : (tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>) -> (tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>, tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>, tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>)
110
+ %39 = tt.expand_dims %38#0 {axis = 1 : i32} : (tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>) -> tensor<64x1xf32, #blocked>
111
+ %40 = tt.expand_dims %38#1 {axis = 1 : i32} : (tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>) -> tensor<64x1xf32, #blocked>
112
+ %41 = tt.splat %arg4 : (!tt.ptr<f32, 1>) -> tensor<1x4x!tt.ptr<f32, 1>, #blocked>
113
+ %42 = tt.broadcast %39 : (tensor<64x1xf32, #blocked>) -> tensor<64x4xf32, #blocked>
114
+ %43 = arith.divf %40, %cst_13 : tensor<64x1xf32, #blocked>
115
+ %44 = arith.addf %43, %cst_12 : tensor<64x1xf32, #blocked>
116
+ %45 = tt.splat %arg5 : (!tt.ptr<bf16, 1>) -> tensor<64x4x!tt.ptr<bf16, 1>, #blocked>
117
+ scf.for %arg8 = %c0_i32 to %c256_i32 step %c4_i32 : i32 {
118
+ %46 = tt.splat %arg8 : (i32) -> tensor<1x4xi32, #blocked>
119
+ %47 = arith.addi %46, %11 : tensor<1x4xi32, #blocked>
120
+ %48 = arith.cmpi slt, %47, %cst_11 : tensor<1x4xi32, #blocked>
121
+ %49 = tt.broadcast %47 : (tensor<1x4xi32, #blocked>) -> tensor<64x4xi32, #blocked>
122
+ %50 = arith.addi %49, %20 : tensor<64x4xi32, #blocked>
123
+ %51 = tt.addptr %21, %50 : tensor<64x4x!tt.ptr<f32, 1>, #blocked>, tensor<64x4xi32, #blocked>
124
+ %52 = tt.broadcast %48 : (tensor<1x4xi1, #blocked>) -> tensor<64x4xi1, #blocked>
125
+ %53 = tt.load %51, %52, %cst_10 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x4xf32, #blocked>
126
+ %54 = arith.addi %49, %23 : tensor<64x4xi32, #blocked>
127
+ %55 = tt.addptr %24, %54 : tensor<64x4x!tt.ptr<bf16, 1>, #blocked>, tensor<64x4xi32, #blocked>
128
+ %56 = tt.load %55, %52, %cst_8 {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<64x4xbf16, #blocked>
129
+ %57 = arith.extf %56 : tensor<64x4xbf16, #blocked> to tensor<64x4xf32, #blocked>
130
+ %58 = tt.addptr %41, %47 : tensor<1x4x!tt.ptr<f32, 1>, #blocked>, tensor<1x4xi32, #blocked>
131
+ %59 = tt.load %58, %48, %cst_9 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x4xf32, #blocked>
132
+ tt.assert %33, "index out of bounds: 0 <= tmp16 < 50257", "<frozen importlib._bootstrap_external>", "_call_with_frames_removed", 883 : tensor<64x1xi1, #blocked1>
133
+ %60 = arith.extsi %47 : tensor<1x4xi32, #blocked> to tensor<1x4xi64, #blocked>
134
+ %61 = tt.broadcast %60 : (tensor<1x4xi64, #blocked>) -> tensor<64x4xi64, #blocked>
135
+ %62 = arith.addi %61, %35 : tensor<64x4xi64, #blocked>
136
+ %63 = tt.addptr %36, %62 : tensor<64x4x!tt.ptr<f32, 1>, #blocked>, tensor<64x4xi64, #blocked>
137
+ %64 = tt.load %63, %52, %cst_10 {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<64x4xf32, #blocked>
138
+ %65 = arith.addf %64, %53 : tensor<64x4xf32, #blocked>
139
+ %66 = arith.addf %65, %57 : tensor<64x4xf32, #blocked>
140
+ %67 = arith.subf %66, %42 : tensor<64x4xf32, #blocked>
141
+ %68 = tt.extern_elementwise %44 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_rsqrtf"} : (tensor<64x1xf32, #blocked>) -> tensor<64x1xf32, #blocked>
142
+ %69 = tt.broadcast %68 : (tensor<64x1xf32, #blocked>) -> tensor<64x4xf32, #blocked>
143
+ %70 = arith.mulf %67, %69 : tensor<64x4xf32, #blocked>
144
+ %71 = tt.broadcast %59 : (tensor<1x4xf32, #blocked>) -> tensor<64x4xf32, #blocked>
145
+ %72 = arith.mulf %70, %71 : tensor<64x4xf32, #blocked>
146
+ %73 = tt.addptr %45, %54 : tensor<64x4x!tt.ptr<bf16, 1>, #blocked>, tensor<64x4xi32, #blocked>
147
+ %74 = arith.truncf %72 : tensor<64x4xf32, #blocked> to tensor<64x4xbf16, #blocked>
148
+ tt.store %73, %74, %52 {cache = 1 : i32, evict = 1 : i32} : tensor<64x4xbf16, #blocked>
149
+ }
150
+ tt.return
151
+ }
152
+ }
.triton/dump/4993935f9a0e5939755cfb42600362cf/triton_.cubin ADDED
Binary file (4.9 kB). View file
 
.triton/dump/4993935f9a0e5939755cfb42600362cf/triton_.ptx ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Generated by LLVM NVPTX Back-End
3
+ //
4
+
5
+ .version 8.2
6
+ .target sm_89
7
+ .address_size 64
8
+
9
+ // .globl triton__0d1d2de
10
+
11
+ .visible .entry triton__0d1d2de(
12
+ .param .u64 triton__0d1d2de_param_0,
13
+ .param .u64 triton__0d1d2de_param_1,
14
+ .param .u32 triton__0d1d2de_param_2
15
+ )
16
+ .maxntid 256, 1, 1
17
+ {
18
+ .reg .pred %p<3>;
19
+ .reg .b16 %rs<3>;
20
+ .reg .b32 %r<13>;
21
+ .reg .b64 %rd<7>;
22
+ .loc 1 18 0
23
+ $L__func_begin0:
24
+ .loc 1 18 0
25
+
26
+ ld.param.u64 %rd3, [triton__0d1d2de_param_0];
27
+ ld.param.u64 %rd4, [triton__0d1d2de_param_1];
28
+ $L__tmp0:
29
+ .loc 1 21 36
30
+ mov.u32 %r7, %tid.x;
31
+ shl.b32 %r8, %r7, 1;
32
+ and.b32 %r9, %r8, 510;
33
+ .loc 1 20 28
34
+ mov.u32 %r1, %ctaid.x;
35
+ .loc 1 20 33
36
+ shl.b32 %r10, %r1, 9;
37
+ .loc 1 21 23
38
+ or.b32 %r11, %r10, %r9;
39
+ .loc 1 24 30
40
+ mul.wide.s32 %rd5, %r11, 4;
41
+ add.s64 %rd1, %rd3, %rd5;
42
+ mov.pred %p1, -1;
43
+ .loc 1 24 35
44
+ mov.u32 %r4, 0x0;
45
+ mov.u32 %r5, 0x0;
46
+ @%p1 ld.global.v2.b32 { %r4, %r5 }, [ %rd1 + 0 ];
47
+ .loc 1 26 25
48
+ mul.wide.s32 %rd6, %r11, 2;
49
+ add.s64 %rd2, %rd4, %rd6;
50
+ .loc 1 26 36
51
+ cvt.rn.bf16.f32 %rs1, %r4;
52
+ cvt.rn.bf16.f32 %rs2, %r5;
53
+ mov.b32 %r12, {%rs1, %rs2};
54
+ @%p1 st.global.b32 [ %rd2 + 0 ], { %r12 };
55
+ .loc 1 26 4
56
+ ret;
57
+ $L__tmp1:
58
+ $L__func_end0:
59
+
60
+ }
61
+ .file 1 "/tmp/torchinductor_root/zj/czjxjqxojsyyr4zmce6q6twysnucw6p4l5ujgp6ts2ecrm3ue3ex.py"
62
+ .section .debug_abbrev
63
+ {
64
+ .b8 1
65
+ .b8 17
66
+ .b8 1
67
+ .b8 37
68
+ .b8 8
69
+ .b8 19
70
+ .b8 5
71
+ .b8 3
72
+ .b8 8
73
+ .b8 16
74
+ .b8 6
75
+ .b8 27
76
+ .b8 8
77
+ .b8 180
78
+ .b8 66
79
+ .b8 12
80
+ .b8 17
81
+ .b8 1
82
+ .b8 18
83
+ .b8 1
84
+ .b8 0
85
+ .b8 0
86
+ .b8 2
87
+ .b8 46
88
+ .b8 0
89
+ .b8 17
90
+ .b8 1
91
+ .b8 18
92
+ .b8 1
93
+ .b8 64
94
+ .b8 10
95
+ .b8 135
96
+ .b8 64
97
+ .b8 8
98
+ .b8 3
99
+ .b8 8
100
+ .b8 58
101
+ .b8 11
102
+ .b8 59
103
+ .b8 11
104
+ .b8 63
105
+ .b8 12
106
+ .b8 0
107
+ .b8 0
108
+ .b8 0
109
+ }
110
+ .section .debug_info
111
+ {
112
+ .b32 176
113
+ .b8 2
114
+ .b8 0
115
+ .b32 .debug_abbrev
116
+ .b8 8
117
+ .b8 1
118
+ .b8 116
119
+ .b8 114
120
+ .b8 105
121
+ .b8 116
122
+ .b8 111
123
+ .b8 110
124
+ .b8 0
125
+ .b8 2
126
+ .b8 0
127
+ .b8 99
128
+ .b8 122
129
+ .b8 106
130
+ .b8 120
131
+ .b8 106
132
+ .b8 113
133
+ .b8 120
134
+ .b8 111
135
+ .b8 106
136
+ .b8 115
137
+ .b8 121
138
+ .b8 121
139
+ .b8 114
140
+ .b8 52
141
+ .b8 122
142
+ .b8 109
143
+ .b8 99
144
+ .b8 101
145
+ .b8 54
146
+ .b8 113
147
+ .b8 54
148
+ .b8 116
149
+ .b8 119
150
+ .b8 121
151
+ .b8 115
152
+ .b8 110
153
+ .b8 117
154
+ .b8 99
155
+ .b8 119
156
+ .b8 54
157
+ .b8 112
158
+ .b8 52
159
+ .b8 108
160
+ .b8 53
161
+ .b8 117
162
+ .b8 106
163
+ .b8 103
164
+ .b8 112
165
+ .b8 54
166
+ .b8 116
167
+ .b8 115
168
+ .b8 50
169
+ .b8 101
170
+ .b8 99
171
+ .b8 114
172
+ .b8 109
173
+ .b8 51
174
+ .b8 117
175
+ .b8 101
176
+ .b8 51
177
+ .b8 101
178
+ .b8 120
179
+ .b8 46
180
+ .b8 112
181
+ .b8 121
182
+ .b8 0
183
+ .b32 .debug_line
184
+ .b8 47
185
+ .b8 116
186
+ .b8 109
187
+ .b8 112
188
+ .b8 47
189
+ .b8 116
190
+ .b8 111
191
+ .b8 114
192
+ .b8 99
193
+ .b8 104
194
+ .b8 105
195
+ .b8 110
196
+ .b8 100
197
+ .b8 117
198
+ .b8 99
199
+ .b8 116
200
+ .b8 111
201
+ .b8 114
202
+ .b8 95
203
+ .b8 114
204
+ .b8 111
205
+ .b8 111
206
+ .b8 116
207
+ .b8 47
208
+ .b8 122
209
+ .b8 106
210
+ .b8 0
211
+ .b8 1
212
+ .b64 $L__func_begin0
213
+ .b64 $L__func_end0
214
+ .b8 2
215
+ .b64 $L__func_begin0
216
+ .b64 $L__func_end0
217
+ .b8 1
218
+ .b8 156
219
+ .b8 116
220
+ .b8 114
221
+ .b8 105
222
+ .b8 116
223
+ .b8 111
224
+ .b8 110
225
+ .b8 95
226
+ .b8 95
227
+ .b8 48
228
+ .b8 100
229
+ .b8 49
230
+ .b8 100
231
+ .b8 50
232
+ .b8 100
233
+ .b8 101
234
+ .b8 0
235
+ .b8 116
236
+ .b8 114
237
+ .b8 105
238
+ .b8 116
239
+ .b8 111
240
+ .b8 110
241
+ .b8 95
242
+ .b8 95
243
+ .b8 48
244
+ .b8 100
245
+ .b8 49
246
+ .b8 100
247
+ .b8 50
248
+ .b8 100
249
+ .b8 101
250
+ .b8 0
251
+ .b8 1
252
+ .b8 18
253
+ .b8 1
254
+ .b8 0
255
+ }
256
+ .section .debug_pubnames
257
+ {
258
+ .b32 $L__pubNames_end0-$L__pubNames_start0
259
+ $L__pubNames_start0:
260
+ .b8 2
261
+ .b8 0
262
+ .b32 .debug_info
263
+ .b32 180
264
+ .b32 125
265
+ .b8 116
266
+ .b8 114
267
+ .b8 105
268
+ .b8 116
269
+ .b8 111
270
+ .b8 110
271
+ .b8 95
272
+ .b8 95
273
+ .b8 48
274
+ .b8 100
275
+ .b8 49
276
+ .b8 100
277
+ .b8 50
278
+ .b8 100
279
+ .b8 101
280
+ .b8 0
281
+ .b32 0
282
+ $L__pubNames_end0:
283
+ }
284
+ .section .debug_pubtypes
285
+ {
286
+ .b32 $L__pubTypes_end0-$L__pubTypes_start0
287
+ $L__pubTypes_start0:
288
+ .b8 2
289
+ .b8 0
290
+ .b32 .debug_info
291
+ .b32 180
292
+ .b32 0
293
+ $L__pubTypes_end0:
294
+ }
295
+ .section .debug_loc { }
.triton/dump/4c6ad48573c74d55ed79384f6b432d50/triton_.ttir ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module {
2
+ tt.func public @triton__0d1d2de(%arg0: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
3
+ %c1024_i32 = arith.constant 1024 : i32
4
+ %0 = tt.get_program_id x : i32
5
+ %1 = arith.muli %0, %c1024_i32 : i32
6
+ %2 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
7
+ %3 = tt.splat %1 : (i32) -> tensor<1024xi32>
8
+ %4 = arith.addi %3, %2 : tensor<1024xi32>
9
+ %5 = tt.splat %arg0 : (!tt.ptr<f32, 1>) -> tensor<1024x!tt.ptr<f32, 1>>
10
+ %6 = tt.addptr %5, %4 : tensor<1024x!tt.ptr<f32, 1>>, tensor<1024xi32>
11
+ %7 = tt.load %6 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<1024xf32>
12
+ %8 = tt.splat %arg1 : (!tt.ptr<bf16, 1>) -> tensor<1024x!tt.ptr<bf16, 1>>
13
+ %9 = tt.addptr %8, %4 : tensor<1024x!tt.ptr<bf16, 1>>, tensor<1024xi32>
14
+ %10 = arith.truncf %7 : tensor<1024xf32> to tensor<1024xbf16>
15
+ tt.store %9, %10 {cache = 1 : i32, evict = 1 : i32} : tensor<1024xbf16>
16
+ tt.return
17
+ }
18
+ }
.triton/dump/4ce9eb7fe63f19e54893f0c74df91471/triton_.ttgir ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #blocked = #triton_gpu.blocked<{sizePerThread = [2], threadsPerWarp = [32], warpsPerCTA = [8], order = [0], CTAsPerCGA = [1], CTASplitNum = [1], CTAOrder = [0]}>
2
+ module attributes {"triton_gpu.compute-capability" = 89 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 8 : i32, "triton_gpu.threads-per-warp" = 32 : i32} {
3
+ tt.func public @triton__0d1d2de(%arg0: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
4
+ %cst = arith.constant dense<1.000000e+00> : tensor<512xf32, #blocked>
5
+ %cst_0 = arith.constant dense<0.707106769> : tensor<512xf32, #blocked>
6
+ %cst_1 = arith.constant dense<5.000000e-01> : tensor<512xf32, #blocked>
7
+ %c512_i32 = arith.constant 512 : i32
8
+ %0 = tt.get_program_id x : i32
9
+ %1 = arith.muli %0, %c512_i32 : i32
10
+ %2 = tt.make_range {end = 512 : i32, start = 0 : i32} : tensor<512xi32, #blocked>
11
+ %3 = tt.splat %1 : (i32) -> tensor<512xi32, #blocked>
12
+ %4 = arith.addi %3, %2 : tensor<512xi32, #blocked>
13
+ %5 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>, #blocked>
14
+ %6 = tt.addptr %5, %4 : tensor<512x!tt.ptr<bf16, 1>, #blocked>, tensor<512xi32, #blocked>
15
+ %7 = tt.load %6 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<512xbf16, #blocked>
16
+ %8 = arith.extf %7 : tensor<512xbf16, #blocked> to tensor<512xf32, #blocked>
17
+ %9 = arith.mulf %8, %cst_1 : tensor<512xf32, #blocked>
18
+ %10 = arith.mulf %8, %cst_0 : tensor<512xf32, #blocked>
19
+ %11 = tt.extern_elementwise %10 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_erff"} : (tensor<512xf32, #blocked>) -> tensor<512xf32, #blocked>
20
+ %12 = arith.addf %11, %cst : tensor<512xf32, #blocked>
21
+ %13 = arith.mulf %9, %12 : tensor<512xf32, #blocked>
22
+ %14 = tt.splat %arg1 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>, #blocked>
23
+ %15 = tt.addptr %14, %4 : tensor<512x!tt.ptr<bf16, 1>, #blocked>, tensor<512xi32, #blocked>
24
+ %16 = arith.truncf %13 : tensor<512xf32, #blocked> to tensor<512xbf16, #blocked>
25
+ tt.store %15, %16 {cache = 1 : i32, evict = 1 : i32} : tensor<512xbf16, #blocked>
26
+ tt.return
27
+ }
28
+ }
.triton/dump/4ce9eb7fe63f19e54893f0c74df91471/triton_.ttir ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module {
2
+ tt.func public @triton__0d1d2de(%arg0: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
3
+ %cst = arith.constant dense<1.000000e+00> : tensor<512xf32>
4
+ %cst_0 = arith.constant dense<0.707106769> : tensor<512xf32>
5
+ %cst_1 = arith.constant dense<5.000000e-01> : tensor<512xf32>
6
+ %c512_i32 = arith.constant 512 : i32
7
+ %0 = tt.get_program_id x : i32
8
+ %1 = arith.muli %0, %c512_i32 : i32
9
+ %2 = tt.make_range {end = 512 : i32, start = 0 : i32} : tensor<512xi32>
10
+ %3 = tt.splat %1 : (i32) -> tensor<512xi32>
11
+ %4 = arith.addi %3, %2 : tensor<512xi32>
12
+ %5 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>>
13
+ %6 = tt.addptr %5, %4 : tensor<512x!tt.ptr<bf16, 1>>, tensor<512xi32>
14
+ %7 = tt.load %6 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<512xbf16>
15
+ %8 = arith.extf %7 : tensor<512xbf16> to tensor<512xf32>
16
+ %9 = arith.mulf %8, %cst_1 : tensor<512xf32>
17
+ %10 = arith.mulf %8, %cst_0 : tensor<512xf32>
18
+ %11 = tt.extern_elementwise %10 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_erff"} : (tensor<512xf32>) -> tensor<512xf32>
19
+ %12 = arith.addf %11, %cst : tensor<512xf32>
20
+ %13 = arith.mulf %9, %12 : tensor<512xf32>
21
+ %14 = tt.splat %arg1 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>>
22
+ %15 = tt.addptr %14, %4 : tensor<512x!tt.ptr<bf16, 1>>, tensor<512xi32>
23
+ %16 = arith.truncf %13 : tensor<512xf32> to tensor<512xbf16>
24
+ tt.store %15, %16 {cache = 1 : i32, evict = 1 : i32} : tensor<512xbf16>
25
+ tt.return
26
+ }
27
+ }
.triton/dump/51e329eae41e4ee17aa201fff8371d94/triton_.llir ADDED
The diff for this file is too large to render. See raw diff
 
.triton/dump/76fb48b96c75cb8e388c291a18ef9b02/triton_.llir ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ; ModuleID = 'LLVMDialectModule'
2
+ source_filename = "LLVMDialectModule"
3
+
4
+ @assertFunc_1 = internal constant [8 x i8] c"<module>"
5
+ @assertFile_1 = internal constant [68 x i8] c"/usr/local/lib/python3.10/dist-packages/torch/_inductor/codecache.py"
6
+ @assertMessage_1 = internal constant [39 x i8] c"index out of bounds: 0 <= tmp16 < 50257"
7
+ @assertFunc_0 = internal constant [8 x i8] c"<module>"
8
+ @assertFile_0 = internal constant [68 x i8] c"/usr/local/lib/python3.10/dist-packages/torch/_inductor/codecache.py"
9
+ @assertMessage_0 = internal constant [38 x i8] c"index out of bounds: 0 <= tmp3 < 50257"
10
+ @global_smem = external addrspace(3) global [0 x i8]
11
+ @.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
12
+
13
+ declare void @__assertfail(ptr, ptr, i32, ptr, i64) local_unnamed_addr
14
+
15
+ define void @triton__0d1d2d3d4d5d6de7de(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, ptr addrspace(1) %5, i32 %6, i32 %7) local_unnamed_addr !dbg !7 {
16
+ %9 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
17
+ %10 = and i32 %9, 31, !dbg !10
18
+ %11 = lshr i32 %9, 6, !dbg !10
19
+ %12 = and i32 %11, 1, !dbg !10
20
+ %13 = and i32 %9, 1, !dbg !10
21
+ %urem = shl i32 %9, 1, !dbg !11
22
+ %14 = and i32 %urem, 126, !dbg !11
23
+ %15 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #6, !dbg !12
24
+ %16 = shl i32 %15, 1, !dbg !13
25
+ %17 = or i32 %16, %12, !dbg !14
26
+ %18 = or i32 %16, %13, !dbg !14
27
+ %19 = sext i32 %17 to i64, !dbg !15
28
+ %20 = getelementptr i64, ptr addrspace(1) %0, i64 %19, !dbg !15
29
+ %21 = sext i32 %18 to i64, !dbg !15
30
+ %22 = getelementptr i64, ptr addrspace(1) %0, i64 %21, !dbg !15
31
+ %23 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %20, i1 true) #6, !dbg !16
32
+ %24 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %20, i1 true) #6, !dbg !16
33
+ %25 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %22, i1 true) #6, !dbg !16
34
+ %26 = srem i32 %17, 512, !dbg !17
35
+ %27 = shl nsw i32 %26, 8, !dbg !18
36
+ %28 = shl i32 %17, 8, !dbg !19
37
+ %29 = add i64 %25, 50257, !dbg !20
38
+ %30 = icmp slt i64 %23, 0, !dbg !21
39
+ %31 = icmp slt i64 %25, 0, !dbg !21
40
+ %32 = select i1 %31, i64 %29, i64 %25, !dbg !22
41
+ %33 = icmp ugt i64 %32, 50256, !dbg !23
42
+ %34 = shl i64 %23, 8, !dbg !24
43
+ %35 = add i64 %34, 12865792, !dbg !24
44
+ %36 = select i1 %30, i64 %35, i64 %34, !dbg !24
45
+ %37 = getelementptr float, ptr addrspace(1) %1, i64 %36
46
+ %38 = or i32 %14, %27, !dbg !25
47
+ %39 = sext i32 %38 to i64, !dbg !26
48
+ %40 = getelementptr float, ptr addrspace(1) %2, i64 %39, !dbg !26
49
+ %41 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %40, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !27
50
+ %42 = extractvalue { i32, i32 } %41, 0, !dbg !27
51
+ %43 = extractvalue { i32, i32 } %41, 1, !dbg !27
52
+ %44 = insertelement <2 x i32> poison, i32 %42, i64 0, !dbg !27
53
+ %45 = insertelement <2 x i32> %44, i32 %43, i64 1, !dbg !27
54
+ %46 = bitcast <2 x i32> %45 to <2 x float>, !dbg !27
55
+ %47 = or i32 %14, %28, !dbg !28
56
+ %48 = sext i32 %47 to i64, !dbg !29
57
+ %49 = getelementptr i16, ptr addrspace(1) %3, i64 %48, !dbg !29
58
+ %50 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %49, i1 true, i32 0, i1 true) #6, !dbg !30
59
+ %51 = trunc i32 %50 to i16, !dbg !30
60
+ %extelt.offset2 = lshr i32 %50, 16, !dbg !30
61
+ %52 = trunc i32 %extelt.offset2 to i16, !dbg !30
62
+ %53 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %51) #6, !dbg !31
63
+ %54 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %52) #6, !dbg !31
64
+ br i1 %33, label %55, label %56, !dbg !32
65
+
66
+ 55: ; preds = %8
67
+ tail call void @__assertfail(ptr nonnull @assertMessage_0, ptr nonnull @assertFile_0, i32 1892, ptr nonnull @assertFunc_0, i64 1), !dbg !32
68
+ br label %56, !dbg !32
69
+
70
+ 56: ; preds = %55, %8
71
+ %57 = zext nneg i32 %14 to i64, !dbg !33
72
+ %58 = getelementptr float, ptr addrspace(1) %37, i64 %57, !dbg !34
73
+ %59 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %58, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !35
74
+ %60 = extractvalue { i32, i32 } %59, 0, !dbg !35
75
+ %61 = extractvalue { i32, i32 } %59, 1, !dbg !35
76
+ %62 = insertelement <2 x i32> poison, i32 %60, i64 0, !dbg !35
77
+ %63 = insertelement <2 x i32> %62, i32 %61, i64 1, !dbg !35
78
+ %64 = bitcast <2 x i32> %63 to <2 x float>, !dbg !35
79
+ %65 = fadd <2 x float> %46, %64, !dbg !36
80
+ %66 = insertelement <2 x float> poison, float %53, i64 0, !dbg !37
81
+ %67 = insertelement <2 x float> %66, float %54, i64 1, !dbg !37
82
+ %68 = fadd <2 x float> %67, %65, !dbg !37
83
+ %69 = extractelement <2 x float> %68, i64 0, !dbg !38
84
+ %70 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %69, float 1.000000e+00) #6, !dbg !38
85
+ %71 = extractelement <2 x float> %68, i64 1, !dbg !38
86
+ %72 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %71, float 1.000000e+00) #6, !dbg !38
87
+ %73 = insertelement <2 x float> poison, float %70, i64 0, !dbg !42
88
+ %74 = insertelement <2 x float> %73, float %72, i64 1, !dbg !42
89
+ %75 = fadd <2 x float> %74, zeroinitializer, !dbg !42
90
+ %76 = fsub <2 x float> %68, %75, !dbg !43
91
+ %77 = fmul <2 x float> %68, %76, !dbg !44
92
+ %78 = fadd <2 x float> %77, zeroinitializer, !dbg !45
93
+ %79 = or i32 %14, 128, !dbg !46
94
+ %80 = or i32 %79, %27, !dbg !25
95
+ %81 = sext i32 %80 to i64, !dbg !26
96
+ %82 = getelementptr float, ptr addrspace(1) %2, i64 %81, !dbg !26
97
+ %83 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %82, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !27
98
+ %84 = extractvalue { i32, i32 } %83, 0, !dbg !27
99
+ %85 = extractvalue { i32, i32 } %83, 1, !dbg !27
100
+ %86 = insertelement <2 x i32> poison, i32 %84, i64 0, !dbg !27
101
+ %87 = insertelement <2 x i32> %86, i32 %85, i64 1, !dbg !27
102
+ %88 = bitcast <2 x i32> %87 to <2 x float>, !dbg !27
103
+ %89 = or i32 %79, %28, !dbg !28
104
+ %90 = sext i32 %89 to i64, !dbg !29
105
+ %91 = getelementptr i16, ptr addrspace(1) %3, i64 %90, !dbg !29
106
+ %92 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %91, i1 true, i32 0, i1 true) #6, !dbg !30
107
+ %93 = trunc i32 %92 to i16, !dbg !30
108
+ %extelt.offset2.1 = lshr i32 %92, 16, !dbg !30
109
+ %94 = trunc i32 %extelt.offset2.1 to i16, !dbg !30
110
+ %95 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %93) #6, !dbg !31
111
+ %96 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %94) #6, !dbg !31
112
+ br i1 %33, label %97, label %98, !dbg !32
113
+
114
+ 97: ; preds = %56
115
+ tail call void @__assertfail(ptr nonnull @assertMessage_0, ptr nonnull @assertFile_0, i32 1892, ptr nonnull @assertFunc_0, i64 1), !dbg !32
116
+ br label %98, !dbg !32
117
+
118
+ 98: ; preds = %97, %56
119
+ %99 = zext nneg i32 %79 to i64, !dbg !33
120
+ %100 = getelementptr float, ptr addrspace(1) %37, i64 %99, !dbg !34
121
+ %101 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %100, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !35
122
+ %102 = extractvalue { i32, i32 } %101, 0, !dbg !35
123
+ %103 = extractvalue { i32, i32 } %101, 1, !dbg !35
124
+ %104 = insertelement <2 x i32> poison, i32 %102, i64 0, !dbg !35
125
+ %105 = insertelement <2 x i32> %104, i32 %103, i64 1, !dbg !35
126
+ %106 = bitcast <2 x i32> %105 to <2 x float>, !dbg !35
127
+ %107 = fadd <2 x float> %88, %106, !dbg !36
128
+ %108 = insertelement <2 x float> poison, float %95, i64 0, !dbg !37
129
+ %109 = insertelement <2 x float> %108, float %96, i64 1, !dbg !37
130
+ %110 = fadd <2 x float> %109, %107, !dbg !37
131
+ %111 = fsub <2 x float> %110, %75, !dbg !47
132
+ %112 = extractelement <2 x float> %111, i64 0, !dbg !38
133
+ %113 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %112, float 2.000000e+00) #6, !dbg !38
134
+ %114 = extractelement <2 x float> %111, i64 1, !dbg !38
135
+ %115 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %114, float 2.000000e+00) #6, !dbg !38
136
+ %116 = insertelement <2 x float> poison, float %113, i64 0, !dbg !42
137
+ %117 = insertelement <2 x float> %116, float %115, i64 1, !dbg !42
138
+ %118 = fadd <2 x float> %75, %117, !dbg !42
139
+ %119 = fsub <2 x float> %110, %118, !dbg !43
140
+ %120 = fmul <2 x float> %111, %119, !dbg !44
141
+ %121 = fadd <2 x float> %78, %120, !dbg !45
142
+ %122 = lshr i32 %9, 5, !dbg !10
143
+ %123 = and i32 %122, 1, !dbg !11
144
+ %124 = and i32 %9, 127, !dbg !11
145
+ %125 = zext nneg i32 %124 to i64, !dbg !48
146
+ %126 = getelementptr float, ptr addrspace(3) @global_smem, i64 %125, !dbg !48
147
+ store <1 x float> <float 2.000000e+00>, ptr addrspace(3) %126, align 4, !dbg !48
148
+ %127 = add nuw nsw i32 %124, 130, !dbg !48
149
+ %128 = zext nneg i32 %127 to i64, !dbg !48
150
+ %129 = getelementptr float, ptr addrspace(3) @global_smem, i64 %128, !dbg !48
151
+ store <1 x float> <float 2.000000e+00>, ptr addrspace(3) %129, align 4, !dbg !48
152
+ tail call void @llvm.nvvm.barrier0(), !dbg !48
153
+ %130 = mul nuw nsw i32 %12, 130, !dbg !48
154
+ %131 = add nuw nsw i32 %130, %14, !dbg !48
155
+ %132 = zext nneg i32 %131 to i64, !dbg !48
156
+ %133 = getelementptr float, ptr addrspace(3) @global_smem, i64 %132, !dbg !48
157
+ %134 = load float, ptr addrspace(3) %133, align 8, !dbg !48
158
+ %135 = getelementptr inbounds <2 x float>, ptr addrspace(3) %133, i64 0, i64 1, !dbg !48
159
+ %136 = load float, ptr addrspace(3) %135, align 4, !dbg !48
160
+ tail call void @llvm.nvvm.barrier0(), !dbg !49
161
+ %137 = extractelement <2 x float> %118, i64 0, !dbg !51
162
+ %138 = extractelement <2 x float> %118, i64 1, !dbg !55
163
+ %139 = fsub float %138, %137, !dbg !55
164
+ %140 = fadd float %134, %136, !dbg !56
165
+ %141 = fcmp oeq float %140, 0.000000e+00, !dbg !57
166
+ %142 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %136, float %140) #6, !dbg !58
167
+ %143 = select i1 %141, float 0.000000e+00, float %142, !dbg !59
168
+ %144 = fmul float %139, %143, !dbg !60
169
+ %145 = fadd float %137, %144, !dbg !51
170
+ %shift = shufflevector <2 x float> %121, <2 x float> poison, <2 x i32> <i32 1, i32 poison>, !dbg !61
171
+ %146 = fadd <2 x float> %121, %shift, !dbg !61
172
+ %147 = extractelement <2 x float> %146, i64 0, !dbg !61
173
+ %148 = fmul float %139, %139, !dbg !62
174
+ %149 = fmul float %148, %134, !dbg !63
175
+ %150 = fmul float %149, %143, !dbg !64
176
+ %151 = fadd float %147, %150, !dbg !65
177
+ %152 = bitcast float %145 to i32, !dbg !49
178
+ %153 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %152, i32 16, i32 31), !dbg !49
179
+ %154 = bitcast i32 %153 to float, !dbg !49
180
+ %155 = bitcast float %151 to i32, !dbg !49
181
+ %156 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %155, i32 16, i32 31), !dbg !49
182
+ %157 = bitcast i32 %156 to float, !dbg !49
183
+ %158 = bitcast float %140 to i32, !dbg !49
184
+ %159 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %158, i32 16, i32 31), !dbg !49
185
+ %160 = bitcast i32 %159 to float, !dbg !49
186
+ %161 = fsub float %154, %145, !dbg !55
187
+ %162 = fadd float %140, %160, !dbg !56
188
+ %163 = fcmp oeq float %162, 0.000000e+00, !dbg !57
189
+ %164 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %160, float %162) #6, !dbg !58
190
+ %165 = select i1 %163, float 0.000000e+00, float %164, !dbg !59
191
+ %166 = fmul float %161, %165, !dbg !60
192
+ %167 = fadd float %145, %166, !dbg !51
193
+ %168 = fadd float %151, %157, !dbg !61
194
+ %169 = fmul float %161, %161, !dbg !62
195
+ %170 = fmul float %140, %169, !dbg !63
196
+ %171 = fmul float %170, %165, !dbg !64
197
+ %172 = fadd float %168, %171, !dbg !65
198
+ %173 = bitcast float %167 to i32, !dbg !49
199
+ %174 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %173, i32 8, i32 31), !dbg !49
200
+ %175 = bitcast i32 %174 to float, !dbg !49
201
+ %176 = bitcast float %172 to i32, !dbg !49
202
+ %177 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %176, i32 8, i32 31), !dbg !49
203
+ %178 = bitcast i32 %177 to float, !dbg !49
204
+ %179 = bitcast float %162 to i32, !dbg !49
205
+ %180 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %179, i32 8, i32 31), !dbg !49
206
+ %181 = bitcast i32 %180 to float, !dbg !49
207
+ %182 = fsub float %175, %167, !dbg !55
208
+ %183 = fadd float %162, %181, !dbg !56
209
+ %184 = fcmp oeq float %183, 0.000000e+00, !dbg !57
210
+ %185 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %181, float %183) #6, !dbg !58
211
+ %186 = select i1 %184, float 0.000000e+00, float %185, !dbg !59
212
+ %187 = fmul float %182, %186, !dbg !60
213
+ %188 = fadd float %167, %187, !dbg !51
214
+ %189 = fadd float %172, %178, !dbg !61
215
+ %190 = fmul float %182, %182, !dbg !62
216
+ %191 = fmul float %162, %190, !dbg !63
217
+ %192 = fmul float %186, %191, !dbg !64
218
+ %193 = fadd float %189, %192, !dbg !65
219
+ %194 = bitcast float %188 to i32, !dbg !49
220
+ %195 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %194, i32 4, i32 31), !dbg !49
221
+ %196 = bitcast i32 %195 to float, !dbg !49
222
+ %197 = bitcast float %193 to i32, !dbg !49
223
+ %198 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %197, i32 4, i32 31), !dbg !49
224
+ %199 = bitcast i32 %198 to float, !dbg !49
225
+ %200 = bitcast float %183 to i32, !dbg !49
226
+ %201 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %200, i32 4, i32 31), !dbg !49
227
+ %202 = bitcast i32 %201 to float, !dbg !49
228
+ %203 = fsub float %196, %188, !dbg !55
229
+ %204 = fadd float %183, %202, !dbg !56
230
+ %205 = fcmp oeq float %204, 0.000000e+00, !dbg !57
231
+ %206 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %202, float %204) #6, !dbg !58
232
+ %207 = select i1 %205, float 0.000000e+00, float %206, !dbg !59
233
+ %208 = fmul float %203, %207, !dbg !60
234
+ %209 = fadd float %188, %208, !dbg !51
235
+ %210 = fadd float %193, %199, !dbg !61
236
+ %211 = fmul float %203, %203, !dbg !62
237
+ %212 = fmul float %183, %211, !dbg !63
238
+ %213 = fmul float %207, %212, !dbg !64
239
+ %214 = fadd float %210, %213, !dbg !65
240
+ %215 = bitcast float %209 to i32, !dbg !49
241
+ %216 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %215, i32 2, i32 31), !dbg !49
242
+ %217 = bitcast i32 %216 to float, !dbg !49
243
+ %218 = bitcast float %214 to i32, !dbg !49
244
+ %219 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %218, i32 2, i32 31), !dbg !49
245
+ %220 = bitcast i32 %219 to float, !dbg !49
246
+ %221 = bitcast float %204 to i32, !dbg !49
247
+ %222 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %221, i32 2, i32 31), !dbg !49
248
+ %223 = bitcast i32 %222 to float, !dbg !49
249
+ %224 = fsub float %217, %209, !dbg !55
250
+ %225 = fadd float %204, %223, !dbg !56
251
+ %226 = fcmp oeq float %225, 0.000000e+00, !dbg !57
252
+ %227 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %223, float %225) #6, !dbg !58
253
+ %228 = select i1 %226, float 0.000000e+00, float %227, !dbg !59
254
+ %229 = fmul float %224, %228, !dbg !60
255
+ %230 = fadd float %209, %229, !dbg !51
256
+ %231 = fadd float %214, %220, !dbg !61
257
+ %232 = fmul float %224, %224, !dbg !62
258
+ %233 = fmul float %204, %232, !dbg !63
259
+ %234 = fmul float %228, %233, !dbg !64
260
+ %235 = fadd float %231, %234, !dbg !65
261
+ %236 = bitcast float %230 to i32, !dbg !49
262
+ %237 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %236, i32 1, i32 31), !dbg !49
263
+ %238 = bitcast i32 %237 to float, !dbg !49
264
+ %239 = bitcast float %235 to i32, !dbg !49
265
+ %240 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %239, i32 1, i32 31), !dbg !49
266
+ %241 = bitcast i32 %240 to float, !dbg !49
267
+ %242 = bitcast float %225 to i32, !dbg !49
268
+ %243 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %242, i32 1, i32 31), !dbg !49
269
+ %244 = bitcast i32 %243 to float, !dbg !49
270
+ %245 = fsub float %238, %230, !dbg !55
271
+ %246 = fadd float %225, %244, !dbg !56
272
+ %247 = fcmp oeq float %246, 0.000000e+00, !dbg !57
273
+ %248 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %244, float %246) #6, !dbg !58
274
+ %249 = select i1 %247, float 0.000000e+00, float %248, !dbg !59
275
+ %250 = fmul float %245, %249, !dbg !60
276
+ %251 = fadd float %230, %250, !dbg !51
277
+ %252 = fadd float %235, %241, !dbg !61
278
+ %253 = fmul float %245, %245, !dbg !62
279
+ %254 = fmul float %225, %253, !dbg !63
280
+ %255 = fmul float %249, %254, !dbg !64
281
+ %256 = fadd float %252, %255, !dbg !65
282
+ %257 = icmp eq i32 %10, 0, !dbg !49
283
+ %258 = shl nuw nsw i32 %12, 1, !dbg !49
284
+ %259 = or i32 %258, %123, !dbg !49
285
+ %260 = zext nneg i32 %259 to i64, !dbg !49
286
+ %261 = getelementptr float, ptr addrspace(3) @global_smem, i64 %260, !dbg !49
287
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %261, float %251, i1 %257) #6, !dbg !49
288
+ %262 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 16), i64 %260, !dbg !49
289
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %262, float %256, i1 %257) #6, !dbg !49
290
+ %263 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 32), i64 %260, !dbg !49
291
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %263, float %246, i1 %257) #6, !dbg !49
292
+ tail call void @llvm.nvvm.barrier0(), !dbg !49
293
+ %264 = icmp slt i32 %9, 4, !dbg !49
294
+ %265 = sext i32 %9 to i64, !dbg !49
295
+ %266 = getelementptr float, ptr addrspace(3) @global_smem, i64 %265, !dbg !49
296
+ %267 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %266, i1 %264) #6, !dbg !49
297
+ %268 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 16), i64 %265, !dbg !49
298
+ %269 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %268, i1 %264) #6, !dbg !49
299
+ %270 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 32), i64 %265, !dbg !49
300
+ %271 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %270, i1 %264) #6, !dbg !49
301
+ %272 = bitcast float %267 to i32, !dbg !49
302
+ %273 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %272, i32 1, i32 31), !dbg !49
303
+ %274 = bitcast i32 %273 to float, !dbg !49
304
+ %275 = bitcast float %269 to i32, !dbg !49
305
+ %276 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %275, i32 1, i32 31), !dbg !49
306
+ %277 = bitcast i32 %276 to float, !dbg !49
307
+ %278 = bitcast float %271 to i32, !dbg !49
308
+ %279 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %278, i32 1, i32 31), !dbg !49
309
+ %280 = bitcast i32 %279 to float, !dbg !49
310
+ %281 = fsub float %274, %267, !dbg !55
311
+ %282 = fadd float %271, %280, !dbg !56
312
+ %283 = fcmp oeq float %282, 0.000000e+00, !dbg !57
313
+ %284 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %280, float %282) #6, !dbg !58
314
+ %285 = select i1 %283, float 0.000000e+00, float %284, !dbg !59
315
+ %286 = fmul float %281, %285, !dbg !60
316
+ %287 = fadd float %267, %286, !dbg !51
317
+ %288 = fadd float %269, %277, !dbg !61
318
+ %289 = fmul float %281, %281, !dbg !62
319
+ %290 = fmul float %271, %289, !dbg !63
320
+ %291 = fmul float %290, %285, !dbg !64
321
+ %292 = fadd float %288, %291, !dbg !65
322
+ %293 = icmp eq i32 %13, 0, !dbg !49
323
+ %294 = and i1 %264, %293, !dbg !49
324
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %266, float %287, i1 %294) #6, !dbg !49
325
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %268, float %292, i1 %294) #6, !dbg !49
326
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %270, float %282, i1 %294) #6, !dbg !49
327
+ tail call void @llvm.nvvm.barrier0(), !dbg !49
328
+ %295 = zext nneg i32 %258 to i64, !dbg !49
329
+ %296 = getelementptr float, ptr addrspace(3) @global_smem, i64 %295, !dbg !49
330
+ %297 = load float, ptr addrspace(3) %296, align 4, !dbg !49
331
+ %298 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 16), i64 %295, !dbg !49
332
+ %299 = load float, ptr addrspace(3) %298, align 4, !dbg !49
333
+ %300 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %299, float 2.560000e+02) #6, !dbg !66
334
+ %301 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %299, float 2.560000e+02) #6, !dbg !66
335
+ %302 = fadd float %300, 0x3EE4F8B580000000, !dbg !67
336
+ %303 = getelementptr float, ptr addrspace(3) @global_smem, i64 %57
337
+ %304 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %40, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !68
338
+ %305 = extractvalue { i32, i32 } %304, 0, !dbg !68
339
+ %306 = extractvalue { i32, i32 } %304, 1, !dbg !68
340
+ %307 = bitcast i32 %305 to float, !dbg !68
341
+ %308 = bitcast i32 %306 to float, !dbg !68
342
+ %309 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %49, i1 true, i32 0, i1 true) #6, !dbg !69
343
+ %310 = trunc i32 %309 to i16, !dbg !69
344
+ %extelt.offset = lshr i32 %309, 16, !dbg !69
345
+ %311 = trunc i32 %extelt.offset to i16, !dbg !69
346
+ %312 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %310) #6, !dbg !70
347
+ %313 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %311) #6, !dbg !70
348
+ %314 = getelementptr float, ptr addrspace(1) %4, i64 %125, !dbg !71
349
+ %315 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %314, i1 true, i32 0, i1 true) #6, !dbg !72
350
+ br i1 %33, label %316, label %317, !dbg !73
351
+
352
+ 316: ; preds = %98
353
+ tail call void @__assertfail(ptr nonnull @assertMessage_1, ptr nonnull @assertFile_1, i32 1892, ptr nonnull @assertFunc_1, i64 1), !dbg !73
354
+ br label %317, !dbg !73
355
+
356
+ 317: ; preds = %316, %98
357
+ %318 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_first.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %58, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !74
358
+ %319 = extractvalue { i32, i32 } %318, 0, !dbg !74
359
+ %320 = extractvalue { i32, i32 } %318, 1, !dbg !74
360
+ %321 = bitcast i32 %319 to float, !dbg !74
361
+ %322 = bitcast i32 %320 to float, !dbg !74
362
+ %323 = fadd float %307, %321, !dbg !75
363
+ %324 = fadd float %308, %322, !dbg !75
364
+ %325 = fadd float %312, %323, !dbg !76
365
+ %326 = fadd float %313, %324, !dbg !76
366
+ %327 = fsub float %325, %297, !dbg !77
367
+ %328 = fsub float %326, %297, !dbg !77
368
+ %329 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !78
369
+ %.not.i = icmp eq i32 %329, 0, !dbg !78
370
+ br i1 %.not.i, label %332, label %330, !dbg !78
371
+
372
+ 330: ; preds = %317
373
+ %331 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %302), !dbg !78
374
+ br label %__nv_rsqrtf.exit, !dbg !78
375
+
376
+ 332: ; preds = %317
377
+ %333 = tail call float @llvm.nvvm.rsqrt.approx.f(float %302), !dbg !78
378
+ br label %__nv_rsqrtf.exit, !dbg !78
379
+
380
+ __nv_rsqrtf.exit: ; preds = %330, %332
381
+ %.0.i = phi float [ %331, %330 ], [ %333, %332 ], !dbg !78
382
+ %334 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !78
383
+ %335 = fmul float %327, %.0.i, !dbg !79
384
+ %336 = fmul float %328, %.0.i, !dbg !79
385
+ tail call void @llvm.nvvm.barrier0(), !dbg !80
386
+ store i32 %315, ptr addrspace(3) %126, align 4, !dbg !80
387
+ tail call void @llvm.nvvm.barrier0(), !dbg !80
388
+ %337 = load float, ptr addrspace(3) %303, align 8, !dbg !80
389
+ %338 = getelementptr inbounds <2 x float>, ptr addrspace(3) %303, i64 0, i64 1, !dbg !80
390
+ %339 = load float, ptr addrspace(3) %338, align 4, !dbg !80
391
+ %340 = fmul float %335, %337, !dbg !80
392
+ %341 = fmul float %336, %339, !dbg !80
393
+ %342 = getelementptr i16, ptr addrspace(1) %5, i64 %48, !dbg !81
394
+ %343 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %340) #6, !dbg !82
395
+ %344 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %341) #6, !dbg !82
396
+ %345 = insertelement <2 x i16> undef, i16 %343, i64 0, !dbg !82
397
+ %346 = insertelement <2 x i16> %345, i16 %344, i64 1, !dbg !82
398
+ %347 = bitcast <2 x i16> %346 to i32, !dbg !82
399
+ tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %347, ptr addrspace(1) %342, i1 true) #6, !dbg !82
400
+ %348 = or i32 %124, 128, !dbg !83
401
+ %349 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %82, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !68
402
+ %350 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %91, i1 true, i32 0, i1 true) #6, !dbg !69
403
+ %351 = trunc i32 %350 to i16, !dbg !69
404
+ %extelt.offset.1 = lshr i32 %350, 16, !dbg !69
405
+ %352 = trunc i32 %extelt.offset.1 to i16, !dbg !69
406
+ %353 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %351) #6, !dbg !70
407
+ %354 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %352) #6, !dbg !70
408
+ %355 = zext nneg i32 %348 to i64, !dbg !71
409
+ %356 = getelementptr float, ptr addrspace(1) %4, i64 %355, !dbg !71
410
+ %357 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %356, i1 true, i32 0, i1 true) #6, !dbg !72
411
+ br i1 %33, label %358, label %359, !dbg !73
412
+
413
+ 358: ; preds = %__nv_rsqrtf.exit
414
+ tail call void @__assertfail(ptr nonnull @assertMessage_1, ptr nonnull @assertFile_1, i32 1892, ptr nonnull @assertFunc_1, i64 1), !dbg !73
415
+ br label %359, !dbg !73
416
+
417
+ 359: ; preds = %358, %__nv_rsqrtf.exit
418
+ %360 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_first.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %100, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !74
419
+ %361 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !78
420
+ %.not.i.1 = icmp eq i32 %361, 0, !dbg !78
421
+ br i1 %.not.i.1, label %364, label %362, !dbg !78
422
+
423
+ 362: ; preds = %359
424
+ %363 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %302), !dbg !78
425
+ br label %__nv_rsqrtf.exit.1, !dbg !78
426
+
427
+ 364: ; preds = %359
428
+ %365 = tail call float @llvm.nvvm.rsqrt.approx.f(float %302), !dbg !78
429
+ br label %__nv_rsqrtf.exit.1, !dbg !78
430
+
431
+ __nv_rsqrtf.exit.1: ; preds = %364, %362
432
+ %.0.i.1 = phi float [ %363, %362 ], [ %365, %364 ], !dbg !78
433
+ %366 = extractvalue { i32, i32 } %349, 1, !dbg !68
434
+ %367 = bitcast i32 %366 to float, !dbg !68
435
+ %368 = extractvalue { i32, i32 } %360, 1, !dbg !74
436
+ %369 = bitcast i32 %368 to float, !dbg !74
437
+ %370 = fadd float %367, %369, !dbg !75
438
+ %371 = fadd float %354, %370, !dbg !76
439
+ %372 = fsub float %371, %297, !dbg !77
440
+ %373 = extractvalue { i32, i32 } %349, 0, !dbg !68
441
+ %374 = bitcast i32 %373 to float, !dbg !68
442
+ %375 = extractvalue { i32, i32 } %360, 0, !dbg !74
443
+ %376 = bitcast i32 %375 to float, !dbg !74
444
+ %377 = fadd float %374, %376, !dbg !75
445
+ %378 = fadd float %353, %377, !dbg !76
446
+ %379 = fsub float %378, %297, !dbg !77
447
+ %380 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !78
448
+ %381 = fmul float %379, %.0.i.1, !dbg !79
449
+ %382 = fmul float %372, %.0.i.1, !dbg !79
450
+ tail call void @llvm.nvvm.barrier0(), !dbg !80
451
+ store i32 %357, ptr addrspace(3) %126, align 4, !dbg !80
452
+ tail call void @llvm.nvvm.barrier0(), !dbg !80
453
+ %383 = load float, ptr addrspace(3) %303, align 8, !dbg !80
454
+ %384 = load float, ptr addrspace(3) %338, align 4, !dbg !80
455
+ %385 = fmul float %381, %383, !dbg !80
456
+ %386 = fmul float %382, %384, !dbg !80
457
+ %387 = getelementptr i16, ptr addrspace(1) %5, i64 %90, !dbg !81
458
+ %388 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %385) #6, !dbg !82
459
+ %389 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %386) #6, !dbg !82
460
+ %390 = insertelement <2 x i16> undef, i16 %388, i64 0, !dbg !82
461
+ %391 = insertelement <2 x i16> %390, i16 %389, i64 1, !dbg !82
462
+ %392 = bitcast <2 x i16> %391 to i32, !dbg !82
463
+ tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %392, ptr addrspace(1) %387, i1 true) #6, !dbg !82
464
+ ret void, !dbg !84
465
+ }
466
+
467
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
468
+ declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
469
+
470
+ ; Function Attrs: convergent nocallback nounwind
471
+ declare void @llvm.nvvm.barrier0() #1
472
+
473
+ ; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
474
+ declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #2
475
+
476
+ ; Function Attrs: alwaysinline nounwind
477
+ define float @__nv_rsqrtf(float %x) local_unnamed_addr #3 {
478
+ %1 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6
479
+ %.not = icmp eq i32 %1, 0
480
+ br i1 %.not, label %4, label %2
481
+
482
+ 2: ; preds = %0
483
+ %3 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %x)
484
+ br label %6
485
+
486
+ 4: ; preds = %0
487
+ %5 = tail call float @llvm.nvvm.rsqrt.approx.f(float %x)
488
+ br label %6
489
+
490
+ 6: ; preds = %4, %2
491
+ %.0 = phi float [ %3, %2 ], [ %5, %4 ]
492
+ ret float %.0
493
+ }
494
+
495
+ declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #4
496
+
497
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
498
+ declare float @llvm.nvvm.rsqrt.approx.ftz.f(float) #5
499
+
500
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
501
+ declare float @llvm.nvvm.rsqrt.approx.f(float) #5
502
+
503
+ attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
504
+ attributes #1 = { convergent nocallback nounwind }
505
+ attributes #2 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
506
+ attributes #3 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
507
+ attributes #4 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
508
+ attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
509
+ attributes #6 = { nounwind }
510
+
511
+ !llvm.module.flags = !{!0, !1}
512
+ !llvm.dbg.cu = !{!2}
513
+ !nvvm.annotations = !{!4, !5, !5, !4}
514
+ !llvm.ident = !{!6}
515
+
516
+ !0 = !{i32 2, !"Debug Info Version", i32 3}
517
+ !1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
518
+ !2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
519
+ !3 = !DIFile(filename: "ccig6fki6p4lxrdmgg6eudahiexcvueeol2p4qp532pvve2y463y.py", directory: "/tmp/torchinductor_root/ci")
520
+ !4 = !{ptr @triton__0d1d2d3d4d5d6de7de, !"kernel", i32 1}
521
+ !5 = !{ptr @triton__0d1d2d3d4d5d6de7de, !"maxntidx", i32 128}
522
+ !6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
523
+ !7 = distinct !DISubprogram(name: "triton__0d1d2d3d4d5d6de7de", linkageName: "triton__0d1d2d3d4d5d6de7de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
524
+ !8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
525
+ !9 = !{}
526
+ !10 = !DILocation(line: 22, column: 44, scope: !7)
527
+ !11 = !DILocation(line: 24, column: 33, scope: !7)
528
+ !12 = !DILocation(line: 21, column: 28, scope: !7)
529
+ !13 = !DILocation(line: 21, column: 33, scope: !7)
530
+ !14 = !DILocation(line: 22, column: 23, scope: !7)
531
+ !15 = !DILocation(line: 26, column: 30, scope: !7)
532
+ !16 = !DILocation(line: 26, column: 35, scope: !7)
533
+ !17 = !DILocation(line: 27, column: 18, scope: !7)
534
+ !18 = !DILocation(line: 35, column: 44, scope: !7)
535
+ !19 = !DILocation(line: 36, column: 44, scope: !7)
536
+ !20 = !DILocation(line: 37, column: 22, scope: !7)
537
+ !21 = !DILocation(line: 38, column: 22, scope: !7)
538
+ !22 = !DILocation(line: 39, column: 36, scope: !7)
539
+ !23 = !DILocation(line: 40, column: 40, scope: !7)
540
+ !24 = !DILocation(line: 41, column: 44, scope: !7)
541
+ !25 = !DILocation(line: 35, column: 40, scope: !7)
542
+ !26 = !DILocation(line: 35, column: 34, scope: !7)
543
+ !27 = !DILocation(line: 35, column: 50, scope: !7)
544
+ !28 = !DILocation(line: 36, column: 40, scope: !7)
545
+ !29 = !DILocation(line: 36, column: 34, scope: !7)
546
+ !30 = !DILocation(line: 36, column: 50, scope: !7)
547
+ !31 = !DILocation(line: 36, column: 101, scope: !7)
548
+ !32 = !DILocation(line: 40, column: 55, scope: !7)
549
+ !33 = !DILocation(line: 41, column: 40, scope: !7)
550
+ !34 = !DILocation(line: 41, column: 34, scope: !7)
551
+ !35 = !DILocation(line: 41, column: 52, scope: !7)
552
+ !36 = !DILocation(line: 42, column: 22, scope: !7)
553
+ !37 = !DILocation(line: 44, column: 22, scope: !7)
554
+ !38 = !DILocation(line: 98, column: 30, scope: !39, inlinedAt: !41)
555
+ !39 = distinct !DILexicalBlockFile(scope: !7, file: !40, discriminator: 0)
556
+ !40 = !DIFile(filename: "triton_helpers.py", directory: "/usr/local/lib/python3.10/dist-packages/torch/_inductor")
557
+ !41 = !DILocation(line: 47, column: 41, scope: !39)
558
+ !42 = !DILocation(line: 98, column: 22, scope: !39, inlinedAt: !41)
559
+ !43 = !DILocation(line: 101, column: 30, scope: !39, inlinedAt: !41)
560
+ !44 = !DILocation(line: 101, column: 22, scope: !39, inlinedAt: !41)
561
+ !45 = !DILocation(line: 50, column: 50, scope: !7)
562
+ !46 = !DILocation(line: 32, column: 27, scope: !7)
563
+ !47 = !DILocation(line: 96, column: 20, scope: !39, inlinedAt: !41)
564
+ !48 = !DILocation(line: 31, column: 36, scope: !7)
565
+ !49 = !DILocation(line: 120, column: 46, scope: !39, inlinedAt: !50)
566
+ !50 = !DILocation(line: 53, column: 44, scope: !39)
567
+ !51 = !DILocation(line: 112, column: 17, scope: !52, inlinedAt: !53)
568
+ !52 = distinct !DILexicalBlockFile(scope: !39, file: !40, discriminator: 0)
569
+ !53 = !DILocation(line: 120, column: 46, scope: !52, inlinedAt: !54)
570
+ !54 = !DILocation(line: 53, column: 44, scope: !52)
571
+ !55 = !DILocation(line: 108, column: 21, scope: !52, inlinedAt: !53)
572
+ !56 = !DILocation(line: 109, column: 28, scope: !52, inlinedAt: !53)
573
+ !57 = !DILocation(line: 110, column: 39, scope: !52, inlinedAt: !53)
574
+ !58 = !DILocation(line: 110, column: 60, scope: !52, inlinedAt: !53)
575
+ !59 = !DILocation(line: 110, column: 49, scope: !52, inlinedAt: !53)
576
+ !60 = !DILocation(line: 112, column: 25, scope: !52, inlinedAt: !53)
577
+ !61 = !DILocation(line: 113, column: 15, scope: !52, inlinedAt: !53)
578
+ !62 = !DILocation(line: 113, column: 30, scope: !52, inlinedAt: !53)
579
+ !63 = !DILocation(line: 113, column: 38, scope: !52, inlinedAt: !53)
580
+ !64 = !DILocation(line: 113, column: 49, scope: !52, inlinedAt: !53)
581
+ !65 = !DILocation(line: 113, column: 22, scope: !52, inlinedAt: !53)
582
+ !66 = !DILocation(line: 75, column: 24, scope: !7)
583
+ !67 = !DILocation(line: 77, column: 24, scope: !7)
584
+ !68 = !DILocation(line: 62, column: 51, scope: !7)
585
+ !69 = !DILocation(line: 63, column: 51, scope: !7)
586
+ !70 = !DILocation(line: 63, column: 103, scope: !7)
587
+ !71 = !DILocation(line: 64, column: 35, scope: !7)
588
+ !72 = !DILocation(line: 64, column: 40, scope: !7)
589
+ !73 = !DILocation(line: 68, column: 57, scope: !7)
590
+ !74 = !DILocation(line: 69, column: 54, scope: !7)
591
+ !75 = !DILocation(line: 70, column: 24, scope: !7)
592
+ !76 = !DILocation(line: 72, column: 24, scope: !7)
593
+ !77 = !DILocation(line: 73, column: 24, scope: !7)
594
+ !78 = !DILocation(line: 78, column: 30, scope: !7)
595
+ !79 = !DILocation(line: 79, column: 24, scope: !7)
596
+ !80 = !DILocation(line: 80, column: 24, scope: !7)
597
+ !81 = !DILocation(line: 82, column: 29, scope: !7)
598
+ !82 = !DILocation(line: 82, column: 52, scope: !7)
599
+ !83 = !DILocation(line: 59, column: 27, scope: !7)
600
+ !84 = !DILocation(line: 58, column: 4, scope: !7)
.triton/dump/76fb48b96c75cb8e388c291a18ef9b02/triton_.ttir ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module {
2
+ tt.func public @triton__0d1d2d3d4d5d6de7de(%arg0: !tt.ptr<i64, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg4: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg5: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg6: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}, %arg7: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
3
+ %cst = arith.constant dense<0.000000e+00> : tensor<2x128xbf16>
4
+ %cst_0 = arith.constant 0.000000e+00 : f32
5
+ %cst_1 = arith.constant dense<1.000000e+00> : tensor<2x128xf32>
6
+ %c256_i32 = arith.constant 256 : i32
7
+ %c128_i32 = arith.constant 128 : i32
8
+ %c0_i32 = arith.constant 0 : i32
9
+ %cst_2 = arith.constant dense<256> : tensor<2x1xi64>
10
+ %cst_3 = arith.constant dense<0> : tensor<2x1xi64>
11
+ %cst_4 = arith.constant dense<50257> : tensor<2x1xi64>
12
+ %cst_5 = arith.constant dense<9.99999974E-6> : tensor<2x1xf32>
13
+ %cst_6 = arith.constant dense<2.560000e+02> : tensor<2x1xf32>
14
+ %cst_7 = arith.constant dense<0.000000e+00> : tensor<1x128xf32>
15
+ %cst_8 = arith.constant dense<0.000000e+00> : tensor<2x128xf32>
16
+ %cst_9 = arith.constant dense<256> : tensor<2x1xi32>
17
+ %cst_10 = arith.constant dense<256> : tensor<1x128xi32>
18
+ %cst_11 = arith.constant dense<512> : tensor<2x1xi32>
19
+ %c2_i32 = arith.constant 2 : i32
20
+ %0 = tt.get_program_id x : i32
21
+ %1 = arith.muli %0, %c2_i32 : i32
22
+ %2 = tt.make_range {end = 2 : i32, start = 0 : i32} : tensor<2xi32>
23
+ %3 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<2xi32>) -> tensor<2x1xi32>
24
+ %4 = tt.splat %1 : (i32) -> tensor<2x1xi32>
25
+ %5 = arith.addi %4, %3 : tensor<2x1xi32>
26
+ %6 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
27
+ %7 = tt.expand_dims %6 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>
28
+ %8 = tt.splat %arg0 : (!tt.ptr<i64, 1>) -> tensor<2x1x!tt.ptr<i64, 1>>
29
+ %9 = tt.addptr %8, %5 : tensor<2x1x!tt.ptr<i64, 1>>, tensor<2x1xi32>
30
+ %10 = tt.load %9 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x1xi64>
31
+ %11 = arith.remsi %5, %cst_11 : tensor<2x1xi32>
32
+ %12 = arith.muli %11, %cst_9 : tensor<2x1xi32>
33
+ %13 = tt.broadcast %12 : (tensor<2x1xi32>) -> tensor<2x128xi32>
34
+ %14 = tt.splat %arg2 : (!tt.ptr<f32, 1>) -> tensor<2x128x!tt.ptr<f32, 1>>
35
+ %15 = arith.muli %5, %cst_9 : tensor<2x1xi32>
36
+ %16 = tt.broadcast %15 : (tensor<2x1xi32>) -> tensor<2x128xi32>
37
+ %17 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<2x128x!tt.ptr<bf16, 1>>
38
+ %18 = arith.addi %10, %cst_4 : tensor<2x1xi64>
39
+ %19 = arith.cmpi slt, %10, %cst_3 : tensor<2x1xi64>
40
+ %20 = arith.select %19, %18, %10 : tensor<2x1xi1>, tensor<2x1xi64>
41
+ %21 = arith.cmpi sge, %20, %cst_3 : tensor<2x1xi64>
42
+ %22 = arith.cmpi slt, %20, %cst_4 : tensor<2x1xi64>
43
+ %23 = arith.andi %21, %22 : tensor<2x1xi1>
44
+ %24 = arith.muli %20, %cst_2 : tensor<2x1xi64>
45
+ %25 = tt.broadcast %24 : (tensor<2x1xi64>) -> tensor<2x128xi64>
46
+ %26 = tt.splat %arg1 : (!tt.ptr<f32, 1>) -> tensor<2x128x!tt.ptr<f32, 1>>
47
+ %27:3 = scf.for %arg8 = %c0_i32 to %c256_i32 step %c128_i32 iter_args(%arg9 = %cst_8, %arg10 = %cst_8, %arg11 = %cst_8) -> (tensor<2x128xf32>, tensor<2x128xf32>, tensor<2x128xf32>) : i32 {
48
+ %51 = tt.splat %arg8 : (i32) -> tensor<1x128xi32>
49
+ %52 = arith.addi %51, %7 : tensor<1x128xi32>
50
+ %53 = arith.cmpi slt, %52, %cst_10 : tensor<1x128xi32>
51
+ %54 = tt.broadcast %52 : (tensor<1x128xi32>) -> tensor<2x128xi32>
52
+ %55 = arith.addi %54, %13 : tensor<2x128xi32>
53
+ %56 = tt.addptr %14, %55 : tensor<2x128x!tt.ptr<f32, 1>>, tensor<2x128xi32>
54
+ %57 = tt.broadcast %53 : (tensor<1x128xi1>) -> tensor<2x128xi1>
55
+ %58 = tt.load %56, %57, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x128xf32>
56
+ %59 = arith.addi %54, %16 : tensor<2x128xi32>
57
+ %60 = tt.addptr %17, %59 : tensor<2x128x!tt.ptr<bf16, 1>>, tensor<2x128xi32>
58
+ %61 = tt.load %60, %57, %cst {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x128xbf16>
59
+ %62 = arith.extf %61 : tensor<2x128xbf16> to tensor<2x128xf32>
60
+ tt.assert %23, "index out of bounds: 0 <= tmp3 < 50257", "/usr/local/lib/python3.10/dist-packages/torch/_inductor/codecache.py", "<module>", 1892 : tensor<2x1xi1>
61
+ %63 = arith.extsi %52 : tensor<1x128xi32> to tensor<1x128xi64>
62
+ %64 = tt.broadcast %63 : (tensor<1x128xi64>) -> tensor<2x128xi64>
63
+ %65 = arith.addi %64, %25 : tensor<2x128xi64>
64
+ %66 = tt.addptr %26, %65 : tensor<2x128x!tt.ptr<f32, 1>>, tensor<2x128xi64>
65
+ %67 = tt.load %66, %57, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x128xf32>
66
+ %68 = arith.addf %67, %58 : tensor<2x128xf32>
67
+ %69 = arith.addf %68, %62 : tensor<2x128xf32>
68
+ %70 = arith.subf %69, %arg9 : tensor<2x128xf32>
69
+ %71 = arith.addf %arg11, %cst_1 : tensor<2x128xf32>
70
+ %72 = arith.divf %70, %71 : tensor<2x128xf32>
71
+ %73 = arith.addf %arg9, %72 : tensor<2x128xf32>
72
+ %74 = arith.subf %69, %73 : tensor<2x128xf32>
73
+ %75 = arith.mulf %70, %74 : tensor<2x128xf32>
74
+ %76 = arith.addf %arg10, %75 : tensor<2x128xf32>
75
+ %77 = arith.select %57, %73, %arg9 : tensor<2x128xi1>, tensor<2x128xf32>
76
+ %78 = arith.select %57, %76, %arg10 : tensor<2x128xi1>, tensor<2x128xf32>
77
+ %79 = arith.select %57, %71, %arg11 : tensor<2x128xi1>, tensor<2x128xf32>
78
+ scf.yield %77, %78, %79 : tensor<2x128xf32>, tensor<2x128xf32>, tensor<2x128xf32>
79
+ }
80
+ %28:3 = "tt.reduce"(%27#0, %27#1, %27#2) <{axis = 1 : i32}> ({
81
+ ^bb0(%arg8: f32, %arg9: f32, %arg10: f32, %arg11: f32, %arg12: f32, %arg13: f32):
82
+ %51 = arith.subf %arg11, %arg8 : f32
83
+ %52 = arith.addf %arg10, %arg13 : f32
84
+ %53 = arith.cmpf oeq, %52, %cst_0 : f32
85
+ %54 = arith.divf %arg13, %52 : f32
86
+ %55 = arith.select %53, %cst_0, %54 : f32
87
+ %56 = arith.mulf %51, %55 : f32
88
+ %57 = arith.addf %arg8, %56 : f32
89
+ %58 = arith.addf %arg9, %arg12 : f32
90
+ %59 = arith.mulf %51, %51 : f32
91
+ %60 = arith.mulf %59, %arg10 : f32
92
+ %61 = arith.mulf %60, %55 : f32
93
+ %62 = arith.addf %58, %61 : f32
94
+ tt.reduce.return %57, %62, %52 : f32, f32, f32
95
+ }) : (tensor<2x128xf32>, tensor<2x128xf32>, tensor<2x128xf32>) -> (tensor<2xf32>, tensor<2xf32>, tensor<2xf32>)
96
+ %29 = tt.expand_dims %28#0 {axis = 1 : i32} : (tensor<2xf32>) -> tensor<2x1xf32>
97
+ %30 = tt.expand_dims %28#1 {axis = 1 : i32} : (tensor<2xf32>) -> tensor<2x1xf32>
98
+ %31 = arith.muli %11, %cst_9 : tensor<2x1xi32>
99
+ %32 = tt.broadcast %31 : (tensor<2x1xi32>) -> tensor<2x128xi32>
100
+ %33 = tt.splat %arg2 : (!tt.ptr<f32, 1>) -> tensor<2x128x!tt.ptr<f32, 1>>
101
+ %34 = arith.muli %5, %cst_9 : tensor<2x1xi32>
102
+ %35 = tt.broadcast %34 : (tensor<2x1xi32>) -> tensor<2x128xi32>
103
+ %36 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<2x128x!tt.ptr<bf16, 1>>
104
+ %37 = tt.splat %arg4 : (!tt.ptr<f32, 1>) -> tensor<1x128x!tt.ptr<f32, 1>>
105
+ %38 = arith.addi %10, %cst_4 : tensor<2x1xi64>
106
+ %39 = arith.cmpi slt, %10, %cst_3 : tensor<2x1xi64>
107
+ %40 = arith.select %39, %38, %10 : tensor<2x1xi1>, tensor<2x1xi64>
108
+ %41 = arith.cmpi sge, %40, %cst_3 : tensor<2x1xi64>
109
+ %42 = arith.cmpi slt, %40, %cst_4 : tensor<2x1xi64>
110
+ %43 = arith.andi %41, %42 : tensor<2x1xi1>
111
+ %44 = arith.muli %40, %cst_2 : tensor<2x1xi64>
112
+ %45 = tt.broadcast %44 : (tensor<2x1xi64>) -> tensor<2x128xi64>
113
+ %46 = tt.splat %arg1 : (!tt.ptr<f32, 1>) -> tensor<2x128x!tt.ptr<f32, 1>>
114
+ %47 = tt.broadcast %29 : (tensor<2x1xf32>) -> tensor<2x128xf32>
115
+ %48 = arith.divf %30, %cst_6 : tensor<2x1xf32>
116
+ %49 = arith.addf %48, %cst_5 : tensor<2x1xf32>
117
+ %50 = tt.splat %arg5 : (!tt.ptr<bf16, 1>) -> tensor<2x128x!tt.ptr<bf16, 1>>
118
+ scf.for %arg8 = %c0_i32 to %c256_i32 step %c128_i32 : i32 {
119
+ %51 = tt.splat %arg8 : (i32) -> tensor<1x128xi32>
120
+ %52 = arith.addi %51, %7 : tensor<1x128xi32>
121
+ %53 = arith.cmpi slt, %52, %cst_10 : tensor<1x128xi32>
122
+ %54 = tt.broadcast %52 : (tensor<1x128xi32>) -> tensor<2x128xi32>
123
+ %55 = arith.addi %54, %32 : tensor<2x128xi32>
124
+ %56 = tt.addptr %33, %55 : tensor<2x128x!tt.ptr<f32, 1>>, tensor<2x128xi32>
125
+ %57 = tt.broadcast %53 : (tensor<1x128xi1>) -> tensor<2x128xi1>
126
+ %58 = tt.load %56, %57, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x128xf32>
127
+ %59 = arith.addi %54, %35 : tensor<2x128xi32>
128
+ %60 = tt.addptr %36, %59 : tensor<2x128x!tt.ptr<bf16, 1>>, tensor<2x128xi32>
129
+ %61 = tt.load %60, %57, %cst {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<2x128xbf16>
130
+ %62 = arith.extf %61 : tensor<2x128xbf16> to tensor<2x128xf32>
131
+ %63 = tt.addptr %37, %52 : tensor<1x128x!tt.ptr<f32, 1>>, tensor<1x128xi32>
132
+ %64 = tt.load %63, %53, %cst_7 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x128xf32>
133
+ tt.assert %43, "index out of bounds: 0 <= tmp16 < 50257", "/usr/local/lib/python3.10/dist-packages/torch/_inductor/codecache.py", "<module>", 1892 : tensor<2x1xi1>
134
+ %65 = arith.extsi %52 : tensor<1x128xi32> to tensor<1x128xi64>
135
+ %66 = tt.broadcast %65 : (tensor<1x128xi64>) -> tensor<2x128xi64>
136
+ %67 = arith.addi %66, %45 : tensor<2x128xi64>
137
+ %68 = tt.addptr %46, %67 : tensor<2x128x!tt.ptr<f32, 1>>, tensor<2x128xi64>
138
+ %69 = tt.load %68, %57, %cst_8 {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<2x128xf32>
139
+ %70 = arith.addf %69, %58 : tensor<2x128xf32>
140
+ %71 = arith.addf %70, %62 : tensor<2x128xf32>
141
+ %72 = arith.subf %71, %47 : tensor<2x128xf32>
142
+ %73 = tt.extern_elementwise %49 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_rsqrtf"} : (tensor<2x1xf32>) -> tensor<2x1xf32>
143
+ %74 = tt.broadcast %73 : (tensor<2x1xf32>) -> tensor<2x128xf32>
144
+ %75 = arith.mulf %72, %74 : tensor<2x128xf32>
145
+ %76 = tt.broadcast %64 : (tensor<1x128xf32>) -> tensor<2x128xf32>
146
+ %77 = arith.mulf %75, %76 : tensor<2x128xf32>
147
+ %78 = tt.addptr %50, %59 : tensor<2x128x!tt.ptr<bf16, 1>>, tensor<2x128xi32>
148
+ %79 = arith.truncf %77 : tensor<2x128xf32> to tensor<2x128xbf16>
149
+ tt.store %78, %79, %57 {cache = 1 : i32, evict = 1 : i32} : tensor<2x128xbf16>
150
+ }
151
+ tt.return
152
+ }
153
+ }
.triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.cubin ADDED
Binary file (39.4 kB). View file
 
.triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.llir ADDED
@@ -0,0 +1,745 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ; ModuleID = 'LLVMDialectModule'
2
+ source_filename = "LLVMDialectModule"
3
+
4
+ @global_smem = external addrspace(3) global [0 x i8]
5
+ @.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
6
+
7
+ define void @triton__0d1d2d3d4de5(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, i64 %4, i64 %5) local_unnamed_addr !dbg !7 {
8
+ %7 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
9
+ %8 = lshr i32 %7, 5, !dbg !10
10
+ %urem = and i32 %7, 255, !dbg !10
11
+ %9 = or i32 %urem, 256, !dbg !10
12
+ %10 = or i32 %urem, 512, !dbg !10
13
+ %11 = or i32 %urem, 768, !dbg !10
14
+ %12 = or i32 %urem, 1024, !dbg !10
15
+ %13 = or i32 %urem, 1280, !dbg !10
16
+ %14 = or i32 %urem, 1536, !dbg !10
17
+ %15 = or i32 %urem, 1792, !dbg !10
18
+ %16 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #5, !dbg !11
19
+ %17 = sext i32 %16 to i64, !dbg !12
20
+ %18 = insertelement <8 x i32> poison, i32 %urem, i64 0
21
+ %19 = insertelement <8 x i32> %18, i32 %9, i64 1
22
+ %20 = insertelement <8 x i32> %19, i32 %10, i64 2
23
+ %21 = insertelement <8 x i32> %20, i32 %11, i64 3
24
+ %22 = insertelement <8 x i32> %21, i32 %12, i64 4
25
+ %23 = insertelement <8 x i32> %22, i32 %13, i64 5
26
+ %24 = insertelement <8 x i32> %23, i32 %14, i64 6
27
+ %25 = insertelement <8 x i32> %24, i32 %15, i64 7
28
+ %26 = zext <8 x i32> %25 to <8 x i64>
29
+ %27 = mul nsw i64 %17, 50257, !dbg !13
30
+ %invariant.gep = getelementptr i16, ptr addrspace(1) %0, i64 %27, !dbg !14
31
+ br label %28, !dbg !14
32
+
33
+ 28: ; preds = %6, %28
34
+ %29 = phi i32 [ 0, %6 ], [ %81, %28 ]
35
+ %30 = phi <8 x float> [ <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>, %6 ], [ %80, %28 ]
36
+ %31 = zext nneg i32 %29 to i64, !dbg !15
37
+ %32 = fcmp ord <8 x float> %30, zeroinitializer, !dbg !16
38
+ %33 = insertelement <8 x i64> poison, i64 %31, i64 0, !dbg !15
39
+ %34 = shufflevector <8 x i64> %33, <8 x i64> poison, <8 x i32> zeroinitializer, !dbg !15
40
+ %35 = or <8 x i64> %34, %26, !dbg !15
41
+ %36 = icmp ult <8 x i64> %35, <i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257>, !dbg !20
42
+ %37 = extractelement <8 x i64> %35, i64 0, !dbg !21
43
+ %gep = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %37, !dbg !21
44
+ %38 = extractelement <8 x i64> %35, i64 1, !dbg !21
45
+ %gep21 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %38, !dbg !21
46
+ %39 = extractelement <8 x i64> %35, i64 2, !dbg !21
47
+ %gep23 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %39, !dbg !21
48
+ %40 = extractelement <8 x i64> %35, i64 3, !dbg !21
49
+ %gep25 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %40, !dbg !21
50
+ %41 = extractelement <8 x i64> %35, i64 4, !dbg !21
51
+ %gep27 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %41, !dbg !21
52
+ %42 = extractelement <8 x i64> %35, i64 5, !dbg !21
53
+ %gep29 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %42, !dbg !21
54
+ %43 = extractelement <8 x i64> %35, i64 6, !dbg !21
55
+ %gep31 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %43, !dbg !21
56
+ %44 = extractelement <8 x i64> %35, i64 7, !dbg !21
57
+ %gep33 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %44, !dbg !21
58
+ %45 = extractelement <8 x i1> %36, i64 0, !dbg !22
59
+ %46 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep, i1 %45, i16 0, i1 %45) #5, !dbg !22
60
+ %47 = extractelement <8 x i1> %36, i64 1, !dbg !22
61
+ %48 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep21, i1 %47, i16 0, i1 %47) #5, !dbg !22
62
+ %49 = extractelement <8 x i1> %36, i64 2, !dbg !22
63
+ %50 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep23, i1 %49, i16 0, i1 %49) #5, !dbg !22
64
+ %51 = extractelement <8 x i1> %36, i64 3, !dbg !22
65
+ %52 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep25, i1 %51, i16 0, i1 %51) #5, !dbg !22
66
+ %53 = extractelement <8 x i1> %36, i64 4, !dbg !22
67
+ %54 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep27, i1 %53, i16 0, i1 %53) #5, !dbg !22
68
+ %55 = extractelement <8 x i1> %36, i64 5, !dbg !22
69
+ %56 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep29, i1 %55, i16 0, i1 %55) #5, !dbg !22
70
+ %57 = extractelement <8 x i1> %36, i64 6, !dbg !22
71
+ %58 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep31, i1 %57, i16 0, i1 %57) #5, !dbg !22
72
+ %59 = extractelement <8 x i1> %36, i64 7, !dbg !22
73
+ %60 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep33, i1 %59, i16 0, i1 %59) #5, !dbg !22
74
+ %61 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %46) #5, !dbg !23
75
+ %62 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %48) #5, !dbg !23
76
+ %63 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %50) #5, !dbg !23
77
+ %64 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %52) #5, !dbg !23
78
+ %65 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %54) #5, !dbg !23
79
+ %66 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %56) #5, !dbg !23
80
+ %67 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %58) #5, !dbg !23
81
+ %68 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %60) #5, !dbg !23
82
+ %69 = insertelement <8 x float> poison, float %61, i64 0, !dbg !24
83
+ %70 = insertelement <8 x float> %69, float %62, i64 1, !dbg !24
84
+ %71 = insertelement <8 x float> %70, float %63, i64 2, !dbg !24
85
+ %72 = insertelement <8 x float> %71, float %64, i64 3, !dbg !24
86
+ %73 = insertelement <8 x float> %72, float %65, i64 4, !dbg !24
87
+ %74 = insertelement <8 x float> %73, float %66, i64 5, !dbg !24
88
+ %75 = insertelement <8 x float> %74, float %67, i64 6, !dbg !24
89
+ %76 = insertelement <8 x float> %75, float %68, i64 7, !dbg !24
90
+ %77 = fcmp ule <8 x float> %30, %76, !dbg !24
91
+ %78 = and <8 x i1> %32, %77, !dbg !25
92
+ %79 = and <8 x i1> %36, %78, !dbg !26
93
+ %80 = select <8 x i1> %79, <8 x float> %76, <8 x float> %30, !dbg !26
94
+ %81 = add nuw nsw i32 %29, 2048, !dbg !14
95
+ %82 = icmp ult i32 %29, 48209, !dbg !14
96
+ br i1 %82, label %28, label %83, !dbg !14
97
+
98
+ 83: ; preds = %28
99
+ %84 = and i32 %7, 31, !dbg !10
100
+ %85 = and i32 %8, 7, !dbg !10
101
+ %86 = extractelement <8 x float> %80, i64 0, !dbg !27
102
+ %87 = extractelement <8 x float> %80, i64 1, !dbg !27
103
+ %88 = fcmp ogt float %86, %87, !dbg !27
104
+ %89 = fcmp uno float %86, 0.000000e+00, !dbg !31
105
+ %90 = or i1 %88, %89, !dbg !32
106
+ %91 = select i1 %90, float %86, float %87, !dbg !33
107
+ %92 = extractelement <8 x float> %80, i64 2, !dbg !27
108
+ %93 = fcmp ogt float %91, %92, !dbg !27
109
+ %94 = fcmp uno float %91, 0.000000e+00, !dbg !31
110
+ %95 = or i1 %93, %94, !dbg !32
111
+ %96 = select i1 %95, float %91, float %92, !dbg !33
112
+ %97 = extractelement <8 x float> %80, i64 3, !dbg !27
113
+ %98 = fcmp ogt float %96, %97, !dbg !27
114
+ %99 = fcmp uno float %96, 0.000000e+00, !dbg !31
115
+ %100 = or i1 %98, %99, !dbg !32
116
+ %101 = select i1 %100, float %96, float %97, !dbg !33
117
+ %102 = extractelement <8 x float> %80, i64 4, !dbg !27
118
+ %103 = fcmp ogt float %101, %102, !dbg !27
119
+ %104 = fcmp uno float %101, 0.000000e+00, !dbg !31
120
+ %105 = or i1 %103, %104, !dbg !32
121
+ %106 = select i1 %105, float %101, float %102, !dbg !33
122
+ %107 = extractelement <8 x float> %80, i64 5, !dbg !27
123
+ %108 = fcmp ogt float %106, %107, !dbg !27
124
+ %109 = fcmp uno float %106, 0.000000e+00, !dbg !31
125
+ %110 = or i1 %108, %109, !dbg !32
126
+ %111 = select i1 %110, float %106, float %107, !dbg !33
127
+ %112 = extractelement <8 x float> %80, i64 6, !dbg !27
128
+ %113 = fcmp ogt float %111, %112, !dbg !27
129
+ %114 = fcmp uno float %111, 0.000000e+00, !dbg !31
130
+ %115 = or i1 %113, %114, !dbg !32
131
+ %116 = select i1 %115, float %111, float %112, !dbg !33
132
+ %117 = extractelement <8 x float> %80, i64 7, !dbg !27
133
+ %118 = fcmp ogt float %116, %117, !dbg !27
134
+ %119 = fcmp uno float %116, 0.000000e+00, !dbg !31
135
+ %120 = or i1 %118, %119, !dbg !32
136
+ %121 = select i1 %120, float %116, float %117, !dbg !33
137
+ %122 = bitcast float %121 to i32, !dbg !34
138
+ %123 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %122, i32 16, i32 31), !dbg !34
139
+ %124 = bitcast i32 %123 to float, !dbg !34
140
+ %125 = fcmp ogt float %121, %124, !dbg !27
141
+ %126 = fcmp uno float %121, 0.000000e+00, !dbg !31
142
+ %127 = or i1 %126, %125, !dbg !32
143
+ %128 = select i1 %127, float %121, float %124, !dbg !33
144
+ %129 = bitcast float %128 to i32, !dbg !34
145
+ %130 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %129, i32 8, i32 31), !dbg !34
146
+ %131 = bitcast i32 %130 to float, !dbg !34
147
+ %132 = fcmp ogt float %128, %131, !dbg !27
148
+ %133 = fcmp uno float %128, 0.000000e+00, !dbg !31
149
+ %134 = or i1 %132, %133, !dbg !32
150
+ %135 = select i1 %134, float %128, float %131, !dbg !33
151
+ %136 = bitcast float %135 to i32, !dbg !34
152
+ %137 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %136, i32 4, i32 31), !dbg !34
153
+ %138 = bitcast i32 %137 to float, !dbg !34
154
+ %139 = fcmp ogt float %135, %138, !dbg !27
155
+ %140 = fcmp uno float %135, 0.000000e+00, !dbg !31
156
+ %141 = or i1 %139, %140, !dbg !32
157
+ %142 = select i1 %141, float %135, float %138, !dbg !33
158
+ %143 = bitcast float %142 to i32, !dbg !34
159
+ %144 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %143, i32 2, i32 31), !dbg !34
160
+ %145 = bitcast i32 %144 to float, !dbg !34
161
+ %146 = fcmp ogt float %142, %145, !dbg !27
162
+ %147 = fcmp uno float %142, 0.000000e+00, !dbg !31
163
+ %148 = or i1 %146, %147, !dbg !32
164
+ %149 = select i1 %148, float %142, float %145, !dbg !33
165
+ %150 = bitcast float %149 to i32, !dbg !34
166
+ %151 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %150, i32 1, i32 31), !dbg !34
167
+ %152 = bitcast i32 %151 to float, !dbg !34
168
+ %153 = fcmp ogt float %149, %152, !dbg !27
169
+ %154 = fcmp uno float %149, 0.000000e+00, !dbg !31
170
+ %155 = or i1 %153, %154, !dbg !32
171
+ %156 = select i1 %155, float %149, float %152, !dbg !33
172
+ %157 = icmp eq i32 %84, 0, !dbg !34
173
+ %158 = zext nneg i32 %85 to i64, !dbg !34
174
+ %159 = getelementptr float, ptr addrspace(3) @global_smem, i64 %158, !dbg !34
175
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %159, float %156, i1 %157) #5, !dbg !34
176
+ tail call void @llvm.nvvm.barrier0(), !dbg !34
177
+ %160 = icmp slt i32 %7, 8, !dbg !34
178
+ %161 = sext i32 %7 to i64, !dbg !34
179
+ %162 = getelementptr float, ptr addrspace(3) @global_smem, i64 %161, !dbg !34
180
+ %163 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %162, i1 %160) #5, !dbg !34
181
+ %164 = bitcast float %163 to i32, !dbg !34
182
+ %165 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %164, i32 4, i32 31), !dbg !34
183
+ %166 = bitcast i32 %165 to float, !dbg !34
184
+ %167 = fcmp ogt float %163, %166, !dbg !27
185
+ %168 = fcmp uno float %163, 0.000000e+00, !dbg !31
186
+ %169 = or i1 %168, %167, !dbg !32
187
+ %170 = select i1 %169, float %163, float %166, !dbg !33
188
+ %171 = bitcast float %170 to i32, !dbg !34
189
+ %172 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %171, i32 2, i32 31), !dbg !34
190
+ %173 = bitcast i32 %172 to float, !dbg !34
191
+ %174 = fcmp ogt float %170, %173, !dbg !27
192
+ %175 = fcmp uno float %170, 0.000000e+00, !dbg !31
193
+ %176 = or i1 %174, %175, !dbg !32
194
+ %177 = select i1 %176, float %170, float %173, !dbg !33
195
+ %178 = bitcast float %177 to i32, !dbg !34
196
+ %179 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %178, i32 1, i32 31), !dbg !34
197
+ %180 = bitcast i32 %179 to float, !dbg !34
198
+ %181 = fcmp ogt float %177, %180, !dbg !27
199
+ %182 = fcmp uno float %177, 0.000000e+00, !dbg !31
200
+ %183 = or i1 %181, %182, !dbg !32
201
+ %184 = select i1 %183, float %177, float %180, !dbg !33
202
+ %185 = and i32 %7, 7, !dbg !34
203
+ %186 = icmp eq i32 %185, 0, !dbg !34
204
+ %187 = and i1 %160, %186, !dbg !34
205
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %162, float %184, i1 %187) #5, !dbg !34
206
+ tail call void @llvm.nvvm.barrier0(), !dbg !34
207
+ %188 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !34
208
+ tail call void @llvm.nvvm.barrier0(), !dbg !36
209
+ %189 = insertelement <1 x float> undef, float %188, i64 0, !dbg !36
210
+ store <1 x float> %189, ptr addrspace(3) @global_smem, align 4, !dbg !36
211
+ tail call void @llvm.nvvm.barrier0(), !dbg !36
212
+ %190 = load i32, ptr addrspace(3) @global_smem, align 4, !dbg !36
213
+ %191 = getelementptr float, ptr addrspace(1) %1, i64 %17, !dbg !37
214
+ %192 = icmp eq i32 %urem, 0, !dbg !38
215
+ tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %190, ptr addrspace(1) %191, i1 %192) #5, !dbg !38
216
+ br label %193, !dbg !39
217
+
218
+ 193: ; preds = %83, %193
219
+ %194 = phi i32 [ 0, %83 ], [ %267, %193 ]
220
+ %195 = phi <8 x float> [ zeroinitializer, %83 ], [ %266, %193 ]
221
+ %196 = zext nneg i32 %194 to i64, !dbg !40
222
+ %197 = insertelement <8 x i64> poison, i64 %196, i64 0, !dbg !40
223
+ %198 = shufflevector <8 x i64> %197, <8 x i64> poison, <8 x i32> zeroinitializer, !dbg !40
224
+ %199 = or <8 x i64> %198, %26, !dbg !40
225
+ %200 = icmp ult <8 x i64> %199, <i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257>, !dbg !41
226
+ %201 = extractelement <8 x i64> %199, i64 0, !dbg !42
227
+ %gep35 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %201, !dbg !42
228
+ %202 = extractelement <8 x i64> %199, i64 1, !dbg !42
229
+ %gep37 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %202, !dbg !42
230
+ %203 = extractelement <8 x i64> %199, i64 2, !dbg !42
231
+ %gep39 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %203, !dbg !42
232
+ %204 = extractelement <8 x i64> %199, i64 3, !dbg !42
233
+ %gep41 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %204, !dbg !42
234
+ %205 = extractelement <8 x i64> %199, i64 4, !dbg !42
235
+ %gep43 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %205, !dbg !42
236
+ %206 = extractelement <8 x i64> %199, i64 5, !dbg !42
237
+ %gep45 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %206, !dbg !42
238
+ %207 = extractelement <8 x i64> %199, i64 6, !dbg !42
239
+ %gep47 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %207, !dbg !42
240
+ %208 = extractelement <8 x i64> %199, i64 7, !dbg !42
241
+ %gep49 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %208, !dbg !42
242
+ %209 = extractelement <8 x i1> %200, i64 0, !dbg !43
243
+ %210 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep35, i1 %209, i16 0, i1 %209) #5, !dbg !43
244
+ %211 = extractelement <8 x i1> %200, i64 1, !dbg !43
245
+ %212 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep37, i1 %211, i16 0, i1 %211) #5, !dbg !43
246
+ %213 = extractelement <8 x i1> %200, i64 2, !dbg !43
247
+ %214 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep39, i1 %213, i16 0, i1 %213) #5, !dbg !43
248
+ %215 = extractelement <8 x i1> %200, i64 3, !dbg !43
249
+ %216 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep41, i1 %215, i16 0, i1 %215) #5, !dbg !43
250
+ %217 = extractelement <8 x i1> %200, i64 4, !dbg !43
251
+ %218 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep43, i1 %217, i16 0, i1 %217) #5, !dbg !43
252
+ %219 = extractelement <8 x i1> %200, i64 5, !dbg !43
253
+ %220 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep45, i1 %219, i16 0, i1 %219) #5, !dbg !43
254
+ %221 = extractelement <8 x i1> %200, i64 6, !dbg !43
255
+ %222 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep47, i1 %221, i16 0, i1 %221) #5, !dbg !43
256
+ %223 = extractelement <8 x i1> %200, i64 7, !dbg !43
257
+ %224 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep49, i1 %223, i16 0, i1 %223) #5, !dbg !43
258
+ %225 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %210) #5, !dbg !44
259
+ %226 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %212) #5, !dbg !44
260
+ %227 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %214) #5, !dbg !44
261
+ %228 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %216) #5, !dbg !44
262
+ %229 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %218) #5, !dbg !44
263
+ %230 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %220) #5, !dbg !44
264
+ %231 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %222) #5, !dbg !44
265
+ %232 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %224) #5, !dbg !44
266
+ %233 = fsub float %225, %188, !dbg !45
267
+ %234 = fsub float %226, %188, !dbg !45
268
+ %235 = fsub float %227, %188, !dbg !45
269
+ %236 = fsub float %228, %188, !dbg !45
270
+ %237 = fsub float %229, %188, !dbg !45
271
+ %238 = fsub float %230, %188, !dbg !45
272
+ %239 = fsub float %231, %188, !dbg !45
273
+ %240 = fsub float %232, %188, !dbg !45
274
+ %241 = fmul float %233, 0x3FF7154760000000, !dbg !46
275
+ %242 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %241) #5, !dbg !46
276
+ %243 = fmul float %234, 0x3FF7154760000000, !dbg !46
277
+ %244 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %243) #5, !dbg !46
278
+ %245 = fmul float %235, 0x3FF7154760000000, !dbg !46
279
+ %246 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %245) #5, !dbg !46
280
+ %247 = fmul float %236, 0x3FF7154760000000, !dbg !46
281
+ %248 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %247) #5, !dbg !46
282
+ %249 = fmul float %237, 0x3FF7154760000000, !dbg !46
283
+ %250 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %249) #5, !dbg !46
284
+ %251 = fmul float %238, 0x3FF7154760000000, !dbg !46
285
+ %252 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %251) #5, !dbg !46
286
+ %253 = fmul float %239, 0x3FF7154760000000, !dbg !46
287
+ %254 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %253) #5, !dbg !46
288
+ %255 = fmul float %240, 0x3FF7154760000000, !dbg !46
289
+ %256 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %255) #5, !dbg !46
290
+ %257 = insertelement <8 x float> poison, float %242, i64 0, !dbg !47
291
+ %258 = insertelement <8 x float> %257, float %244, i64 1, !dbg !47
292
+ %259 = insertelement <8 x float> %258, float %246, i64 2, !dbg !47
293
+ %260 = insertelement <8 x float> %259, float %248, i64 3, !dbg !47
294
+ %261 = insertelement <8 x float> %260, float %250, i64 4, !dbg !47
295
+ %262 = insertelement <8 x float> %261, float %252, i64 5, !dbg !47
296
+ %263 = insertelement <8 x float> %262, float %254, i64 6, !dbg !47
297
+ %264 = insertelement <8 x float> %263, float %256, i64 7, !dbg !47
298
+ %265 = select <8 x i1> %200, <8 x float> %264, <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, !dbg !47
299
+ %266 = fadd <8 x float> %195, %265, !dbg !47
300
+ %267 = add nuw nsw i32 %194, 2048, !dbg !39
301
+ %268 = icmp ult i32 %194, 48209, !dbg !39
302
+ br i1 %268, label %193, label %269, !dbg !39
303
+
304
+ 269: ; preds = %193
305
+ tail call void @llvm.nvvm.barrier0(), !dbg !48
306
+ %shift = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
307
+ %270 = fadd <8 x float> %266, %shift, !dbg !52
308
+ %shift95 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
309
+ %271 = fadd <8 x float> %shift95, %270, !dbg !52
310
+ %shift96 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
311
+ %272 = fadd <8 x float> %shift96, %271, !dbg !52
312
+ %shift97 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
313
+ %273 = fadd <8 x float> %shift97, %272, !dbg !52
314
+ %shift98 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
315
+ %274 = fadd <8 x float> %shift98, %273, !dbg !52
316
+ %shift99 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
317
+ %275 = fadd <8 x float> %shift99, %274, !dbg !52
318
+ %shift100 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
319
+ %276 = fadd <8 x float> %shift100, %275, !dbg !52
320
+ %277 = extractelement <8 x float> %276, i64 0, !dbg !52
321
+ %278 = bitcast float %277 to i32, !dbg !48
322
+ %279 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %278, i32 16, i32 31), !dbg !48
323
+ %280 = bitcast i32 %279 to float, !dbg !48
324
+ %281 = fadd float %277, %280, !dbg !52
325
+ %282 = bitcast float %281 to i32, !dbg !48
326
+ %283 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %282, i32 8, i32 31), !dbg !48
327
+ %284 = bitcast i32 %283 to float, !dbg !48
328
+ %285 = fadd float %281, %284, !dbg !52
329
+ %286 = bitcast float %285 to i32, !dbg !48
330
+ %287 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %286, i32 4, i32 31), !dbg !48
331
+ %288 = bitcast i32 %287 to float, !dbg !48
332
+ %289 = fadd float %285, %288, !dbg !52
333
+ %290 = bitcast float %289 to i32, !dbg !48
334
+ %291 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %290, i32 2, i32 31), !dbg !48
335
+ %292 = bitcast i32 %291 to float, !dbg !48
336
+ %293 = fadd float %289, %292, !dbg !52
337
+ %294 = bitcast float %293 to i32, !dbg !48
338
+ %295 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %294, i32 1, i32 31), !dbg !48
339
+ %296 = bitcast i32 %295 to float, !dbg !48
340
+ %297 = fadd float %293, %296, !dbg !52
341
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %159, float %297, i1 %157) #5, !dbg !48
342
+ tail call void @llvm.nvvm.barrier0(), !dbg !48
343
+ %298 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %162, i1 %160) #5, !dbg !48
344
+ %299 = bitcast float %298 to i32, !dbg !48
345
+ %300 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %299, i32 4, i32 31), !dbg !48
346
+ %301 = bitcast i32 %300 to float, !dbg !48
347
+ %302 = fadd float %298, %301, !dbg !52
348
+ %303 = bitcast float %302 to i32, !dbg !48
349
+ %304 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %303, i32 2, i32 31), !dbg !48
350
+ %305 = bitcast i32 %304 to float, !dbg !48
351
+ %306 = fadd float %302, %305, !dbg !52
352
+ %307 = bitcast float %306 to i32, !dbg !48
353
+ %308 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %307, i32 1, i32 31), !dbg !48
354
+ %309 = bitcast i32 %308 to float, !dbg !48
355
+ %310 = fadd float %306, %309, !dbg !52
356
+ tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %162, float %310, i1 %187) #5, !dbg !48
357
+ tail call void @llvm.nvvm.barrier0(), !dbg !48
358
+ %311 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !48
359
+ tail call void @llvm.nvvm.barrier0(), !dbg !56
360
+ %312 = insertelement <1 x float> undef, float %311, i64 0, !dbg !56
361
+ store <1 x float> %312, ptr addrspace(3) @global_smem, align 4, !dbg !56
362
+ tail call void @llvm.nvvm.barrier0(), !dbg !56
363
+ %313 = load i32, ptr addrspace(3) @global_smem, align 4, !dbg !56
364
+ %314 = getelementptr float, ptr addrspace(1) %2, i64 %17, !dbg !57
365
+ tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %313, ptr addrspace(1) %314, i1 %192) #5, !dbg !58
366
+ %315 = fcmp olt float %311, 0x3810000000000000, !dbg !59
367
+ %316 = fmul float %311, 0x4160000000000000, !dbg !59
368
+ %.02.i = select i1 %315, float %316, float %311, !dbg !59
369
+ %i.i.0.i = select i1 %315, float -2.300000e+01, float 0.000000e+00, !dbg !59
370
+ %317 = bitcast float %.02.i to i32, !dbg !59
371
+ %318 = add i32 %317, -1059760811, !dbg !59
372
+ %319 = and i32 %318, -8388608, !dbg !59
373
+ %320 = sub i32 %317, %319, !dbg !59
374
+ %321 = bitcast i32 %320 to float, !dbg !59
375
+ %322 = sitofp i32 %319 to float, !dbg !59
376
+ %323 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
377
+ %.not.i = icmp eq i32 %323, 0, !dbg !59
378
+ %324 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %322, float 0x3E80000000000000, float %i.i.0.i) #5, !dbg !59
379
+ %325 = tail call float @llvm.nvvm.fma.rn.f(float %322, float 0x3E80000000000000, float %i.i.0.i) #5, !dbg !59
380
+ %.08.i = select i1 %.not.i, float %325, float %324, !dbg !59
381
+ %326 = fadd float %321, -1.000000e+00, !dbg !59
382
+ %327 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
383
+ %.not1.i = icmp eq i32 %327, 0, !dbg !59
384
+ %328 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %326, float 0x3FC2073EC0000000) #5, !dbg !59
385
+ %329 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %326, float 0x3FC2073EC0000000) #5, !dbg !59
386
+ %.010.i = select i1 %.not1.i, float %329, float %328, !dbg !59
387
+ %330 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
388
+ %.not2.i = icmp eq i32 %330, 0, !dbg !59
389
+ %331 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i, float %326, float 0xBFBF19B980000000) #5, !dbg !59
390
+ %332 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i, float %326, float 0xBFBF19B980000000) #5, !dbg !59
391
+ %.011.i = select i1 %.not2.i, float %332, float %331, !dbg !59
392
+ %333 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
393
+ %.not3.i = icmp eq i32 %333, 0, !dbg !59
394
+ %334 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i, float %326, float 0x3FC1E52AA0000000) #5, !dbg !59
395
+ %335 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i, float %326, float 0x3FC1E52AA0000000) #5, !dbg !59
396
+ %.012.i = select i1 %.not3.i, float %335, float %334, !dbg !59
397
+ %336 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
398
+ %.not4.i = icmp eq i32 %336, 0, !dbg !59
399
+ %337 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i, float %326, float 0xBFC55B1720000000) #5, !dbg !59
400
+ %338 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i, float %326, float 0xBFC55B1720000000) #5, !dbg !59
401
+ %.09.i = select i1 %.not4.i, float %338, float %337, !dbg !59
402
+ %339 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
403
+ %.not5.i = icmp eq i32 %339, 0, !dbg !59
404
+ %340 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i, float %326, float 0x3FC99DA160000000) #5, !dbg !59
405
+ %341 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i, float %326, float 0x3FC99DA160000000) #5, !dbg !59
406
+ %.05.i = select i1 %.not5.i, float %341, float %340, !dbg !59
407
+ %342 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
408
+ %.not6.i = icmp eq i32 %342, 0, !dbg !59
409
+ %343 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i, float %326, float 0xBFCFFFE440000000) #5, !dbg !59
410
+ %344 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i, float %326, float 0xBFCFFFE440000000) #5, !dbg !59
411
+ %.01.i = select i1 %.not6.i, float %344, float %343, !dbg !59
412
+ %345 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
413
+ %.not7.i = icmp eq i32 %345, 0, !dbg !59
414
+ %346 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i, float %326, float 0x3FD5554F00000000) #5, !dbg !59
415
+ %347 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i, float %326, float 0x3FD5554F00000000) #5, !dbg !59
416
+ %.0.i = select i1 %.not7.i, float %347, float %346, !dbg !59
417
+ %348 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
418
+ %.not8.i = icmp eq i32 %348, 0, !dbg !59
419
+ %349 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i, float %326, float -5.000000e-01) #5, !dbg !59
420
+ %350 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i, float %326, float -5.000000e-01) #5, !dbg !59
421
+ %.07.i = select i1 %.not8.i, float %350, float %349, !dbg !59
422
+ %351 = fmul float %326, %.07.i, !dbg !59
423
+ %352 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
424
+ %.not9.i = icmp eq i32 %352, 0, !dbg !59
425
+ %353 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %351, float %326, float %326) #5, !dbg !59
426
+ %354 = tail call float @llvm.nvvm.fma.rn.f(float %351, float %326, float %326) #5, !dbg !59
427
+ %.06.i = select i1 %.not9.i, float %354, float %353, !dbg !59
428
+ %355 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
429
+ %.not10.i = icmp eq i32 %355, 0, !dbg !59
430
+ %356 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i, float 0x3FE62E4300000000, float %.06.i) #5, !dbg !59
431
+ %357 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i, float 0x3FE62E4300000000, float %.06.i) #5, !dbg !59
432
+ %.04.i = select i1 %.not10.i, float %357, float %356, !dbg !59
433
+ %358 = icmp ugt i32 %317, 2139095039, !dbg !59
434
+ br i1 %358, label %__nv_fmaf_rn.exit.i.i, label %__nv_logf.exit, !dbg !59
435
+
436
+ __nv_fmaf_rn.exit.i.i: ; preds = %269
437
+ %359 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
438
+ %.not11.i = icmp eq i32 %359, 0, !dbg !59
439
+ %360 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !59
440
+ %361 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !59
441
+ %.03.i = select i1 %.not11.i, float %361, float %360, !dbg !59
442
+ br label %__nv_logf.exit, !dbg !59
443
+
444
+ __nv_logf.exit: ; preds = %269, %__nv_fmaf_rn.exit.i.i
445
+ %r.i.0.i = phi float [ %.03.i, %__nv_fmaf_rn.exit.i.i ], [ %.04.i, %269 ], !dbg !59
446
+ %362 = fcmp oeq float %.02.i, 0.000000e+00, !dbg !59
447
+ %r.i.1.i = select i1 %362, float 0xFFF0000000000000, float %r.i.0.i, !dbg !59
448
+ %363 = extractelement <8 x i64> %26, i64 0, !dbg !60
449
+ %364 = extractelement <8 x i64> %26, i64 1, !dbg !60
450
+ %365 = extractelement <8 x i64> %26, i64 2, !dbg !60
451
+ %366 = extractelement <8 x i64> %26, i64 3, !dbg !60
452
+ %367 = extractelement <8 x i64> %26, i64 4, !dbg !60
453
+ %368 = extractelement <8 x i64> %26, i64 5, !dbg !60
454
+ %369 = extractelement <8 x i64> %26, i64 6, !dbg !60
455
+ %370 = extractelement <8 x i64> %26, i64 7, !dbg !60
456
+ br label %371, !dbg !61
457
+
458
+ 371: ; preds = %__nv_logf.exit, %371
459
+ %372 = phi i32 [ 0, %__nv_logf.exit ], [ %454, %371 ]
460
+ %373 = zext nneg i32 %372 to i64, !dbg !60
461
+ %374 = or i64 %363, %373, !dbg !60
462
+ %375 = or i64 %364, %373, !dbg !60
463
+ %376 = or i64 %365, %373, !dbg !60
464
+ %377 = or i64 %366, %373, !dbg !60
465
+ %378 = or i64 %367, %373, !dbg !60
466
+ %379 = or i64 %368, %373, !dbg !60
467
+ %380 = or i64 %369, %373, !dbg !60
468
+ %381 = or i64 %370, %373, !dbg !60
469
+ %382 = icmp ult i64 %374, 50257, !dbg !62
470
+ %383 = icmp ult i64 %375, 50257, !dbg !62
471
+ %384 = icmp ult i64 %376, 50257, !dbg !62
472
+ %385 = icmp ult i64 %377, 50257, !dbg !62
473
+ %386 = icmp ult i64 %378, 50257, !dbg !62
474
+ %387 = icmp ult i64 %379, 50257, !dbg !62
475
+ %388 = icmp ult i64 %380, 50257, !dbg !62
476
+ %389 = icmp ult i64 %381, 50257, !dbg !62
477
+ %390 = add nsw i64 %374, %27, !dbg !63
478
+ %391 = add nsw i64 %375, %27, !dbg !63
479
+ %392 = add nsw i64 %376, %27, !dbg !63
480
+ %393 = add nsw i64 %377, %27, !dbg !63
481
+ %394 = add nsw i64 %378, %27, !dbg !63
482
+ %395 = add nsw i64 %379, %27, !dbg !63
483
+ %396 = add nsw i64 %380, %27, !dbg !63
484
+ %397 = add nsw i64 %381, %27, !dbg !63
485
+ %398 = getelementptr i16, ptr addrspace(1) %0, i64 %390, !dbg !64
486
+ %399 = getelementptr i16, ptr addrspace(1) %0, i64 %391, !dbg !64
487
+ %400 = getelementptr i16, ptr addrspace(1) %0, i64 %392, !dbg !64
488
+ %401 = getelementptr i16, ptr addrspace(1) %0, i64 %393, !dbg !64
489
+ %402 = getelementptr i16, ptr addrspace(1) %0, i64 %394, !dbg !64
490
+ %403 = getelementptr i16, ptr addrspace(1) %0, i64 %395, !dbg !64
491
+ %404 = getelementptr i16, ptr addrspace(1) %0, i64 %396, !dbg !64
492
+ %405 = getelementptr i16, ptr addrspace(1) %0, i64 %397, !dbg !64
493
+ %406 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %398, i1 %382, i16 0, i1 %382) #5, !dbg !65
494
+ %407 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %399, i1 %383, i16 0, i1 %383) #5, !dbg !65
495
+ %408 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %400, i1 %384, i16 0, i1 %384) #5, !dbg !65
496
+ %409 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %401, i1 %385, i16 0, i1 %385) #5, !dbg !65
497
+ %410 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %402, i1 %386, i16 0, i1 %386) #5, !dbg !65
498
+ %411 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %403, i1 %387, i16 0, i1 %387) #5, !dbg !65
499
+ %412 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %404, i1 %388, i16 0, i1 %388) #5, !dbg !65
500
+ %413 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %405, i1 %389, i16 0, i1 %389) #5, !dbg !65
501
+ %414 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %406) #5, !dbg !66
502
+ %415 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %407) #5, !dbg !66
503
+ %416 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %408) #5, !dbg !66
504
+ %417 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %409) #5, !dbg !66
505
+ %418 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %410) #5, !dbg !66
506
+ %419 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %411) #5, !dbg !66
507
+ %420 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %412) #5, !dbg !66
508
+ %421 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %413) #5, !dbg !66
509
+ %422 = fsub float %414, %188, !dbg !67
510
+ %423 = fsub float %415, %188, !dbg !67
511
+ %424 = fsub float %416, %188, !dbg !67
512
+ %425 = fsub float %417, %188, !dbg !67
513
+ %426 = fsub float %418, %188, !dbg !67
514
+ %427 = fsub float %419, %188, !dbg !67
515
+ %428 = fsub float %420, %188, !dbg !67
516
+ %429 = fsub float %421, %188, !dbg !67
517
+ %430 = fsub float %422, %r.i.1.i, !dbg !68
518
+ %431 = fsub float %423, %r.i.1.i, !dbg !68
519
+ %432 = fsub float %424, %r.i.1.i, !dbg !68
520
+ %433 = fsub float %425, %r.i.1.i, !dbg !68
521
+ %434 = fsub float %426, %r.i.1.i, !dbg !68
522
+ %435 = fsub float %427, %r.i.1.i, !dbg !68
523
+ %436 = fsub float %428, %r.i.1.i, !dbg !68
524
+ %437 = fsub float %429, %r.i.1.i, !dbg !68
525
+ %438 = getelementptr i16, ptr addrspace(1) %3, i64 %390, !dbg !69
526
+ %439 = getelementptr i16, ptr addrspace(1) %3, i64 %391, !dbg !69
527
+ %440 = getelementptr i16, ptr addrspace(1) %3, i64 %392, !dbg !69
528
+ %441 = getelementptr i16, ptr addrspace(1) %3, i64 %393, !dbg !69
529
+ %442 = getelementptr i16, ptr addrspace(1) %3, i64 %394, !dbg !69
530
+ %443 = getelementptr i16, ptr addrspace(1) %3, i64 %395, !dbg !69
531
+ %444 = getelementptr i16, ptr addrspace(1) %3, i64 %396, !dbg !69
532
+ %445 = getelementptr i16, ptr addrspace(1) %3, i64 %397, !dbg !69
533
+ %446 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %430) #5, !dbg !70
534
+ %447 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %431) #5, !dbg !70
535
+ %448 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %432) #5, !dbg !70
536
+ %449 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %433) #5, !dbg !70
537
+ %450 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %434) #5, !dbg !70
538
+ %451 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %435) #5, !dbg !70
539
+ %452 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %436) #5, !dbg !70
540
+ %453 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %437) #5, !dbg !70
541
+ tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %446, ptr addrspace(1) %438, i1 %382) #5, !dbg !70
542
+ tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %447, ptr addrspace(1) %439, i1 %383) #5, !dbg !70
543
+ tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %448, ptr addrspace(1) %440, i1 %384) #5, !dbg !70
544
+ tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %449, ptr addrspace(1) %441, i1 %385) #5, !dbg !70
545
+ tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %450, ptr addrspace(1) %442, i1 %386) #5, !dbg !70
546
+ tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %451, ptr addrspace(1) %443, i1 %387) #5, !dbg !70
547
+ tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %452, ptr addrspace(1) %444, i1 %388) #5, !dbg !70
548
+ tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %453, ptr addrspace(1) %445, i1 %389) #5, !dbg !70
549
+ %454 = add nuw nsw i32 %372, 2048, !dbg !61
550
+ %455 = icmp ult i32 %372, 48209, !dbg !61
551
+ br i1 %455, label %371, label %456, !dbg !61
552
+
553
+ 456: ; preds = %371
554
+ ret void, !dbg !71
555
+ }
556
+
557
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
558
+ declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
559
+
560
+ ; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
561
+ declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #1
562
+
563
+ ; Function Attrs: convergent nocallback nounwind
564
+ declare void @llvm.nvvm.barrier0() #2
565
+
566
+ ; Function Attrs: alwaysinline nounwind
567
+ define float @__nv_logf(float %a) local_unnamed_addr #3 {
568
+ __nv_fmaf_rn.exit10.i:
569
+ %0 = fcmp olt float %a, 0x3810000000000000
570
+ %1 = fmul float %a, 0x4160000000000000
571
+ %.02 = select i1 %0, float %1, float %a
572
+ %i.i.0 = select i1 %0, float -2.300000e+01, float 0.000000e+00
573
+ %2 = bitcast float %.02 to i32
574
+ %3 = add i32 %2, -1059760811
575
+ %4 = and i32 %3, -8388608
576
+ %5 = sub i32 %2, %4
577
+ %6 = bitcast i32 %5 to float
578
+ %7 = sitofp i32 %4 to float
579
+ %8 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
580
+ %.not = icmp eq i32 %8, 0
581
+ %9 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %7, float 0x3E80000000000000, float %i.i.0) #5
582
+ %10 = tail call float @llvm.nvvm.fma.rn.f(float %7, float 0x3E80000000000000, float %i.i.0) #5
583
+ %.08 = select i1 %.not, float %10, float %9
584
+ %11 = fadd float %6, -1.000000e+00
585
+ %12 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
586
+ %.not1 = icmp eq i32 %12, 0
587
+ %13 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %11, float 0x3FC2073EC0000000) #5
588
+ %14 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %11, float 0x3FC2073EC0000000) #5
589
+ %.010 = select i1 %.not1, float %14, float %13
590
+ %15 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
591
+ %.not2 = icmp eq i32 %15, 0
592
+ %16 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010, float %11, float 0xBFBF19B980000000) #5
593
+ %17 = tail call float @llvm.nvvm.fma.rn.f(float %.010, float %11, float 0xBFBF19B980000000) #5
594
+ %.011 = select i1 %.not2, float %17, float %16
595
+ %18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
596
+ %.not3 = icmp eq i32 %18, 0
597
+ %19 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011, float %11, float 0x3FC1E52AA0000000) #5
598
+ %20 = tail call float @llvm.nvvm.fma.rn.f(float %.011, float %11, float 0x3FC1E52AA0000000) #5
599
+ %.012 = select i1 %.not3, float %20, float %19
600
+ %21 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
601
+ %.not4 = icmp eq i32 %21, 0
602
+ %22 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012, float %11, float 0xBFC55B1720000000) #5
603
+ %23 = tail call float @llvm.nvvm.fma.rn.f(float %.012, float %11, float 0xBFC55B1720000000) #5
604
+ %.09 = select i1 %.not4, float %23, float %22
605
+ %24 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
606
+ %.not5 = icmp eq i32 %24, 0
607
+ %25 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09, float %11, float 0x3FC99DA160000000) #5
608
+ %26 = tail call float @llvm.nvvm.fma.rn.f(float %.09, float %11, float 0x3FC99DA160000000) #5
609
+ %.05 = select i1 %.not5, float %26, float %25
610
+ %27 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
611
+ %.not6 = icmp eq i32 %27, 0
612
+ %28 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05, float %11, float 0xBFCFFFE440000000) #5
613
+ %29 = tail call float @llvm.nvvm.fma.rn.f(float %.05, float %11, float 0xBFCFFFE440000000) #5
614
+ %.01 = select i1 %.not6, float %29, float %28
615
+ %30 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
616
+ %.not7 = icmp eq i32 %30, 0
617
+ %31 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01, float %11, float 0x3FD5554F00000000) #5
618
+ %32 = tail call float @llvm.nvvm.fma.rn.f(float %.01, float %11, float 0x3FD5554F00000000) #5
619
+ %.0 = select i1 %.not7, float %32, float %31
620
+ %33 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
621
+ %.not8 = icmp eq i32 %33, 0
622
+ %34 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0, float %11, float -5.000000e-01) #5
623
+ %35 = tail call float @llvm.nvvm.fma.rn.f(float %.0, float %11, float -5.000000e-01) #5
624
+ %.07 = select i1 %.not8, float %35, float %34
625
+ %36 = fmul float %11, %.07
626
+ %37 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
627
+ %.not9 = icmp eq i32 %37, 0
628
+ %38 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %36, float %11, float %11) #5
629
+ %39 = tail call float @llvm.nvvm.fma.rn.f(float %36, float %11, float %11) #5
630
+ %.06 = select i1 %.not9, float %39, float %38
631
+ %40 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
632
+ %.not10 = icmp eq i32 %40, 0
633
+ %41 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08, float 0x3FE62E4300000000, float %.06) #5
634
+ %42 = tail call float @llvm.nvvm.fma.rn.f(float %.08, float 0x3FE62E4300000000, float %.06) #5
635
+ %.04 = select i1 %.not10, float %42, float %41
636
+ %43 = icmp ugt i32 %2, 2139095039
637
+ br i1 %43, label %__nv_fmaf_rn.exit.i, label %__internal_accurate_logf.exit
638
+
639
+ __nv_fmaf_rn.exit.i: ; preds = %__nv_fmaf_rn.exit10.i
640
+ %44 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
641
+ %.not11 = icmp eq i32 %44, 0
642
+ %45 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02, float 0x7FF0000000000000, float 0x7FF0000000000000) #5
643
+ %46 = tail call float @llvm.nvvm.fma.rn.f(float %.02, float 0x7FF0000000000000, float 0x7FF0000000000000) #5
644
+ %.03 = select i1 %.not11, float %46, float %45
645
+ br label %__internal_accurate_logf.exit
646
+
647
+ __internal_accurate_logf.exit: ; preds = %__nv_fmaf_rn.exit.i, %__nv_fmaf_rn.exit10.i
648
+ %r.i.0 = phi float [ %.03, %__nv_fmaf_rn.exit.i ], [ %.04, %__nv_fmaf_rn.exit10.i ]
649
+ %47 = fcmp oeq float %.02, 0.000000e+00
650
+ %r.i.1 = select i1 %47, float 0xFFF0000000000000, float %r.i.0
651
+ ret float %r.i.1
652
+ }
653
+
654
+ declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #4
655
+
656
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
657
+ declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float) #0
658
+
659
+ ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
660
+ declare float @llvm.nvvm.fma.rn.f(float, float, float) #0
661
+
662
+ attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
663
+ attributes #1 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
664
+ attributes #2 = { convergent nocallback nounwind }
665
+ attributes #3 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
666
+ attributes #4 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
667
+ attributes #5 = { nounwind }
668
+
669
+ !llvm.module.flags = !{!0, !1}
670
+ !llvm.dbg.cu = !{!2}
671
+ !nvvm.annotations = !{!4, !5, !5, !4}
672
+ !llvm.ident = !{!6}
673
+
674
+ !0 = !{i32 2, !"Debug Info Version", i32 3}
675
+ !1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
676
+ !2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
677
+ !3 = !DIFile(filename: "cgu6pijtlte2d3yicmpedfh2c7wgxsaexd6ichnxwbwh4deqe6ck.py", directory: "/tmp/torchinductor_root/gu")
678
+ !4 = !{ptr @triton__0d1d2d3d4de5, !"kernel", i32 1}
679
+ !5 = !{ptr @triton__0d1d2d3d4de5, !"maxntidx", i32 256}
680
+ !6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
681
+ !7 = distinct !DISubprogram(name: "triton__0d1d2d3d4de5", linkageName: "triton__0d1d2d3d4de5", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
682
+ !8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
683
+ !9 = !{}
684
+ !10 = !DILocation(line: 24, column: 33, scope: !7)
685
+ !11 = !DILocation(line: 21, column: 28, scope: !7)
686
+ !12 = !DILocation(line: 21, column: 34, scope: !7)
687
+ !13 = !DILocation(line: 31, column: 46, scope: !7)
688
+ !14 = !DILocation(line: 27, column: 36, scope: !7)
689
+ !15 = !DILocation(line: 28, column: 27, scope: !7)
690
+ !16 = !DILocation(line: 38, column: 21, scope: !17, inlinedAt: !19)
691
+ !17 = distinct !DILexicalBlockFile(scope: !7, file: !18, discriminator: 0)
692
+ !18 = !DIFile(filename: "triton_helpers.py", directory: "/usr/local/lib/python3.10/dist-packages/torch/_inductor")
693
+ !19 = !DILocation(line: 34, column: 45, scope: !17)
694
+ !20 = !DILocation(line: 29, column: 25, scope: !7)
695
+ !21 = !DILocation(line: 31, column: 34, scope: !7)
696
+ !22 = !DILocation(line: 31, column: 52, scope: !7)
697
+ !23 = !DILocation(line: 31, column: 103, scope: !7)
698
+ !24 = !DILocation(line: 36, column: 15, scope: !17, inlinedAt: !19)
699
+ !25 = !DILocation(line: 38, column: 16, scope: !17, inlinedAt: !19)
700
+ !26 = !DILocation(line: 0, scope: !7)
701
+ !27 = !DILocation(line: 36, column: 15, scope: !28, inlinedAt: !29)
702
+ !28 = distinct !DILexicalBlockFile(scope: !17, file: !18, discriminator: 0)
703
+ !29 = !DILocation(line: 49, column: 29, scope: !28, inlinedAt: !30)
704
+ !30 = !DILocation(line: 36, column: 38, scope: !28)
705
+ !31 = !DILocation(line: 38, column: 21, scope: !28, inlinedAt: !29)
706
+ !32 = !DILocation(line: 38, column: 16, scope: !28, inlinedAt: !29)
707
+ !33 = !DILocation(line: 39, column: 29, scope: !28, inlinedAt: !29)
708
+ !34 = !DILocation(line: 49, column: 29, scope: !17, inlinedAt: !35)
709
+ !35 = !DILocation(line: 36, column: 38, scope: !17)
710
+ !36 = !DILocation(line: 36, column: 41, scope: !7)
711
+ !37 = !DILocation(line: 37, column: 25, scope: !7)
712
+ !38 = !DILocation(line: 37, column: 36, scope: !7)
713
+ !39 = !DILocation(line: 39, column: 36, scope: !7)
714
+ !40 = !DILocation(line: 40, column: 27, scope: !7)
715
+ !41 = !DILocation(line: 41, column: 25, scope: !7)
716
+ !42 = !DILocation(line: 43, column: 34, scope: !7)
717
+ !43 = !DILocation(line: 43, column: 52, scope: !7)
718
+ !44 = !DILocation(line: 43, column: 103, scope: !7)
719
+ !45 = !DILocation(line: 45, column: 22, scope: !7)
720
+ !46 = !DILocation(line: 46, column: 22, scope: !7)
721
+ !47 = !DILocation(line: 49, column: 40, scope: !7)
722
+ !48 = !DILocation(line: 243, column: 36, scope: !49, inlinedAt: !51)
723
+ !49 = distinct !DILexicalBlockFile(scope: !7, file: !50, discriminator: 0)
724
+ !50 = !DIFile(filename: "standard.py", directory: "/usr/local/lib/python3.10/dist-packages/triton/language")
725
+ !51 = !DILocation(line: 50, column: 27, scope: !49)
726
+ !52 = !DILocation(line: 233, column: 15, scope: !53, inlinedAt: !54)
727
+ !53 = distinct !DILexicalBlockFile(scope: !49, file: !50, discriminator: 0)
728
+ !54 = !DILocation(line: 243, column: 36, scope: !53, inlinedAt: !55)
729
+ !55 = !DILocation(line: 50, column: 27, scope: !53)
730
+ !56 = !DILocation(line: 50, column: 30, scope: !7)
731
+ !57 = !DILocation(line: 51, column: 25, scope: !7)
732
+ !58 = !DILocation(line: 51, column: 37, scope: !7)
733
+ !59 = !DILocation(line: 59, column: 23, scope: !7)
734
+ !60 = !DILocation(line: 53, column: 27, scope: !7)
735
+ !61 = !DILocation(line: 52, column: 36, scope: !7)
736
+ !62 = !DILocation(line: 54, column: 25, scope: !7)
737
+ !63 = !DILocation(line: 56, column: 41, scope: !7)
738
+ !64 = !DILocation(line: 56, column: 35, scope: !7)
739
+ !65 = !DILocation(line: 56, column: 53, scope: !7)
740
+ !66 = !DILocation(line: 56, column: 105, scope: !7)
741
+ !67 = !DILocation(line: 58, column: 24, scope: !7)
742
+ !68 = !DILocation(line: 60, column: 24, scope: !7)
743
+ !69 = !DILocation(line: 62, column: 29, scope: !7)
744
+ !70 = !DILocation(line: 62, column: 54, scope: !7)
745
+ !71 = !DILocation(line: 52, column: 4, scope: !7)
.triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.ttir ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module {
2
+ tt.func public @triton__0d1d2d3d4de5(%arg0: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg4: i64 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}, %arg5: i64) attributes {noinline = false} {
3
+ %cst = arith.constant dense<0.000000e+00> : tensor<1x2048xbf16>
4
+ %c50257_i64 = arith.constant 50257 : i64
5
+ %cst_0 = arith.constant dense<true> : tensor<1x2048xi1>
6
+ %c50257_i32 = arith.constant 50257 : i32
7
+ %c2048_i32 = arith.constant 2048 : i32
8
+ %c0_i32 = arith.constant 0 : i32
9
+ %cst_1 = arith.constant dense<50257> : tensor<1x2048xi64>
10
+ %cst_2 = arith.constant dense<0.000000e+00> : tensor<1x2048xf32>
11
+ %cst_3 = arith.constant dense<0xFF800000> : tensor<1x2048xf32>
12
+ %0 = tt.get_program_id x : i32
13
+ %1 = arith.extsi %0 : i32 to i64
14
+ %2 = tt.make_range {end = 2048 : i32, start = 0 : i32} : tensor<2048xi32>
15
+ %3 = tt.expand_dims %2 {axis = 0 : i32} : (tensor<2048xi32>) -> tensor<1x2048xi32>
16
+ %4 = arith.extsi %3 : tensor<1x2048xi32> to tensor<1x2048xi64>
17
+ %5 = arith.muli %1, %c50257_i64 : i64
18
+ %6 = tt.splat %5 : (i64) -> tensor<1x2048xi64>
19
+ %7 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<1x2048x!tt.ptr<bf16, 1>>
20
+ %8 = scf.for %arg6 = %c0_i32 to %c50257_i32 step %c2048_i32 iter_args(%arg7 = %cst_3) -> (tensor<1x2048xf32>) : i32 {
21
+ %29 = arith.extsi %arg6 : i32 to i64
22
+ %30 = tt.splat %29 : (i64) -> tensor<1x2048xi64>
23
+ %31 = arith.addi %30, %4 : tensor<1x2048xi64>
24
+ %32 = arith.cmpi slt, %31, %cst_1 : tensor<1x2048xi64>
25
+ %33 = arith.addi %31, %6 : tensor<1x2048xi64>
26
+ %34 = tt.addptr %7, %33 : tensor<1x2048x!tt.ptr<bf16, 1>>, tensor<1x2048xi64>
27
+ %35 = tt.load %34, %32, %cst {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x2048xbf16>
28
+ %36 = arith.extf %35 : tensor<1x2048xbf16> to tensor<1x2048xf32>
29
+ %37 = arith.cmpf ogt, %arg7, %36 : tensor<1x2048xf32>
30
+ %38 = arith.cmpf une, %arg7, %arg7 : tensor<1x2048xf32>
31
+ %39 = arith.ori %37, %38 : tensor<1x2048xi1>
32
+ %40 = arith.xori %39, %cst_0 : tensor<1x2048xi1>
33
+ %41 = arith.andi %32, %40 : tensor<1x2048xi1>
34
+ %42 = arith.select %41, %36, %arg7 : tensor<1x2048xi1>, tensor<1x2048xf32>
35
+ scf.yield %42 : tensor<1x2048xf32>
36
+ }
37
+ %9 = "tt.reduce"(%8) <{axis = 1 : i32}> ({
38
+ ^bb0(%arg6: f32, %arg7: f32):
39
+ %29 = arith.cmpf ogt, %arg6, %arg7 : f32
40
+ %30 = arith.cmpf une, %arg6, %arg6 : f32
41
+ %31 = arith.ori %29, %30 : i1
42
+ %32 = arith.select %31, %arg6, %arg7 : f32
43
+ tt.reduce.return %32 : f32
44
+ }) : (tensor<1x2048xf32>) -> tensor<1xf32>
45
+ %10 = tt.expand_dims %9 {axis = 1 : i32} : (tensor<1xf32>) -> tensor<1x1xf32>
46
+ %11 = tt.addptr %arg1, %1 : !tt.ptr<f32, 1>, i64
47
+ %12 = tt.splat %11 : (!tt.ptr<f32, 1>) -> tensor<1x1x!tt.ptr<f32, 1>>
48
+ tt.store %12, %10 {cache = 1 : i32, evict = 1 : i32} : tensor<1x1xf32>
49
+ %13 = arith.muli %1, %c50257_i64 : i64
50
+ %14 = tt.splat %13 : (i64) -> tensor<1x2048xi64>
51
+ %15 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<1x2048x!tt.ptr<bf16, 1>>
52
+ %16 = tt.broadcast %10 : (tensor<1x1xf32>) -> tensor<1x2048xf32>
53
+ %17 = scf.for %arg6 = %c0_i32 to %c50257_i32 step %c2048_i32 iter_args(%arg7 = %cst_2) -> (tensor<1x2048xf32>) : i32 {
54
+ %29 = arith.extsi %arg6 : i32 to i64
55
+ %30 = tt.splat %29 : (i64) -> tensor<1x2048xi64>
56
+ %31 = arith.addi %30, %4 : tensor<1x2048xi64>
57
+ %32 = arith.cmpi slt, %31, %cst_1 : tensor<1x2048xi64>
58
+ %33 = arith.addi %31, %14 : tensor<1x2048xi64>
59
+ %34 = tt.addptr %15, %33 : tensor<1x2048x!tt.ptr<bf16, 1>>, tensor<1x2048xi64>
60
+ %35 = tt.load %34, %32, %cst {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x2048xbf16>
61
+ %36 = arith.extf %35 : tensor<1x2048xbf16> to tensor<1x2048xf32>
62
+ %37 = arith.subf %36, %16 : tensor<1x2048xf32>
63
+ %38 = math.exp %37 : tensor<1x2048xf32>
64
+ %39 = arith.addf %arg7, %38 : tensor<1x2048xf32>
65
+ %40 = arith.select %32, %39, %arg7 : tensor<1x2048xi1>, tensor<1x2048xf32>
66
+ scf.yield %40 : tensor<1x2048xf32>
67
+ }
68
+ %18 = "tt.reduce"(%17) <{axis = 1 : i32}> ({
69
+ ^bb0(%arg6: f32, %arg7: f32):
70
+ %29 = arith.addf %arg6, %arg7 : f32
71
+ tt.reduce.return %29 : f32
72
+ }) : (tensor<1x2048xf32>) -> tensor<1xf32>
73
+ %19 = tt.expand_dims %18 {axis = 1 : i32} : (tensor<1xf32>) -> tensor<1x1xf32>
74
+ %20 = tt.addptr %arg2, %1 : !tt.ptr<f32, 1>, i64
75
+ %21 = tt.splat %20 : (!tt.ptr<f32, 1>) -> tensor<1x1x!tt.ptr<f32, 1>>
76
+ tt.store %21, %19 {cache = 1 : i32, evict = 1 : i32} : tensor<1x1xf32>
77
+ %22 = arith.muli %1, %c50257_i64 : i64
78
+ %23 = tt.splat %22 : (i64) -> tensor<1x2048xi64>
79
+ %24 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<1x2048x!tt.ptr<bf16, 1>>
80
+ %25 = tt.broadcast %10 : (tensor<1x1xf32>) -> tensor<1x2048xf32>
81
+ %26 = math.log %19 : tensor<1x1xf32>
82
+ %27 = tt.broadcast %26 : (tensor<1x1xf32>) -> tensor<1x2048xf32>
83
+ %28 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<1x2048x!tt.ptr<bf16, 1>>
84
+ scf.for %arg6 = %c0_i32 to %c50257_i32 step %c2048_i32 : i32 {
85
+ %29 = arith.extsi %arg6 : i32 to i64
86
+ %30 = tt.splat %29 : (i64) -> tensor<1x2048xi64>
87
+ %31 = arith.addi %30, %4 : tensor<1x2048xi64>
88
+ %32 = arith.cmpi slt, %31, %cst_1 : tensor<1x2048xi64>
89
+ %33 = arith.addi %31, %23 : tensor<1x2048xi64>
90
+ %34 = tt.addptr %24, %33 : tensor<1x2048x!tt.ptr<bf16, 1>>, tensor<1x2048xi64>
91
+ %35 = tt.load %34, %32, %cst {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<1x2048xbf16>
92
+ %36 = arith.extf %35 : tensor<1x2048xbf16> to tensor<1x2048xf32>
93
+ %37 = arith.subf %36, %25 : tensor<1x2048xf32>
94
+ %38 = arith.subf %37, %27 : tensor<1x2048xf32>
95
+ %39 = tt.addptr %28, %33 : tensor<1x2048x!tt.ptr<bf16, 1>>, tensor<1x2048xi64>
96
+ %40 = arith.truncf %38 : tensor<1x2048xf32> to tensor<1x2048xbf16>
97
+ tt.store %39, %40, %32 {cache = 1 : i32, evict = 1 : i32} : tensor<1x2048xbf16>
98
+ }
99
+ tt.return
100
+ }
101
+ }
.triton/dump/7dc5bb3e5c2bb99527fff34c6fba7810/triton_.ptx ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Generated by LLVM NVPTX Back-End
3
+ //
4
+
5
+ .version 8.2
6
+ .target sm_89
7
+ .address_size 64
8
+
9
+ // .globl triton__0d1de
10
+
11
+ .visible .entry triton__0d1de(
12
+ .param .u64 triton__0d1de_param_0,
13
+ .param .u32 triton__0d1de_param_1
14
+ )
15
+ .maxntid 128, 1, 1
16
+ {
17
+ .reg .pred %p<2>;
18
+ .reg .b32 %r<6>;
19
+ .reg .b64 %rd<5>;
20
+ .loc 1 18 0
21
+ $L__func_begin0:
22
+ .loc 1 18 0
23
+
24
+ ld.param.u64 %rd3, [triton__0d1de_param_0];
25
+ $L__tmp0:
26
+ .loc 1 21 36
27
+ mov.u32 %r2, %tid.x;
28
+ and.b32 %r3, %r2, 127;
29
+ .loc 1 20 28
30
+ mov.u32 %r1, %ctaid.x;
31
+ .loc 1 20 33
32
+ shl.b32 %r4, %r1, 7;
33
+ .loc 1 21 23
34
+ or.b32 %r5, %r4, %r3;
35
+ .loc 1 22 21
36
+ setp.lt.s32 %p1, %r5, 512;
37
+ .loc 1 25 25
38
+ cvt.s64.s32 %rd1, %r5;
39
+ mul.wide.s32 %rd4, %r5, 8;
40
+ add.s64 %rd2, %rd3, %rd4;
41
+ .loc 1 25 36
42
+ @%p1 st.global.b64 [ %rd2 + 0 ], { %rd1 };
43
+ .loc 1 25 4
44
+ ret;
45
+ $L__tmp1:
46
+ $L__func_end0:
47
+
48
+ }
49
+ .file 1 "/tmp/torchinductor_root/wx/cwxxgxdevnyc453z7hh4nxzgmvlhh6suwokktps3dw62btskgxt4.py"
50
+ .section .debug_abbrev
51
+ {
52
+ .b8 1
53
+ .b8 17
54
+ .b8 1
55
+ .b8 37
56
+ .b8 8
57
+ .b8 19
58
+ .b8 5
59
+ .b8 3
60
+ .b8 8
61
+ .b8 16
62
+ .b8 6
63
+ .b8 27
64
+ .b8 8
65
+ .b8 180
66
+ .b8 66
67
+ .b8 12
68
+ .b8 17
69
+ .b8 1
70
+ .b8 18
71
+ .b8 1
72
+ .b8 0
73
+ .b8 0
74
+ .b8 2
75
+ .b8 46
76
+ .b8 0
77
+ .b8 17
78
+ .b8 1
79
+ .b8 18
80
+ .b8 1
81
+ .b8 64
82
+ .b8 10
83
+ .b8 135
84
+ .b8 64
85
+ .b8 8
86
+ .b8 3
87
+ .b8 8
88
+ .b8 58
89
+ .b8 11
90
+ .b8 59
91
+ .b8 11
92
+ .b8 63
93
+ .b8 12
94
+ .b8 0
95
+ .b8 0
96
+ .b8 0
97
+ }
98
+ .section .debug_info
99
+ {
100
+ .b32 172
101
+ .b8 2
102
+ .b8 0
103
+ .b32 .debug_abbrev
104
+ .b8 8
105
+ .b8 1
106
+ .b8 116
107
+ .b8 114
108
+ .b8 105
109
+ .b8 116
110
+ .b8 111
111
+ .b8 110
112
+ .b8 0
113
+ .b8 2
114
+ .b8 0
115
+ .b8 99
116
+ .b8 119
117
+ .b8 120
118
+ .b8 120
119
+ .b8 103
120
+ .b8 120
121
+ .b8 100
122
+ .b8 101
123
+ .b8 118
124
+ .b8 110
125
+ .b8 121
126
+ .b8 99
127
+ .b8 52
128
+ .b8 53
129
+ .b8 51
130
+ .b8 122
131
+ .b8 55
132
+ .b8 104
133
+ .b8 104
134
+ .b8 52
135
+ .b8 110
136
+ .b8 120
137
+ .b8 122
138
+ .b8 103
139
+ .b8 109
140
+ .b8 118
141
+ .b8 108
142
+ .b8 104
143
+ .b8 104
144
+ .b8 54
145
+ .b8 115
146
+ .b8 117
147
+ .b8 119
148
+ .b8 111
149
+ .b8 107
150
+ .b8 107
151
+ .b8 116
152
+ .b8 112
153
+ .b8 115
154
+ .b8 51
155
+ .b8 100
156
+ .b8 119
157
+ .b8 54
158
+ .b8 50
159
+ .b8 98
160
+ .b8 116
161
+ .b8 115
162
+ .b8 107
163
+ .b8 103
164
+ .b8 120
165
+ .b8 116
166
+ .b8 52
167
+ .b8 46
168
+ .b8 112
169
+ .b8 121
170
+ .b8 0
171
+ .b32 .debug_line
172
+ .b8 47
173
+ .b8 116
174
+ .b8 109
175
+ .b8 112
176
+ .b8 47
177
+ .b8 116
178
+ .b8 111
179
+ .b8 114
180
+ .b8 99
181
+ .b8 104
182
+ .b8 105
183
+ .b8 110
184
+ .b8 100
185
+ .b8 117
186
+ .b8 99
187
+ .b8 116
188
+ .b8 111
189
+ .b8 114
190
+ .b8 95
191
+ .b8 114
192
+ .b8 111
193
+ .b8 111
194
+ .b8 116
195
+ .b8 47
196
+ .b8 119
197
+ .b8 120
198
+ .b8 0
199
+ .b8 1
200
+ .b64 $L__func_begin0
201
+ .b64 $L__func_end0
202
+ .b8 2
203
+ .b64 $L__func_begin0
204
+ .b64 $L__func_end0
205
+ .b8 1
206
+ .b8 156
207
+ .b8 116
208
+ .b8 114
209
+ .b8 105
210
+ .b8 116
211
+ .b8 111
212
+ .b8 110
213
+ .b8 95
214
+ .b8 95
215
+ .b8 48
216
+ .b8 100
217
+ .b8 49
218
+ .b8 100
219
+ .b8 101
220
+ .b8 0
221
+ .b8 116
222
+ .b8 114
223
+ .b8 105
224
+ .b8 116
225
+ .b8 111
226
+ .b8 110
227
+ .b8 95
228
+ .b8 95
229
+ .b8 48
230
+ .b8 100
231
+ .b8 49
232
+ .b8 100
233
+ .b8 101
234
+ .b8 0
235
+ .b8 1
236
+ .b8 18
237
+ .b8 1
238
+ .b8 0
239
+ }
240
+ .section .debug_pubnames
241
+ {
242
+ .b32 $L__pubNames_end0-$L__pubNames_start0
243
+ $L__pubNames_start0:
244
+ .b8 2
245
+ .b8 0
246
+ .b32 .debug_info
247
+ .b32 176
248
+ .b32 125
249
+ .b8 116
250
+ .b8 114
251
+ .b8 105
252
+ .b8 116
253
+ .b8 111
254
+ .b8 110
255
+ .b8 95
256
+ .b8 95
257
+ .b8 48
258
+ .b8 100
259
+ .b8 49
260
+ .b8 100
261
+ .b8 101
262
+ .b8 0
263
+ .b32 0
264
+ $L__pubNames_end0:
265
+ }
266
+ .section .debug_pubtypes
267
+ {
268
+ .b32 $L__pubTypes_end0-$L__pubTypes_start0
269
+ $L__pubTypes_start0:
270
+ .b8 2
271
+ .b8 0
272
+ .b32 .debug_info
273
+ .b32 176
274
+ .b32 0
275
+ $L__pubTypes_end0:
276
+ }
277
+ .section .debug_loc { }
.triton/dump/884b5df35d2a25fd91308249e7657806/triton_.ttir ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module {
2
+ tt.func public @triton__0d1de(%arg0: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg1: i64 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
3
+ %c1024_i64 = arith.constant 1024 : i64
4
+ %cst = arith.constant dense<0.000000e+00> : tensor<1024xf32>
5
+ %0 = tt.get_program_id x : i32
6
+ %1 = arith.extsi %0 : i32 to i64
7
+ %2 = arith.muli %1, %c1024_i64 : i64
8
+ %3 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
9
+ %4 = arith.extsi %3 : tensor<1024xi32> to tensor<1024xi64>
10
+ %5 = tt.splat %2 : (i64) -> tensor<1024xi64>
11
+ %6 = arith.addi %5, %4 : tensor<1024xi64>
12
+ %7 = tt.splat %arg0 : (!tt.ptr<f32, 1>) -> tensor<1024x!tt.ptr<f32, 1>>
13
+ %8 = tt.addptr %7, %6 : tensor<1024x!tt.ptr<f32, 1>>, tensor<1024xi64>
14
+ tt.store %8, %cst {cache = 1 : i32, evict = 1 : i32} : tensor<1024xf32>
15
+ tt.return
16
+ }
17
+ }
.triton/dump/93ab21d512b10f4271e68c2f0ae3393c/triton_.cubin ADDED
Binary file (5.54 kB). View file
 
.triton/dump/9a2fb05196b13393bea452d08e9aaca8/triton_.cubin ADDED
Binary file (4.9 kB). View file
 
.triton/dump/9f68cc707cb8f8bff3232abf59cbd9ec/triton_.ptx ADDED
@@ -0,0 +1,886 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Generated by LLVM NVPTX Back-End
3
+ //
4
+
5
+ .version 8.2
6
+ .target sm_89
7
+ .address_size 64
8
+
9
+ // .globl triton__0d1d2d3d4d5de6de
10
+ .extern .func __assertfail
11
+ (
12
+ .param .b64 __assertfail_param_0,
13
+ .param .b64 __assertfail_param_1,
14
+ .param .b32 __assertfail_param_2,
15
+ .param .b64 __assertfail_param_3,
16
+ .param .b64 __assertfail_param_4
17
+ )
18
+ ;
19
+ .global .align 1 .b8 assertFunc_1[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100};
20
+ .global .align 1 .b8 assertFile_1[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62};
21
+ .global .align 1 .b8 assertMessage_1[39] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 49, 51, 32, 60, 32, 53, 48, 50, 53, 55};
22
+ .global .align 1 .b8 assertFunc_0[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100};
23
+ .global .align 1 .b8 assertFile_0[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62};
24
+ .global .align 1 .b8 assertMessage_0[38] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 51, 32, 60, 32, 53, 48, 50, 53, 55};
25
+ .extern .shared .align 1 .b8 global_smem[];
26
+ .global .align 1 .b8 _$_str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};
27
+
28
+ .visible .entry triton__0d1d2d3d4d5de6de(
29
+ .param .u64 triton__0d1d2d3d4d5de6de_param_0,
30
+ .param .u64 triton__0d1d2d3d4d5de6de_param_1,
31
+ .param .u64 triton__0d1d2d3d4d5de6de_param_2,
32
+ .param .u64 triton__0d1d2d3d4d5de6de_param_3,
33
+ .param .u64 triton__0d1d2d3d4d5de6de_param_4,
34
+ .param .u32 triton__0d1d2d3d4d5de6de_param_5,
35
+ .param .u32 triton__0d1d2d3d4d5de6de_param_6
36
+ )
37
+ .maxntid 128, 1, 1
38
+ {
39
+ .reg .pred %p<42>;
40
+ .reg .b16 %rs<5>;
41
+ .reg .b32 %r<140>;
42
+ .reg .f32 %f<148>;
43
+ .reg .b64 %rd<67>;
44
+ .loc 1 18 0
45
+ $L__func_begin0:
46
+ .loc 1 18 0
47
+
48
+ ld.param.u64 %rd17, [triton__0d1d2d3d4d5de6de_param_4];
49
+ ld.param.u64 %rd16, [triton__0d1d2d3d4d5de6de_param_3];
50
+ ld.param.u64 %rd28, [triton__0d1d2d3d4d5de6de_param_0];
51
+ ld.param.u64 %rd29, [triton__0d1d2d3d4d5de6de_param_1];
52
+ $L__tmp0:
53
+ .loc 1 22 44
54
+ mov.u32 %r16, %tid.x;
55
+ and.b32 %r1, %r16, 31;
56
+ ld.param.u64 %rd30, [triton__0d1d2d3d4d5de6de_param_2];
57
+ bfe.u32 %r2, %r16, 5, 2;
58
+ bfe.u32 %r3, %r16, 1, 4;
59
+ shl.b32 %r17, %r2, 4;
60
+ or.b32 %r4, %r17, %r3;
61
+ and.b32 %r18, %r16, 63;
62
+ .loc 1 24 33
63
+ shl.b32 %r19, %r16, 2;
64
+ and.b32 %r5, %r19, 4;
65
+ and.b32 %r6, %r16, 7;
66
+ .loc 1 31 36
67
+ shl.b32 %r7, %r2, 2;
68
+ .loc 1 21 28
69
+ mov.u32 %r14, %ctaid.x;
70
+ .loc 1 21 33
71
+ shl.b32 %r20, %r14, 6;
72
+ .loc 1 22 23
73
+ or.b32 %r21, %r20, %r4;
74
+ or.b32 %r22, %r20, %r18;
75
+ .loc 1 26 30
76
+ mul.wide.s32 %rd31, %r21, 8;
77
+ add.s64 %rd19, %rd28, %rd31;
78
+ mul.wide.s32 %rd32, %r22, 8;
79
+ add.s64 %rd27, %rd28, %rd32;
80
+ mov.pred %p1, -1;
81
+ .loc 1 26 35
82
+ mov.u64 %rd18, 0x0;
83
+ @%p1 ld.global.L1::evict_last.b64 { %rd18 }, [ %rd19 + 0 ];
84
+ mov.u64 %rd20, 0x0;
85
+ @%p1 ld.global.L1::evict_last.b64 { %rd20 }, [ %rd19 + 0 ];
86
+ mov.u64 %rd22, 0x0;
87
+ @%p1 ld.global.L1::evict_last.b64 { %rd22 }, [ %rd19 + 0 ];
88
+ mov.u64 %rd24, 0x0;
89
+ @%p1 ld.global.L1::evict_last.b64 { %rd24 }, [ %rd19 + 0 ];
90
+ mov.u64 %rd26, 0x0;
91
+ @%p1 ld.global.L1::evict_last.b64 { %rd26 }, [ %rd27 + 0 ];
92
+ .loc 1 27 18
93
+ bfe.s32 %r23, %r14, 25, 1;
94
+ shr.u32 %r24, %r23, 23;
95
+ add.s32 %r25, %r21, %r24;
96
+ and.b32 %r26, %r25, 16776704;
97
+ sub.s32 %r27, %r21, %r26;
98
+ .loc 1 35 44
99
+ shl.b32 %r28, %r27, 8;
100
+ .loc 1 36 22
101
+ add.s64 %rd33, %rd26, 50257;
102
+ .loc 1 37 22
103
+ setp.lt.s64 %p6, %rd18, 0;
104
+ setp.lt.s64 %p7, %rd26, 0;
105
+ .loc 1 38 36
106
+ selp.b64 %rd1, %rd33, %rd26, %p7;
107
+ .loc 1 40 44
108
+ shl.b64 %rd34, %rd18, 8;
109
+ add.s64 %rd35, %rd34, 12865792;
110
+ selp.b64 %rd36, %rd35, %rd34, %p6;
111
+ .loc 1 31 36
112
+ and.b32 %r29, %r16, 1;
113
+ mul.wide.u32 %rd2, %r29, 16;
114
+ shl.b64 %rd37, %rd36, 2;
115
+ or.b64 %rd38, %rd2, %rd37;
116
+ add.s64 %rd66, %rd29, %rd38;
117
+ or.b32 %r30, %r28, %r5;
118
+ mul.wide.s32 %rd39, %r30, 4;
119
+ add.s64 %rd64, %rd30, %rd39;
120
+ mov.f32 %f132, 0f00000000;
121
+ mov.b32 %r138, -8;
122
+ mov.u64 %rd62, %rd64;
123
+ mov.u64 %rd63, %rd66;
124
+ mov.f32 %f133, %f132;
125
+ mov.f32 %f134, %f132;
126
+ mov.f32 %f135, %f132;
127
+ mov.f32 %f136, %f132;
128
+ mov.f32 %f137, %f132;
129
+ mov.f32 %f138, %f132;
130
+ mov.f32 %f139, %f132;
131
+ mov.f32 %f140, %f132;
132
+ mov.f32 %f141, %f132;
133
+ mov.f32 %f142, %f132;
134
+ mov.f32 %f143, %f132;
135
+ mov.f32 %f144, %f132;
136
+ mov.f32 %f145, %f132;
137
+ mov.f32 %f146, %f132;
138
+ mov.f32 %f147, %f132;
139
+ bra.uni $L__BB0_1;
140
+ $L__BB0_3:
141
+ .loc 1 0 0
142
+ mov.b32 %f17, %r31;
143
+ mov.b32 %f18, %r32;
144
+ mov.b32 %f19, %r33;
145
+ mov.b32 %f20, %r34;
146
+ .loc 1 40 52
147
+ mov.u32 %r40, 0x0;
148
+ mov.u32 %r41, 0x0;
149
+ mov.u32 %r42, 0x0;
150
+ mov.u32 %r43, 0x0;
151
+ @%p1 ld.global.L1::evict_last.v4.b32 { %r40, %r41, %r42, %r43 }, [ %rd63 + 0 ];
152
+ @!%p1 mov.u32 %r40, %r124;
153
+ @!%p1 mov.u32 %r41, %r124;
154
+ @!%p1 mov.u32 %r42, %r124;
155
+ @!%p1 mov.u32 %r43, %r124;
156
+ mov.b32 %f48, %r40;
157
+ mov.b32 %f49, %r41;
158
+ mov.b32 %f50, %r42;
159
+ mov.b32 %f51, %r43;
160
+ .loc 1 41 22
161
+ add.f32 %f52, %f17, %f48;
162
+ add.f32 %f53, %f18, %f49;
163
+ add.f32 %f54, %f19, %f50;
164
+ add.f32 %f55, %f20, %f51;
165
+ $L__tmp1:
166
+ .loc 2 96 20
167
+ sub.f32 %f56, %f52, %f144;
168
+ sub.f32 %f57, %f53, %f145;
169
+ sub.f32 %f58, %f54, %f146;
170
+ sub.f32 %f59, %f55, %f147;
171
+ .loc 2 97 26
172
+ add.f32 %f132, %f132, 0f3F800000;
173
+ add.f32 %f133, %f133, 0f3F800000;
174
+ add.f32 %f134, %f134, 0f3F800000;
175
+ add.f32 %f135, %f135, 0f3F800000;
176
+ add.f32 %f136, %f136, 0f3F800000;
177
+ add.f32 %f137, %f137, 0f3F800000;
178
+ add.f32 %f138, %f138, 0f3F800000;
179
+ add.f32 %f139, %f139, 0f3F800000;
180
+ .loc 2 98 30
181
+ mov.b32 %r49, %f56;
182
+ mov.b32 %r50, %f132;
183
+ div.full.f32 %r48, %r49, %r50;
184
+ mov.b32 %f60, %r48;
185
+ mov.b32 %r52, %f57;
186
+ mov.b32 %r53, %f133;
187
+ div.full.f32 %r51, %r52, %r53;
188
+ mov.b32 %f61, %r51;
189
+ mov.b32 %r55, %f58;
190
+ mov.b32 %r56, %f134;
191
+ div.full.f32 %r54, %r55, %r56;
192
+ mov.b32 %f62, %r54;
193
+ mov.b32 %r58, %f59;
194
+ mov.b32 %r59, %f135;
195
+ div.full.f32 %r57, %r58, %r59;
196
+ mov.b32 %f63, %r57;
197
+ .loc 2 98 22
198
+ add.f32 %f144, %f144, %f60;
199
+ add.f32 %f145, %f145, %f61;
200
+ add.f32 %f146, %f146, %f62;
201
+ add.f32 %f147, %f147, %f63;
202
+ .loc 2 101 30
203
+ sub.f32 %f64, %f52, %f144;
204
+ sub.f32 %f65, %f53, %f145;
205
+ sub.f32 %f66, %f54, %f146;
206
+ sub.f32 %f67, %f55, %f147;
207
+ $L__tmp2:
208
+ .loc 1 47 48
209
+ fma.rn.f32 %f140, %f56, %f64, %f140;
210
+ fma.rn.f32 %f141, %f57, %f65, %f141;
211
+ fma.rn.f32 %f142, %f58, %f66, %f142;
212
+ fma.rn.f32 %f143, %f59, %f67, %f143;
213
+ .loc 1 31 36
214
+ add.s32 %r138, %r138, 8;
215
+ add.s64 %rd63, %rd63, 32;
216
+ add.s64 %rd62, %rd62, 32;
217
+ setp.lt.u32 %p19, %r138, 248;
218
+ @%p19 bra $L__BB0_1;
219
+ bra.uni $L__BB0_4;
220
+ $L__BB0_1:
221
+ .loc 1 39 40
222
+ setp.lt.u64 %p13, %rd1, 50257;
223
+ mov.b32 %r124, 0;
224
+ .loc 1 35 50
225
+ mov.u32 %r31, 0x0;
226
+ mov.u32 %r32, 0x0;
227
+ mov.u32 %r33, 0x0;
228
+ mov.u32 %r34, 0x0;
229
+ @%p1 ld.global.L1::evict_last.v4.b32 { %r31, %r32, %r33, %r34 }, [ %rd62 + 0 ];
230
+ @!%p1 mov.u32 %r31, %r124;
231
+ @!%p1 mov.u32 %r32, %r124;
232
+ @!%p1 mov.u32 %r33, %r124;
233
+ @!%p1 mov.u32 %r34, %r124;
234
+ mov.b32 %r137, 883;
235
+ mov.u64 %rd61, 1;
236
+ .loc 1 39 55
237
+ @%p13 bra $L__BB0_3;
238
+ mov.u64 %rd41, assertMessage_0;
239
+ cvta.global.u64 %rd42, %rd41;
240
+ mov.u64 %rd43, assertFile_0;
241
+ cvta.global.u64 %rd44, %rd43;
242
+ mov.u64 %rd45, assertFunc_0;
243
+ cvta.global.u64 %rd46, %rd45;
244
+ { // callseq 2, 0
245
+ .reg .b32 temp_param_reg;
246
+ .param .b64 param0;
247
+ st.param.b64 [param0+0], %rd42;
248
+ .param .b64 param1;
249
+ st.param.b64 [param1+0], %rd44;
250
+ .param .b32 param2;
251
+ st.param.b32 [param2+0], %r137;
252
+ .param .b64 param3;
253
+ st.param.b64 [param3+0], %rd46;
254
+ .param .b64 param4;
255
+ st.param.b64 [param4+0], %rd61;
256
+ call.uni
257
+ __assertfail,
258
+ (
259
+ param0,
260
+ param1,
261
+ param2,
262
+ param3,
263
+ param4
264
+ );
265
+ } // callseq 2
266
+ bra.uni $L__BB0_3;
267
+ $L__BB0_4:
268
+ .loc 1 31 36
269
+ shr.u32 %r85, %r1, 3;
270
+ or.b32 %r86, %r7, %r85;
271
+ mad.lo.s32 %r87, %r86, 12, %r6;
272
+ shl.b32 %r88, %r87, 2;
273
+ mov.u32 %r89, global_smem;
274
+ add.s32 %r90, %r89, %r88;
275
+ st.shared.f32 [%r90], %f136;
276
+ st.shared.f32 [%r90+768], %f137;
277
+ st.shared.f32 [%r90+1536], %f138;
278
+ st.shared.f32 [%r90+2304], %f139;
279
+ bar.sync 0;
280
+ mad.lo.s32 %r91, %r4, 12, %r5;
281
+ shl.b32 %r92, %r91, 2;
282
+ add.s32 %r93, %r89, %r92;
283
+ ld.shared.v4.f32 {%f68, %f69, %f70, %f71}, [%r93];
284
+ $L__tmp3:
285
+ .loc 2 108 21
286
+ sub.f32 %f72, %f145, %f144;
287
+ .loc 2 109 28
288
+ add.f32 %f73, %f68, %f69;
289
+ .loc 2 110 39
290
+ setp.eq.f32 %p20, %f73, 0f00000000;
291
+ .loc 2 110 60
292
+ mov.b32 %r61, %f69;
293
+ mov.b32 %r62, %f73;
294
+ div.full.f32 %r60, %r61, %r62;
295
+ mov.b32 %f74, %r60;
296
+ .loc 2 110 49
297
+ selp.f32 %f75, 0f00000000, %f74, %p20;
298
+ .loc 2 112 17
299
+ fma.rn.f32 %f76, %f72, %f75, %f144;
300
+ .loc 2 113 15
301
+ add.f32 %f77, %f140, %f141;
302
+ .loc 2 113 30
303
+ mul.f32 %f78, %f72, %f72;
304
+ .loc 2 113 38
305
+ mul.f32 %f79, %f78, %f68;
306
+ .loc 2 113 22
307
+ fma.rn.f32 %f80, %f79, %f75, %f77;
308
+ .loc 2 108 21
309
+ sub.f32 %f81, %f146, %f76;
310
+ .loc 2 109 28
311
+ add.f32 %f82, %f70, %f73;
312
+ .loc 2 110 39
313
+ setp.eq.f32 %p21, %f82, 0f00000000;
314
+ .loc 2 110 60
315
+ mov.b32 %r65, %f82;
316
+ mov.b32 %r64, %f70;
317
+ div.full.f32 %r63, %r64, %r65;
318
+ mov.b32 %f83, %r63;
319
+ .loc 2 110 49
320
+ selp.f32 %f84, 0f00000000, %f83, %p21;
321
+ .loc 2 112 17
322
+ fma.rn.f32 %f85, %f84, %f81, %f76;
323
+ .loc 2 113 15
324
+ add.f32 %f86, %f142, %f80;
325
+ .loc 2 113 30
326
+ mul.f32 %f87, %f81, %f81;
327
+ .loc 2 113 38
328
+ mul.f32 %f88, %f73, %f87;
329
+ .loc 2 113 22
330
+ fma.rn.f32 %f89, %f84, %f88, %f86;
331
+ .loc 2 108 21
332
+ sub.f32 %f90, %f147, %f85;
333
+ .loc 2 109 28
334
+ add.f32 %f91, %f71, %f82;
335
+ .loc 2 110 39
336
+ setp.eq.f32 %p22, %f91, 0f00000000;
337
+ .loc 2 110 60
338
+ mov.b32 %r68, %f91;
339
+ mov.b32 %r67, %f71;
340
+ div.full.f32 %r66, %r67, %r68;
341
+ mov.b32 %f92, %r66;
342
+ .loc 2 110 49
343
+ selp.f32 %f93, 0f00000000, %f92, %p22;
344
+ .loc 2 112 17
345
+ fma.rn.f32 %f94, %f93, %f90, %f85;
346
+ .loc 2 113 15
347
+ add.f32 %f95, %f143, %f89;
348
+ .loc 2 113 30
349
+ mul.f32 %f96, %f90, %f90;
350
+ .loc 2 113 38
351
+ mul.f32 %f97, %f82, %f96;
352
+ .loc 2 113 22
353
+ fma.rn.f32 %f98, %f93, %f97, %f95;
354
+ $L__tmp4:
355
+ .loc 2 120 46
356
+ mov.b32 %r94, %f94;
357
+ shfl.sync.bfly.b32 %r95, %r94, 1, 31, -1;
358
+ mov.b32 %f99, %r95;
359
+ mov.b32 %r96, %f98;
360
+ shfl.sync.bfly.b32 %r97, %r96, 1, 31, -1;
361
+ mov.b32 %f100, %r97;
362
+ shfl.sync.bfly.b32 %r70, %r68, 1, 31, -1;
363
+ mov.b32 %f101, %r70;
364
+ $L__tmp5:
365
+ .loc 2 108 21
366
+ sub.f32 %f102, %f99, %f94;
367
+ .loc 2 109 28
368
+ add.f32 %f103, %f91, %f101;
369
+ .loc 2 110 39
370
+ setp.eq.f32 %p23, %f103, 0f00000000;
371
+ .loc 2 110 60
372
+ mov.b32 %r71, %f103;
373
+ div.full.f32 %r69, %r70, %r71;
374
+ mov.b32 %f104, %r69;
375
+ .loc 2 110 49
376
+ selp.f32 %f105, 0f00000000, %f104, %p23;
377
+ .loc 2 112 17
378
+ fma.rn.f32 %f37, %f105, %f102, %f94;
379
+ .loc 2 113 15
380
+ add.f32 %f106, %f98, %f100;
381
+ .loc 2 113 30
382
+ mul.f32 %f107, %f102, %f102;
383
+ .loc 2 113 38
384
+ mul.f32 %f108, %f91, %f107;
385
+ .loc 2 113 22
386
+ fma.rn.f32 %f109, %f105, %f108, %f106;
387
+ $L__tmp6:
388
+ .loc 1 69 23
389
+ mov.b32 %r73, %f109;
390
+ mov.b32 %r74, 1132462080;
391
+ div.full.f32 %r72, %r73, %r74;
392
+ mov.b32 %f110, %r72;
393
+ .loc 1 71 24
394
+ add.f32 %f38, %f110, 0f3727C5AC;
395
+ .loc 1 55 36
396
+ shl.b32 %r98, %r14, 14;
397
+ shl.b32 %r99, %r2, 12;
398
+ or.b32 %r100, %r98, %r99;
399
+ shl.b32 %r101, %r3, 8;
400
+ or.b32 %r102, %r100, %r101;
401
+ or.b32 %r11, %r102, %r5;
402
+ add.s64 %rd65, %rd16, %rd2;
403
+ mov.b32 %r139, -8;
404
+ rsqrt.approx.ftz.f32 %f123, %f38;
405
+ bra.uni $L__BB0_5;
406
+ $L__BB0_7:
407
+ .loc 1 65 54
408
+ mov.u32 %r120, 0x0;
409
+ mov.u32 %r121, 0x0;
410
+ mov.u32 %r122, 0x0;
411
+ mov.u32 %r123, 0x0;
412
+ @%p1 ld.global.L1::evict_first.v4.b32 { %r120, %r121, %r122, %r123 }, [ %rd66 + 0 ];
413
+ @!%p1 mov.u32 %r120, %r124;
414
+ @!%p1 mov.u32 %r121, %r124;
415
+ @!%p1 mov.u32 %r122, %r124;
416
+ @!%p1 mov.u32 %r123, %r124;
417
+ mov.b32 %f111, %r120;
418
+ mov.b32 %f112, %r121;
419
+ mov.b32 %f113, %r122;
420
+ mov.b32 %f114, %r123;
421
+ .loc 1 66 24
422
+ add.f32 %f115, %f39, %f111;
423
+ add.f32 %f116, %f40, %f112;
424
+ add.f32 %f117, %f41, %f113;
425
+ add.f32 %f118, %f42, %f114;
426
+ .loc 1 67 24
427
+ sub.f32 %f119, %f115, %f37;
428
+ sub.f32 %f120, %f116, %f37;
429
+ sub.f32 %f121, %f117, %f37;
430
+ sub.f32 %f122, %f118, %f37;
431
+ .loc 1 73 24
432
+ mul.f32 %f124, %f119, %f123;
433
+ mul.f32 %f125, %f120, %f123;
434
+ mul.f32 %f126, %f121, %f123;
435
+ mul.f32 %f127, %f122, %f123;
436
+ .loc 1 74 24
437
+ mul.f32 %f128, %f124, %f43;
438
+ mul.f32 %f129, %f125, %f44;
439
+ mul.f32 %f130, %f126, %f45;
440
+ mul.f32 %f131, %f127, %f46;
441
+ .loc 1 55 36
442
+ add.s32 %r139, %r139, 8;
443
+ .loc 1 76 29
444
+ add.s32 %r134, %r139, %r11;
445
+ mul.wide.s32 %rd60, %r134, 2;
446
+ add.s64 %rd59, %rd17, %rd60;
447
+ .loc 1 76 52
448
+ mov.b32 %r128, %f128;
449
+ cvt.rn.bf16.f32 %rs1, %r128;
450
+ mov.b32 %r129, %f129;
451
+ cvt.rn.bf16.f32 %rs2, %r129;
452
+ mov.b32 %r130, %f130;
453
+ cvt.rn.bf16.f32 %rs3, %r130;
454
+ mov.b32 %r131, %f131;
455
+ cvt.rn.bf16.f32 %rs4, %r131;
456
+ mov.b32 %r135, {%rs1, %rs2};
457
+ mov.b32 %r136, {%rs3, %rs4};
458
+ @%p1 st.global.v2.b32 [ %rd59 + 0 ], { %r135, %r136 };
459
+ .loc 1 55 36
460
+ add.s64 %rd66, %rd66, 32;
461
+ add.s64 %rd65, %rd65, 32;
462
+ add.s64 %rd64, %rd64, 32;
463
+ setp.lt.u32 %p41, %r139, 248;
464
+ @%p41 bra $L__BB0_5;
465
+ bra.uni $L__BB0_8;
466
+ $L__BB0_5:
467
+ .loc 1 59 51
468
+ mov.u32 %r103, 0x0;
469
+ mov.u32 %r104, 0x0;
470
+ mov.u32 %r105, 0x0;
471
+ mov.u32 %r106, 0x0;
472
+ @%p1 ld.global.L1::evict_last.v4.b32 { %r103, %r104, %r105, %r106 }, [ %rd64 + 0 ];
473
+ @!%p1 mov.u32 %r103, %r124;
474
+ @!%p1 mov.u32 %r104, %r124;
475
+ @!%p1 mov.u32 %r105, %r124;
476
+ @!%p1 mov.u32 %r106, %r124;
477
+ mov.b32 %f39, %r103;
478
+ mov.b32 %f40, %r104;
479
+ mov.b32 %f41, %r105;
480
+ mov.b32 %f42, %r106;
481
+ .loc 1 60 40
482
+ mov.u32 %r111, 0x0;
483
+ mov.u32 %r112, 0x0;
484
+ mov.u32 %r113, 0x0;
485
+ mov.u32 %r114, 0x0;
486
+ @%p1 ld.global.L1::evict_last.v4.b32 { %r111, %r112, %r113, %r114 }, [ %rd65 + 0 ];
487
+ @!%p1 mov.u32 %r111, %r124;
488
+ @!%p1 mov.u32 %r112, %r124;
489
+ @!%p1 mov.u32 %r113, %r124;
490
+ @!%p1 mov.u32 %r114, %r124;
491
+ mov.b32 %f43, %r111;
492
+ mov.b32 %f44, %r112;
493
+ mov.b32 %f45, %r113;
494
+ mov.b32 %f46, %r114;
495
+ .loc 1 64 57
496
+ @%p13 bra $L__BB0_7;
497
+ mov.u64 %rd51, assertMessage_1;
498
+ cvta.global.u64 %rd52, %rd51;
499
+ mov.u64 %rd53, assertFile_1;
500
+ cvta.global.u64 %rd54, %rd53;
501
+ mov.u64 %rd55, assertFunc_1;
502
+ cvta.global.u64 %rd56, %rd55;
503
+ { // callseq 3, 0
504
+ .reg .b32 temp_param_reg;
505
+ .param .b64 param0;
506
+ st.param.b64 [param0+0], %rd52;
507
+ .param .b64 param1;
508
+ st.param.b64 [param1+0], %rd54;
509
+ .param .b32 param2;
510
+ st.param.b32 [param2+0], %r137;
511
+ .param .b64 param3;
512
+ st.param.b64 [param3+0], %rd56;
513
+ .param .b64 param4;
514
+ st.param.b64 [param4+0], %rd61;
515
+ call.uni
516
+ __assertfail,
517
+ (
518
+ param0,
519
+ param1,
520
+ param2,
521
+ param3,
522
+ param4
523
+ );
524
+ } // callseq 3
525
+ bra.uni $L__BB0_7;
526
+ $L__BB0_8:
527
+ .loc 1 55 4
528
+ ret;
529
+ $L__tmp7:
530
+ $L__func_end0:
531
+
532
+ }
533
+ // .globl __nv_rsqrtf
534
+ .visible .func (.param .b32 func_retval0) __nv_rsqrtf(
535
+ .param .b32 __nv_rsqrtf_param_0
536
+ )
537
+ {
538
+ .reg .f32 %f<3>;
539
+ $L__func_begin1:
540
+
541
+ ld.param.f32 %f1, [__nv_rsqrtf_param_0];
542
+ rsqrt.approx.ftz.f32 %f2, %f1;
543
+ st.param.f32 [func_retval0+0], %f2;
544
+ ret;
545
+ $L__func_end1:
546
+
547
+ }
548
+ .file 1 "/tmp/torchinductor_root/lh/clhe4a3stvufxafmq3kk5hodazz2efctffte646znjdnv3lqi5oa.py"
549
+ .file 2 "/usr/local/lib/python3.10/dist-packages/torch/_inductor/triton_helpers.py"
550
+ .section .debug_abbrev
551
+ {
552
+ .b8 1
553
+ .b8 17
554
+ .b8 1
555
+ .b8 37
556
+ .b8 8
557
+ .b8 19
558
+ .b8 5
559
+ .b8 3
560
+ .b8 8
561
+ .b8 16
562
+ .b8 6
563
+ .b8 27
564
+ .b8 8
565
+ .b8 180
566
+ .b8 66
567
+ .b8 12
568
+ .b8 17
569
+ .b8 1
570
+ .b8 18
571
+ .b8 1
572
+ .b8 0
573
+ .b8 0
574
+ .b8 2
575
+ .b8 46
576
+ .b8 0
577
+ .b8 135
578
+ .b8 64
579
+ .b8 8
580
+ .b8 3
581
+ .b8 8
582
+ .b8 58
583
+ .b8 11
584
+ .b8 59
585
+ .b8 11
586
+ .b8 63
587
+ .b8 12
588
+ .b8 32
589
+ .b8 11
590
+ .b8 0
591
+ .b8 0
592
+ .b8 3
593
+ .b8 46
594
+ .b8 1
595
+ .b8 17
596
+ .b8 1
597
+ .b8 18
598
+ .b8 1
599
+ .b8 64
600
+ .b8 10
601
+ .b8 49
602
+ .b8 19
603
+ .b8 0
604
+ .b8 0
605
+ .b8 4
606
+ .b8 29
607
+ .b8 0
608
+ .b8 49
609
+ .b8 19
610
+ .b8 17
611
+ .b8 1
612
+ .b8 18
613
+ .b8 1
614
+ .b8 88
615
+ .b8 11
616
+ .b8 89
617
+ .b8 11
618
+ .b8 87
619
+ .b8 11
620
+ .b8 0
621
+ .b8 0
622
+ .b8 5
623
+ .b8 29
624
+ .b8 1
625
+ .b8 49
626
+ .b8 19
627
+ .b8 17
628
+ .b8 1
629
+ .b8 18
630
+ .b8 1
631
+ .b8 88
632
+ .b8 11
633
+ .b8 89
634
+ .b8 11
635
+ .b8 87
636
+ .b8 11
637
+ .b8 0
638
+ .b8 0
639
+ .b8 0
640
+ }
641
+ .section .debug_info
642
+ {
643
+ .b32 298
644
+ .b8 2
645
+ .b8 0
646
+ .b32 .debug_abbrev
647
+ .b8 8
648
+ .b8 1
649
+ .b8 116
650
+ .b8 114
651
+ .b8 105
652
+ .b8 116
653
+ .b8 111
654
+ .b8 110
655
+ .b8 0
656
+ .b8 2
657
+ .b8 0
658
+ .b8 99
659
+ .b8 108
660
+ .b8 104
661
+ .b8 101
662
+ .b8 52
663
+ .b8 97
664
+ .b8 51
665
+ .b8 115
666
+ .b8 116
667
+ .b8 118
668
+ .b8 117
669
+ .b8 102
670
+ .b8 120
671
+ .b8 97
672
+ .b8 102
673
+ .b8 109
674
+ .b8 113
675
+ .b8 51
676
+ .b8 107
677
+ .b8 107
678
+ .b8 53
679
+ .b8 104
680
+ .b8 111
681
+ .b8 100
682
+ .b8 97
683
+ .b8 122
684
+ .b8 122
685
+ .b8 50
686
+ .b8 101
687
+ .b8 102
688
+ .b8 99
689
+ .b8 116
690
+ .b8 102
691
+ .b8 102
692
+ .b8 116
693
+ .b8 101
694
+ .b8 54
695
+ .b8 52
696
+ .b8 54
697
+ .b8 122
698
+ .b8 110
699
+ .b8 106
700
+ .b8 100
701
+ .b8 110
702
+ .b8 118
703
+ .b8 51
704
+ .b8 108
705
+ .b8 113
706
+ .b8 105
707
+ .b8 53
708
+ .b8 111
709
+ .b8 97
710
+ .b8 46
711
+ .b8 112
712
+ .b8 121
713
+ .b8 0
714
+ .b32 .debug_line
715
+ .b8 47
716
+ .b8 116
717
+ .b8 109
718
+ .b8 112
719
+ .b8 47
720
+ .b8 116
721
+ .b8 111
722
+ .b8 114
723
+ .b8 99
724
+ .b8 104
725
+ .b8 105
726
+ .b8 110
727
+ .b8 100
728
+ .b8 117
729
+ .b8 99
730
+ .b8 116
731
+ .b8 111
732
+ .b8 114
733
+ .b8 95
734
+ .b8 114
735
+ .b8 111
736
+ .b8 111
737
+ .b8 116
738
+ .b8 47
739
+ .b8 108
740
+ .b8 104
741
+ .b8 0
742
+ .b8 1
743
+ .b64 $L__func_begin0
744
+ .b64 $L__func_end0
745
+ .b8 2
746
+ .b8 116
747
+ .b8 114
748
+ .b8 105
749
+ .b8 116
750
+ .b8 111
751
+ .b8 110
752
+ .b8 95
753
+ .b8 95
754
+ .b8 48
755
+ .b8 100
756
+ .b8 49
757
+ .b8 100
758
+ .b8 50
759
+ .b8 100
760
+ .b8 51
761
+ .b8 100
762
+ .b8 52
763
+ .b8 100
764
+ .b8 53
765
+ .b8 100
766
+ .b8 101
767
+ .b8 54
768
+ .b8 100
769
+ .b8 101
770
+ .b8 0
771
+ .b8 116
772
+ .b8 114
773
+ .b8 105
774
+ .b8 116
775
+ .b8 111
776
+ .b8 110
777
+ .b8 95
778
+ .b8 95
779
+ .b8 48
780
+ .b8 100
781
+ .b8 49
782
+ .b8 100
783
+ .b8 50
784
+ .b8 100
785
+ .b8 51
786
+ .b8 100
787
+ .b8 52
788
+ .b8 100
789
+ .b8 53
790
+ .b8 100
791
+ .b8 101
792
+ .b8 54
793
+ .b8 100
794
+ .b8 101
795
+ .b8 0
796
+ .b8 1
797
+ .b8 18
798
+ .b8 1
799
+ .b8 1
800
+ .b8 3
801
+ .b64 $L__func_begin0
802
+ .b64 $L__func_end0
803
+ .b8 1
804
+ .b8 156
805
+ .b32 125
806
+ .b8 4
807
+ .b32 125
808
+ .b64 $L__tmp1
809
+ .b64 $L__tmp2
810
+ .b8 2
811
+ .b8 44
812
+ .b8 38
813
+ .b8 5
814
+ .b32 125
815
+ .b64 $L__tmp3
816
+ .b64 $L__tmp6
817
+ .b8 2
818
+ .b8 50
819
+ .b8 41
820
+ .b8 4
821
+ .b32 125
822
+ .b64 $L__tmp3
823
+ .b64 $L__tmp6
824
+ .b8 2
825
+ .b8 120
826
+ .b8 46
827
+ .b8 0
828
+ .b8 4
829
+ .b32 125
830
+ .b64 $L__tmp4
831
+ .b64 $L__tmp5
832
+ .b8 2
833
+ .b8 50
834
+ .b8 41
835
+ .b8 0
836
+ .b8 0
837
+ }
838
+ .section .debug_pubnames
839
+ {
840
+ .b32 $L__pubNames_end0-$L__pubNames_start0
841
+ $L__pubNames_start0:
842
+ .b8 2
843
+ .b8 0
844
+ .b32 .debug_info
845
+ .b32 302
846
+ .b32 125
847
+ .b8 116
848
+ .b8 114
849
+ .b8 105
850
+ .b8 116
851
+ .b8 111
852
+ .b8 110
853
+ .b8 95
854
+ .b8 95
855
+ .b8 48
856
+ .b8 100
857
+ .b8 49
858
+ .b8 100
859
+ .b8 50
860
+ .b8 100
861
+ .b8 51
862
+ .b8 100
863
+ .b8 52
864
+ .b8 100
865
+ .b8 53
866
+ .b8 100
867
+ .b8 101
868
+ .b8 54
869
+ .b8 100
870
+ .b8 101
871
+ .b8 0
872
+ .b32 0
873
+ $L__pubNames_end0:
874
+ }
875
+ .section .debug_pubtypes
876
+ {
877
+ .b32 $L__pubTypes_end0-$L__pubTypes_start0
878
+ $L__pubTypes_start0:
879
+ .b8 2
880
+ .b8 0
881
+ .b32 .debug_info
882
+ .b32 302
883
+ .b32 0
884
+ $L__pubTypes_end0:
885
+ }
886
+ .section .debug_loc { }