Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .launchpadlib/api.launchpad.net/cache/api.launchpad.net,devel,~deadsnakes,+archive,ubuntu,ppa,ws.op=getSigningKeyData-application,json,c76e9ed0b661c7fa5da42e8fb2da319e +22 -0
- .local/share/jupyter/nbextensions/snippets_menu/snippets_submenus_python/sympy.js +750 -0
- .local/share/jupyter/nbextensions/toc2/toc2.js +826 -0
- .local/share/jupyter/nbextensions/toc2/toc2.yaml +104 -0
- .local/share/jupyter/nbextensions/toggle_all_line_numbers/main.js +82 -0
- .local/share/jupyter/nbextensions/toggle_all_line_numbers/main.yaml +16 -0
- .local/share/jupyter/nbextensions/toggle_all_line_numbers/readme.md +5 -0
- .local/share/jupyter/nbextensions/tree-filter/demo.gif +0 -0
- .local/share/jupyter/nbextensions/varInspector/__pycache__/var_list.cpython-310.pyc +0 -0
- .local/share/jupyter/nbextensions/varInspector/demo.gif +0 -0
- .local/share/jupyter/nbextensions/varInspector/jquery.tablesorter.min.js +2 -0
- .local/share/jupyter/nbextensions/varInspector/main.css +119 -0
- .local/share/jupyter/nbextensions/varInspector/main.js +462 -0
- .local/share/jupyter/nbextensions/varInspector/varInspector.yaml +45 -0
- .local/share/jupyter/nbextensions/varInspector/var_list.r +17 -0
- .local/share/jupyter/nbextensions/zenmode/README.md +4 -0
- .local/share/jupyter/nbextensions/zenmode/images/back3.jpg +0 -0
- .local/share/jupyter/nbextensions/zenmode/main.css +34 -0
- .local/share/jupyter/nbextensions/zenmode/main.js +196 -0
- .local/share/jupyter/nbextensions/zenmode/zenmode.yaml +28 -0
- .triton/dump/0db70b0f0846c3c6c38c4ccb3ef979e3/triton_.cubin +0 -0
- .triton/dump/0db70b0f0846c3c6c38c4ccb3ef979e3/triton_.ttir +113 -0
- .triton/dump/174400122b6dbc99e086544aa1856b9f/triton_.cubin +0 -0
- .triton/dump/199215289adb100508718a5a762ba4d7/triton_.cubin +0 -0
- .triton/dump/1c14bdb6903aa6825e214bbdf57fd077/triton_.cubin +0 -0
- .triton/dump/1c14bdb6903aa6825e214bbdf57fd077/triton_.ptx +312 -0
- .triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.cubin +0 -0
- .triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.llir +332 -0
- .triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.ptx +446 -0
- .triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.ttgir +26 -0
- .triton/dump/305a9479aab997a3a16bfe46bb303a50/triton_.cubin +0 -0
- .triton/dump/345a87a492fd703c73ab83265a21fcb6/triton_.cubin +0 -0
- .triton/dump/3cd3b6d7993c56f7d0340d40c84f737c/triton_.ptx +809 -0
- .triton/dump/3cd3b6d7993c56f7d0340d40c84f737c/triton_.ttgir +152 -0
- .triton/dump/4993935f9a0e5939755cfb42600362cf/triton_.cubin +0 -0
- .triton/dump/4993935f9a0e5939755cfb42600362cf/triton_.ptx +295 -0
- .triton/dump/4c6ad48573c74d55ed79384f6b432d50/triton_.ttir +18 -0
- .triton/dump/4ce9eb7fe63f19e54893f0c74df91471/triton_.ttgir +28 -0
- .triton/dump/4ce9eb7fe63f19e54893f0c74df91471/triton_.ttir +27 -0
- .triton/dump/51e329eae41e4ee17aa201fff8371d94/triton_.llir +0 -0
- .triton/dump/76fb48b96c75cb8e388c291a18ef9b02/triton_.llir +600 -0
- .triton/dump/76fb48b96c75cb8e388c291a18ef9b02/triton_.ttir +153 -0
- .triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.cubin +0 -0
- .triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.llir +745 -0
- .triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.ttir +101 -0
- .triton/dump/7dc5bb3e5c2bb99527fff34c6fba7810/triton_.ptx +277 -0
- .triton/dump/884b5df35d2a25fd91308249e7657806/triton_.ttir +17 -0
- .triton/dump/93ab21d512b10f4271e68c2f0ae3393c/triton_.cubin +0 -0
- .triton/dump/9a2fb05196b13393bea452d08e9aaca8/triton_.cubin +0 -0
- .triton/dump/9f68cc707cb8f8bff3232abf59cbd9ec/triton_.ptx +886 -0
.launchpadlib/api.launchpad.net/cache/api.launchpad.net,devel,~deadsnakes,+archive,ubuntu,ppa,ws.op=getSigningKeyData-application,json,c76e9ed0b661c7fa5da42e8fb2da319e
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
status: 200
|
2 |
+
date: Mon, 05 Feb 2024 23:25:35 GMT
|
3 |
+
server: gunicorn
|
4 |
+
x-powered-by: Zope (www.zope.org), Python (www.python.org)
|
5 |
+
content-security-policy: frame-ancestors 'self';
|
6 |
+
content-type: application/json
|
7 |
+
strict-transport-security: max-age=15552000
|
8 |
+
vary: Accept,Accept-Encoding
|
9 |
+
x-content-type-options: nosniff
|
10 |
+
x-frame-options: SAMEORIGIN
|
11 |
+
x-launchpad-revision: 9643586c585856148a18782148972ae9c1179d06
|
12 |
+
x-lazr-notifications: []
|
13 |
+
x-xss-protection: 1; mode=block
|
14 |
+
x-vcs-revision: 9643586c585856148a18782148972ae9c1179d06
|
15 |
+
x-request-id: 452e0c68-aa99-4bb4-abc3-237c7bb39fae
|
16 |
+
content-length: 1641
|
17 |
+
-content-encoding: gzip
|
18 |
+
content-location: https://api.launchpad.net/devel/~deadsnakes/+archive/ubuntu/ppa?ws.op=getSigningKeyData
|
19 |
+
-varied-accept: application/json
|
20 |
+
-varied-accept-encoding: gzip, deflate
|
21 |
+
|
22 |
+
"-----BEGIN PGP PUBLIC KEY BLOCK-----\n\nmQINBFl8fYEBEADQmGZ6pDrwY9iH9DVlwNwTOvOZ7q7lHXPl/TLfMs1tckMc/D9a\nhsdBN9VWtMmo+RySvhkIe8X15r65TFs2HE8ft6j2e/4K472pObM1hB+ajiU/wYX2\nSyq7DBlNm6YMP5/SyQzRxqis4Ja1uUjW4Q5/Csdf5In8uMzXj5D1P7qOiP2aNa0E\nr3w6PXWRTuTihWZOsHv8npyVYDBRR6gEZbd3r86snI/7o8Bfmad3KjbxL7aOdNMw\nAqQFaNKl7Y+UJpv1CNFIf+twcOoC0se1SrsVJlAH9HNHM7XGQsPUwpNvQlcmvr+t\n1vVS2m72lk3gyShDuJpi1TifGw+DoTqu54U0k+0sZm4pnQVeiizNkefU2UqOoGlt\n4oiG9nIhSX04xRlGes3Ya0OjNI5b1xbcYoR+r0c3odI+UCw3VSZtKDX/xlH1o/82\nb8ouXeE7LA1i4DvGNj4VSvoxv4ggIznxMf+PkWXWKwRGsbAAXF52rr4FUaeaKoIU\nDkJqHXAxrB3PQslZ+ZgBEukkQZF76NkqRqP1E7FXzZZMo2eEL7vtnhSzUlanOf42\nECBoWHVoZQaRFMNbGpqlg9aWedHGyetMStS3nH1sqanr+i4I8VR/UH+ilarPTW3T\nE0apWlsH8+N3IKbRx2wgrRZNoQEuyVtvyewDFYShJB3Zxt7VCy67vKAl1QARAQAB\ntBxMYXVuY2hwYWQgUFBBIGZvciBkZWFkc25ha2VziQI4BBMBAgAiBQJZfH2BAhsD\nBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAAKCRC6aTI2anVXdvwhD/4oI3yckeKn\n9aJNNTJsyw4ydMkIAOdG+jbZsYv/rN73UVQF1RA8HC71SDmbd0Nu80koBOX+USuL\nvvhoMIsARlD5dLx5f/zaQcYWJm/BtsMF/eZ4s1xsenwW6PpXd8FpaTn1qtg/8+O9\n99R4uSetAhhyf1vSRb/8U0sgSQd38mpZZFq352UuVisXnmCThj621loQubYJ3lwU\nLSLs8wmgo4XIYH7UgdavV9dfplPh0M19RHQL3wTyQP2KRNRq1rG7/n1XzUwDyqY6\neMVhdVhvnxAGztvdFCySVzBRr/rCw6quhcYQwBqdqaXhz63np+4mlUNfd8Eu+Vas\nb/tbteF/pDu0yeFMpK4X09Cwn2kYYCpq4XujijW+iRWb4MO3G8LLi8oBAHP/k0CM\n/QvSRbbG8JDQkQDH37Efm8iE/EttJTixjKAIfyugmvEHfcrnxaMoBioa6h6McQrM\nvI8bJirxorJzOVF4kY7xXvMYwjzaDC8G0fTA8SzQRaShksR3USXZjz8vS6tZ+YNa\nmRHPoZ3Ua0bz4t2aCcu/fknVGsXcNBazNIK9WF2665Ut/b7lDbojXsUZ3PpuqOoe\nGQL9LRj7nmCI6ugoKkNp8ZXcGJ8BGw37Wep2ztyzDohXp6f/4mGgy2KYV9R4S8D5\nyBDUU6BS7Su5nhQMStfdfr4FffLmnvFC9w==\n=7hFk\n-----END PGP PUBLIC KEY BLOCK-----\n"
|
.local/share/jupyter/nbextensions/snippets_menu/snippets_submenus_python/sympy.js
ADDED
@@ -0,0 +1,750 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
define([
|
2 |
+
"require",
|
3 |
+
"./sympy_functions",
|
4 |
+
"./sympy_assumptions",
|
5 |
+
], function (requirejs, sympy_functions, sympy_assumptions) {
|
6 |
+
return {
|
7 |
+
'name' : 'SymPy',
|
8 |
+
'sub-menu' : [
|
9 |
+
{
|
10 |
+
'name' : 'Setup',
|
11 |
+
'snippet' : [
|
12 |
+
'from __future__ import print_function, division',
|
13 |
+
'from sympy import *',
|
14 |
+
'a, s, t, u, v, w, x, y, z = symbols("a, s, t, u, v, w, x, y, z")',
|
15 |
+
'k, m, n = symbols("k, m, n", integer=True)',
|
16 |
+
'f, g, h = symbols("f, g, h", cls=Function)',
|
17 |
+
'init_printing()',
|
18 |
+
],
|
19 |
+
},
|
20 |
+
{
|
21 |
+
'name' : 'Documentation',
|
22 |
+
'external-link' : 'http://docs.sympy.org/latest/index.html',
|
23 |
+
},
|
24 |
+
'---',
|
25 |
+
{
|
26 |
+
'name' : 'Constants',
|
27 |
+
'sub-menu' : [
|
28 |
+
{
|
29 |
+
'name' : '1',
|
30 |
+
'snippet' : ['S(1)',], //'S.One',],
|
31 |
+
},
|
32 |
+
// {
|
33 |
+
// 'name' : '0',
|
34 |
+
// 'snippet' : ['S.Zero',],
|
35 |
+
// },
|
36 |
+
// {
|
37 |
+
// 'name' : '-1',
|
38 |
+
// 'snippet' : ['S.NegativeOne',],
|
39 |
+
// },
|
40 |
+
{
|
41 |
+
'name' : '1/2',
|
42 |
+
'snippet' : ['S(1)/2',], //'S.Half',],
|
43 |
+
},
|
44 |
+
{
|
45 |
+
'name' : 'Rational numbers',
|
46 |
+
'snippet' : ['Rational(3, 7)',],
|
47 |
+
},
|
48 |
+
'---',
|
49 |
+
{
|
50 |
+
'name' : 'Base of natural logarithm, \\(e\\)',
|
51 |
+
'snippet' : ['E',],
|
52 |
+
},
|
53 |
+
{
|
54 |
+
'name' : 'Unit imaginary number, \\(i\\)',
|
55 |
+
'snippet' : ['I',],
|
56 |
+
},
|
57 |
+
{
|
58 |
+
'name' : 'Geometric constant, \\(\\pi\\)',
|
59 |
+
'snippet' : ['pi',],
|
60 |
+
},
|
61 |
+
{
|
62 |
+
'name' : 'Golden ratio, \\(\\phi\\)',
|
63 |
+
'snippet' : ['GoldenRatio',],
|
64 |
+
},
|
65 |
+
{
|
66 |
+
'name' : 'Euler-Mascheroni constant, \\(\\gamma\\)',
|
67 |
+
'snippet' : ['EulerGamma',],
|
68 |
+
},
|
69 |
+
{
|
70 |
+
'name' : 'Catalan\'s constant, \\(K\\)',
|
71 |
+
'snippet' : ['Catalan',],
|
72 |
+
},
|
73 |
+
'---',
|
74 |
+
{
|
75 |
+
'name' : 'Infinity, \\(\\infty\\)',
|
76 |
+
'snippet' : ['oo',], // 'S.Infinity'
|
77 |
+
},
|
78 |
+
// {
|
79 |
+
// 'name' : 'Negative infinity, \\(-\\infty\\)',
|
80 |
+
// 'snippet' : ['S.NegativeInfinity',],
|
81 |
+
// },
|
82 |
+
{
|
83 |
+
'name' : 'Complex infinity, \\(\\tilde{\\infty}\\)',
|
84 |
+
'snippet' : ['zoo'], //'S.ComplexInfinity',],
|
85 |
+
},
|
86 |
+
{
|
87 |
+
'name' : 'NaN',
|
88 |
+
'snippet' : ['nan',], // 'S.NaN'
|
89 |
+
},
|
90 |
+
],
|
91 |
+
},
|
92 |
+
sympy_functions,
|
93 |
+
{
|
94 |
+
'name' : 'Calculus',
|
95 |
+
'sub-menu' : [
|
96 |
+
{
|
97 |
+
'name' : 'Differentiate once',
|
98 |
+
'snippet' : [
|
99 |
+
'expr = exp(x**2)',
|
100 |
+
'deriv = diff(expr, x)',
|
101 |
+
],
|
102 |
+
},
|
103 |
+
{
|
104 |
+
'name' : 'Differentiate multiple times',
|
105 |
+
'snippet' : [
|
106 |
+
'expr = x**4',
|
107 |
+
'deriv = diff(expr, x, 3)',
|
108 |
+
],
|
109 |
+
},
|
110 |
+
{
|
111 |
+
'name' : 'Mixed partial derivatives',
|
112 |
+
'snippet' : [
|
113 |
+
'expr = exp(x*y*z)',
|
114 |
+
'deriv = diff(expr, x, y, 2, z, 4)',
|
115 |
+
],
|
116 |
+
},
|
117 |
+
{
|
118 |
+
'name' : 'Finite differences',
|
119 |
+
'snippet' : [
|
120 |
+
'dx0, dx1 = symbols("dx0, dx1")',
|
121 |
+
'formula = as_finite_diff(f(x).diff(x), [x-dx0, x, x+dx1])',
|
122 |
+
],
|
123 |
+
},
|
124 |
+
'---',
|
125 |
+
{
|
126 |
+
'name' : 'Indefinite integral',
|
127 |
+
'snippet' : [
|
128 |
+
'integral = integrate(cos(x), x)',
|
129 |
+
],
|
130 |
+
},
|
131 |
+
{
|
132 |
+
'name' : 'Definite integral',
|
133 |
+
'snippet' : [
|
134 |
+
'integral = integrate(exp(-x), (x, 0, oo))',
|
135 |
+
],
|
136 |
+
},
|
137 |
+
{
|
138 |
+
'name' : 'Double integral',
|
139 |
+
'snippet' : [
|
140 |
+
'integral = integrate(exp(-x**2-y**2), (x, -oo, oo), (y, -oo, oo))',
|
141 |
+
],
|
142 |
+
},
|
143 |
+
'---',
|
144 |
+
{
|
145 |
+
'name' : 'Limits',
|
146 |
+
'snippet' : [
|
147 |
+
'lim = limit(sin(x)/x, x, 0, "+")',
|
148 |
+
],
|
149 |
+
},
|
150 |
+
{
|
151 |
+
'name' : 'Series expansion',
|
152 |
+
'snippet' : [
|
153 |
+
'expr = exp(sin(x))',
|
154 |
+
'ser = series(expr, x, 0, 6)',
|
155 |
+
],
|
156 |
+
},
|
157 |
+
{
|
158 |
+
'name' : 'Series expansion, removing order term',
|
159 |
+
'snippet' : [
|
160 |
+
'expr = exp(sin(x))',
|
161 |
+
'ser = series(expr, x, 0, 6).removeO()',
|
162 |
+
],
|
163 |
+
},
|
164 |
+
{
|
165 |
+
'name' : 'Summations',
|
166 |
+
'snippet' : [
|
167 |
+
'ell_min,ell,ell_max = symbols("ell_min,ell,ell_max", integer=True)',
|
168 |
+
'summ = summation((2*ell + 1), (ell, ell_min, ell_max))',
|
169 |
+
],
|
170 |
+
},
|
171 |
+
],
|
172 |
+
},
|
173 |
+
|
174 |
+
{
|
175 |
+
'name' : 'Solvers',
|
176 |
+
'sub-menu' : [
|
177 |
+
{
|
178 |
+
'name' : 'Solve for one variable',
|
179 |
+
'snippet' : [
|
180 |
+
'expr = x**4 - 4*x**3 + 2*x**2 - x',
|
181 |
+
'eqn = Eq(expr, 0)',
|
182 |
+
'soln = solve(eqn, x)',
|
183 |
+
],
|
184 |
+
},
|
185 |
+
{
|
186 |
+
'name' : 'Solve for two variables',
|
187 |
+
'snippet' : [
|
188 |
+
'eqns = Eq(x + y, 4), Eq(x*y, 3)',
|
189 |
+
'soln = solve(eqns, [x,y])',
|
190 |
+
],
|
191 |
+
},
|
192 |
+
{
|
193 |
+
'name' : 'Solve differential equation',
|
194 |
+
'snippet' : [
|
195 |
+
'expr = f(x).diff(x, x) + 9*f(x)',
|
196 |
+
"eqn = Eq(expr, 1) # f''(x) + 9f(x) = 1",
|
197 |
+
'soln = dsolve(eqn, f(x))',
|
198 |
+
],
|
199 |
+
},
|
200 |
+
],
|
201 |
+
},
|
202 |
+
{
|
203 |
+
'name' : 'Manipulating expressions',
|
204 |
+
'sub-menu' : [
|
205 |
+
{
|
206 |
+
'name' : 'Simplify',
|
207 |
+
'snippet' : [
|
208 |
+
'expr = (x**3 + x**2 - x - 1)/(x**2 + 2*x + 1)',
|
209 |
+
'expr = simplify(expr)',
|
210 |
+
],
|
211 |
+
},
|
212 |
+
{
|
213 |
+
'name' : 'Refine, using assumptions',
|
214 |
+
// 'snippet' : [
|
215 |
+
// 'expr = exp(pi*I*2*x)',
|
216 |
+
// 'assumption = Q.integer(x) & Q.integer(y)',
|
217 |
+
// 'expr = refine(expr, assumption)',
|
218 |
+
// ],
|
219 |
+
'sub-menu' : [
|
220 |
+
{
|
221 |
+
'name' : 'Refine',
|
222 |
+
'snippet' : [
|
223 |
+
'expr = exp(pi*I*2*(x+y))',
|
224 |
+
'assumption = Q.integer(x) & Q.integer(y)',
|
225 |
+
'expr = refine(expr, assumption)',
|
226 |
+
],
|
227 |
+
},
|
228 |
+
{
|
229 |
+
'name' : 'Refine in context manager',
|
230 |
+
'snippet' : [
|
231 |
+
'expr = exp(pi*I*2*(x+y))',
|
232 |
+
'with assuming(Q.integer(x) & Q.integer(y)):',
|
233 |
+
' expr = refine(expr)',
|
234 |
+
],
|
235 |
+
},
|
236 |
+
sympy_assumptions,
|
237 |
+
],
|
238 |
+
},
|
239 |
+
{
|
240 |
+
'name' : 'Expansion',
|
241 |
+
'sub-menu' : [
|
242 |
+
{
|
243 |
+
'name' : 'Expand basic expressions',
|
244 |
+
'snippet' : [
|
245 |
+
'expr = (x + 2)*(x - 3)',
|
246 |
+
'expr = expand(expr)',
|
247 |
+
],
|
248 |
+
},
|
249 |
+
'---',
|
250 |
+
{
|
251 |
+
'name' : 'Expand, including complex parts',
|
252 |
+
'snippet' : [
|
253 |
+
'expr = cos(x)',
|
254 |
+
'expr = expand(expr, complex=True)',
|
255 |
+
],
|
256 |
+
},
|
257 |
+
{
|
258 |
+
'name' : 'Expand, including functions',
|
259 |
+
'snippet' : [
|
260 |
+
'expr = gamma(x+3)',
|
261 |
+
'expr = expand(expr, func=True)',
|
262 |
+
],
|
263 |
+
},
|
264 |
+
{
|
265 |
+
'name' : 'Expand, including trig',
|
266 |
+
'snippet' : [
|
267 |
+
'expr = sin(x+y)*(x+y)',
|
268 |
+
'expr = expand(expr, trig=True)',
|
269 |
+
],
|
270 |
+
},
|
271 |
+
'---',
|
272 |
+
{
|
273 |
+
'name' : 'Expand only real and imaginary parts',
|
274 |
+
'snippet' : [
|
275 |
+
'expand_complex(x)',
|
276 |
+
],
|
277 |
+
},
|
278 |
+
{
|
279 |
+
'name' : 'Expand only functions',
|
280 |
+
'snippet' : [
|
281 |
+
'expr = gamma(x + 2)',
|
282 |
+
'expr = expand_func(expr)',
|
283 |
+
],
|
284 |
+
},
|
285 |
+
{
|
286 |
+
'name' : 'Expand only hypergeometric functions',
|
287 |
+
'snippet' : [
|
288 |
+
'expr = hyper([1,1], [1,], z) + gamma(z)',
|
289 |
+
'expr = hyperexpand(expr)',
|
290 |
+
],
|
291 |
+
},
|
292 |
+
{
|
293 |
+
'name' : 'Expand only logarithms',
|
294 |
+
'snippet' : [
|
295 |
+
'a, b = symbols("a, b", positive=True)',
|
296 |
+
'expr = log(a**2*b)',
|
297 |
+
'expr = expand_log(expr)',
|
298 |
+
],
|
299 |
+
},
|
300 |
+
{
|
301 |
+
'name' : 'Expand only multiplication over addition',
|
302 |
+
'snippet' : [
|
303 |
+
'expr = y*(x + z)',
|
304 |
+
'expr = expand_mul(expr)',
|
305 |
+
],
|
306 |
+
},
|
307 |
+
{
|
308 |
+
'name' : 'Expand only multinomials',
|
309 |
+
'snippet' : [
|
310 |
+
'expr = (x + y + z)**3',
|
311 |
+
'expr = expand_multinomial(expr)',
|
312 |
+
],
|
313 |
+
},
|
314 |
+
{
|
315 |
+
'name' : 'Expand only powers of multiplied bases',
|
316 |
+
'snippet' : [
|
317 |
+
'a, b = symbols("a, b", positive=True)',
|
318 |
+
'expr = (a*b)**z',
|
319 |
+
'expr = expand_power_base(expr)',
|
320 |
+
],
|
321 |
+
},
|
322 |
+
{
|
323 |
+
'name' : 'Expand only addition in exponents',
|
324 |
+
'snippet' : [
|
325 |
+
'expr = x**(y + 2)',
|
326 |
+
'expr = expand_power_exp(expr)',
|
327 |
+
],
|
328 |
+
},
|
329 |
+
{
|
330 |
+
'name' : 'Expand only trig',
|
331 |
+
'snippet' : [
|
332 |
+
'expr = sin(x+y)*(x+y)',
|
333 |
+
'expr = expand_trig(expr)',
|
334 |
+
],
|
335 |
+
},
|
336 |
+
],
|
337 |
+
},
|
338 |
+
{
|
339 |
+
'name' : 'Collect terms',
|
340 |
+
'sub-menu' : [
|
341 |
+
{
|
342 |
+
'name' : 'Collect as coefficients of one factor',
|
343 |
+
'snippet' : [
|
344 |
+
'expr = y*x**2 + z*x**2 + t*x - 2*x + 3',
|
345 |
+
'expr = collect(expr, x)',
|
346 |
+
],
|
347 |
+
},
|
348 |
+
{
|
349 |
+
'name' : 'Collect as coefficients of multiple factors',
|
350 |
+
'snippet' : [
|
351 |
+
'expr = x**2 + y*x**2 + x*y + y + z*y',
|
352 |
+
'expr = collect(expr, [x, y])',
|
353 |
+
],
|
354 |
+
},
|
355 |
+
{
|
356 |
+
'name' : 'Collect with respect to wild card',
|
357 |
+
'snippet' : [
|
358 |
+
'w = Wild("w")',
|
359 |
+
'expr = z*x**y - t*z**y',
|
360 |
+
'expr = collect(expr, w**y)',
|
361 |
+
],
|
362 |
+
},
|
363 |
+
{
|
364 |
+
'name' : 'Collect and apply function to each coefficient',
|
365 |
+
'snippet' : [
|
366 |
+
'expr = expand((x + y + 1)**3)',
|
367 |
+
'expr = collect(expr, x, factor)',
|
368 |
+
],
|
369 |
+
},
|
370 |
+
{
|
371 |
+
'name' : 'Recursively collect',
|
372 |
+
'snippet' : [
|
373 |
+
'expr = (x**2*y + x*y + x + y)/(x*y + z*y)',
|
374 |
+
'expr = rcollect(expr, y)',
|
375 |
+
],
|
376 |
+
},
|
377 |
+
{
|
378 |
+
'name' : 'Collect constants',
|
379 |
+
'snippet' : [
|
380 |
+
'expr = sqrt(3)*x + sqrt(7)*x + sqrt(3) + sqrt(7)',
|
381 |
+
'expr = collect_const(expr)',
|
382 |
+
],
|
383 |
+
},
|
384 |
+
],
|
385 |
+
},
|
386 |
+
{
|
387 |
+
'name' : 'Substitutions and replacements',
|
388 |
+
'sub-menu' : [
|
389 |
+
{
|
390 |
+
'name' : 'Substitute one subexpression for another',
|
391 |
+
'snippet' : [
|
392 |
+
'expr = 1 + x*y',
|
393 |
+
'expr = expr.subs(x, pi)',
|
394 |
+
],
|
395 |
+
},
|
396 |
+
{
|
397 |
+
'name' : 'Substitute multiple subexpressions successively',
|
398 |
+
'snippet' : [
|
399 |
+
'expr = (x+y)/y',
|
400 |
+
'substitutions = [(x+y, y), (y, x+y)]',
|
401 |
+
'expr = expr.subs(substitutions)',
|
402 |
+
],
|
403 |
+
},
|
404 |
+
{
|
405 |
+
'name' : 'Substitute multiple subexpressions simultaneously',
|
406 |
+
'snippet' : [
|
407 |
+
'expr = (x+y)/y',
|
408 |
+
'substitutions = [(x+y, y), (y, x+y)]',
|
409 |
+
'expr = expr.subs(substitutions, simultaneous=True)',
|
410 |
+
],
|
411 |
+
},
|
412 |
+
'---',
|
413 |
+
{
|
414 |
+
'name' : 'Replace one object with another',
|
415 |
+
'snippet' : [
|
416 |
+
'expr = 1 + x*y',
|
417 |
+
'expr = expr.replace(x, pi)',
|
418 |
+
],
|
419 |
+
},
|
420 |
+
{
|
421 |
+
'name' : 'Replace one object with some function of its arguments',
|
422 |
+
'snippet' : [
|
423 |
+
'expr = log(sin(x)) + tan(sin(x**2))',
|
424 |
+
'expr = expr.replace(sin, lambda arg: sin(2*arg))',
|
425 |
+
],
|
426 |
+
},
|
427 |
+
{
|
428 |
+
'name' : 'Replace a pattern with an object',
|
429 |
+
'snippet' : [
|
430 |
+
'# Note: `exclude=` specifies that the Wild cannot match any item in the list',
|
431 |
+
'a, b = symbols("a, b", cls=Wild, exclude=[x,y])',
|
432 |
+
'expr = 2*x + y + z',
|
433 |
+
'wild = a*x + b',
|
434 |
+
'replacement = b - a',
|
435 |
+
'# Note: `exact=True` demands that all Wilds have nonzero matches',
|
436 |
+
'expr = expr.replace(wild, replacement, exact=True)',
|
437 |
+
],
|
438 |
+
},
|
439 |
+
{
|
440 |
+
'name' : 'Replace a pattern with some function of that object',
|
441 |
+
'snippet' : [
|
442 |
+
'a = symbols("a", cls=Wild, exclude=[])',
|
443 |
+
'expr = log(sin(x)) + tan(sin(x**2))',
|
444 |
+
'expr.replace(sin(a), lambda a: sin(2*a))',
|
445 |
+
],
|
446 |
+
},
|
447 |
+
{
|
448 |
+
'name' : 'Replace anything with some function of that thing',
|
449 |
+
'snippet' : [
|
450 |
+
'g = 2*sin(x**3)',
|
451 |
+
'g.replace(lambda expr: expr.is_Function, lambda expr: expr**2)',
|
452 |
+
],
|
453 |
+
},
|
454 |
+
'---',
|
455 |
+
{
|
456 |
+
'name' : 'Replace exact subexpressions',
|
457 |
+
'snippet' : [
|
458 |
+
'expr = x**2 + x**4',
|
459 |
+
'replacements = {x**2: y}',
|
460 |
+
'expr = expr.xreplace(replacements)',
|
461 |
+
],
|
462 |
+
},
|
463 |
+
// {
|
464 |
+
// 'name' : 'rewrite',
|
465 |
+
// 'snippet' : [
|
466 |
+
// 'expr = tan(x)',
|
467 |
+
// 'expr = expr.rewrite(sin)',
|
468 |
+
// ],
|
469 |
+
// },
|
470 |
+
],
|
471 |
+
},
|
472 |
+
{
|
473 |
+
'name' : 'Evaluation',
|
474 |
+
'sub-menu' : [
|
475 |
+
{
|
476 |
+
'name' : 'Evaluate numerically to arbitrary precision',
|
477 |
+
'snippet' : [
|
478 |
+
'expr = x * sqrt(8)',
|
479 |
+
'precision = 50',
|
480 |
+
'val = N(expr, precision, subs={x:2.4})',
|
481 |
+
],
|
482 |
+
},
|
483 |
+
{
|
484 |
+
'name' : 'Evaluate numerically to python float',
|
485 |
+
'snippet' : [
|
486 |
+
'expr = x * sqrt(8)',
|
487 |
+
'val = float(expr.subs([(x, 2.4)]))',
|
488 |
+
],
|
489 |
+
},
|
490 |
+
{
|
491 |
+
'name' : 'Create numpy function for efficient evaluation',
|
492 |
+
'snippet' : [
|
493 |
+
'import numpy',
|
494 |
+
'a = numpy.arange(10)',
|
495 |
+
'expr = sin(x)',
|
496 |
+
'f = lambdify(x, expr, "numpy")',
|
497 |
+
'vals = f(a)',
|
498 |
+
],
|
499 |
+
},
|
500 |
+
],
|
501 |
+
},
|
502 |
+
'---',
|
503 |
+
{
|
504 |
+
'name' : 'Polynomials',
|
505 |
+
'sub-menu' : [
|
506 |
+
{
|
507 |
+
'name' : 'Factor polynomial over rationals',
|
508 |
+
'snippet' : [
|
509 |
+
'expr = x**3 - x**2 + x - 1',
|
510 |
+
'expr = factor(expr)',
|
511 |
+
],
|
512 |
+
},
|
513 |
+
{
|
514 |
+
'name' : 'Collect common powers of a term',
|
515 |
+
'snippet' : [
|
516 |
+
'expr = x*y + x - 3 + 2*x**2 - z*x**2 + x**3',
|
517 |
+
'expr = collect(expr, x)',
|
518 |
+
],
|
519 |
+
},
|
520 |
+
{
|
521 |
+
'name' : 'Extract coefficient of a term',
|
522 |
+
'snippet' : [
|
523 |
+
'expr = 3+2*x+4*x**2',
|
524 |
+
'expr = expr.coeff(x**2)',
|
525 |
+
],
|
526 |
+
},
|
527 |
+
],
|
528 |
+
},
|
529 |
+
{
|
530 |
+
'name' : 'Rational functions',
|
531 |
+
'sub-menu' : [
|
532 |
+
{
|
533 |
+
'name' : 'Cancel',
|
534 |
+
'snippet' : [
|
535 |
+
'expr = (x**2 + 2*x + 1)/(x**2 + x)',
|
536 |
+
'expr = cancel(expr)',
|
537 |
+
],
|
538 |
+
},
|
539 |
+
{
|
540 |
+
'name' : 'Decompose into partial fractions',
|
541 |
+
'snippet' : [
|
542 |
+
'expr = (4*x**3 + 21*x**2 + 10*x + 12)/(x**4 + 5*x**3 + 5*x**2 + 4*x)',
|
543 |
+
'expr = apart(expr)',
|
544 |
+
],
|
545 |
+
},
|
546 |
+
{
|
547 |
+
'name' : 'Join over common denominator',
|
548 |
+
'snippet' : [
|
549 |
+
'expr = 1/x + 1/y',
|
550 |
+
'expr = ratsimp(expr)',
|
551 |
+
],
|
552 |
+
},
|
553 |
+
{
|
554 |
+
'name' : 'Remove square roots from denominator',
|
555 |
+
'snippet' : [
|
556 |
+
'expr = 1/(1+I)',
|
557 |
+
'expr = radsimp(expr)',
|
558 |
+
],
|
559 |
+
},
|
560 |
+
],
|
561 |
+
},
|
562 |
+
{
|
563 |
+
'name' : 'Powers',
|
564 |
+
'sub-menu' : [
|
565 |
+
{
|
566 |
+
'name' : 'Important caveats',
|
567 |
+
'external-link' : 'http://docs.sympy.org/dev/tutorial/simplification.html#powers'
|
568 |
+
},
|
569 |
+
'---',
|
570 |
+
// {
|
571 |
+
// 'name' : 'Setup for these snippets',
|
572 |
+
// 'snippet' : [
|
573 |
+
// 'x, y = symbols("x, y", positive=True)',
|
574 |
+
// 'a, b = symbols("a, b", real=True)',
|
575 |
+
// 'z, t, c = symbols("z, t, c")',
|
576 |
+
// ],
|
577 |
+
// },
|
578 |
+
{
|
579 |
+
'name' : 'Simplify powers for general arguments',
|
580 |
+
'snippet' : [
|
581 |
+
'powsimp(x**y * x**z)',
|
582 |
+
],
|
583 |
+
},
|
584 |
+
{
|
585 |
+
'name' : 'Simplify powers, forcing assumptions',
|
586 |
+
'snippet' : [
|
587 |
+
'powsimp(x**y * x**z, force=True)',
|
588 |
+
],
|
589 |
+
},
|
590 |
+
{
|
591 |
+
'name' : 'Expand powers by exponent for general arguments',
|
592 |
+
'snippet' : [
|
593 |
+
'expand_power_exp(x**(y + z))',
|
594 |
+
],
|
595 |
+
},
|
596 |
+
{
|
597 |
+
'name' : 'Expand powers of multiplied bases, forcing assumptions',
|
598 |
+
'snippet' : [
|
599 |
+
'expand_power_base((x*y)**z, force=True)',
|
600 |
+
],
|
601 |
+
},
|
602 |
+
{
|
603 |
+
'name' : 'Collect exponents on powers for general arguments',
|
604 |
+
'snippet' : [
|
605 |
+
'powdenest((x**y)**z)',
|
606 |
+
],
|
607 |
+
},
|
608 |
+
{
|
609 |
+
'name' : 'Collect exponents on powers, forcing assumptions',
|
610 |
+
'snippet' : [
|
611 |
+
'powdenest((x**y)**z, force=True)',
|
612 |
+
],
|
613 |
+
},
|
614 |
+
{
|
615 |
+
'name' : 'Collect exponents on powers, forcing assumptions and polar simplifications',
|
616 |
+
'snippet' : [
|
617 |
+
'powdenest((z**a)**b, force=True, polar=True)',
|
618 |
+
],
|
619 |
+
},
|
620 |
+
{
|
621 |
+
'name' : 'Denest square-roots',
|
622 |
+
'snippet' : [
|
623 |
+
'sqrtdenest(sqrt(5 + 2*sqrt(6)))',
|
624 |
+
],
|
625 |
+
},
|
626 |
+
],
|
627 |
+
},
|
628 |
+
{
|
629 |
+
'name' : 'Exponentials and Logarithms',
|
630 |
+
'sub-menu' : [
|
631 |
+
{
|
632 |
+
'name' : 'Important caveats',
|
633 |
+
'external-link' : 'http://docs.sympy.org/dev/tutorial/simplification.html#exponentials-and-logarithms'
|
634 |
+
},
|
635 |
+
'---',
|
636 |
+
// {
|
637 |
+
// 'name' : 'Setup for these snippets',
|
638 |
+
// 'snippet' : [
|
639 |
+
// 'x, y = symbols("x, y", positive=True)',
|
640 |
+
// 'n = symbols("n", real=True)',
|
641 |
+
// ],
|
642 |
+
// },
|
643 |
+
{
|
644 |
+
'name' : 'Combine exponentials',
|
645 |
+
'snippet' : [
|
646 |
+
'powsimp(exp(y) * exp(z))',
|
647 |
+
],
|
648 |
+
},
|
649 |
+
{
|
650 |
+
'name' : 'Expand logarithms for general arguments',
|
651 |
+
'snippet' : [
|
652 |
+
'expand_log(log(x*y))',
|
653 |
+
],
|
654 |
+
},
|
655 |
+
{
|
656 |
+
'name' : 'Expand logarithms, forcing assumptions',
|
657 |
+
'snippet' : [
|
658 |
+
'expand_log(log(z**2), force=True)',
|
659 |
+
],
|
660 |
+
},
|
661 |
+
{
|
662 |
+
'name' : 'Combine logarithms for general arguments',
|
663 |
+
'snippet' : [
|
664 |
+
'logcombine(log(x) + z*log(y))',
|
665 |
+
],
|
666 |
+
},
|
667 |
+
{
|
668 |
+
'name' : 'Combine logarithms, forcing assumptions',
|
669 |
+
'snippet' : [
|
670 |
+
'logcombine(log(x) + z*log(y))',
|
671 |
+
],
|
672 |
+
},
|
673 |
+
{
|
674 |
+
'name' : 'Simplification, possibly to trig functions',
|
675 |
+
'snippet' : [
|
676 |
+
'exptrigsimp(exp(z) + exp(-z))',
|
677 |
+
],
|
678 |
+
},
|
679 |
+
],
|
680 |
+
},
|
681 |
+
{
|
682 |
+
'name' : 'Trigonometric functions',
|
683 |
+
'sub-menu' : [
|
684 |
+
{
|
685 |
+
'name' : 'Expansion',
|
686 |
+
'snippet' : [
|
687 |
+
'expr = sin(x + y)',
|
688 |
+
'expr = expand(expr, trig=True)',
|
689 |
+
],
|
690 |
+
},
|
691 |
+
{
|
692 |
+
'name' : 'Simplification',
|
693 |
+
'snippet' : [
|
694 |
+
'expr = sin(x)**4 - 2*cos(x)**2*sin(x)**2 + cos(x)**4',
|
695 |
+
'expr = trigsimp(expr)',
|
696 |
+
],
|
697 |
+
},
|
698 |
+
{
|
699 |
+
'name' : 'Simplification, possibly to exponentials',
|
700 |
+
'snippet' : [
|
701 |
+
'expr = cosh(z) - sinh(z)',
|
702 |
+
'expr = exptrigsimp(expr)',
|
703 |
+
],
|
704 |
+
},
|
705 |
+
],
|
706 |
+
},
|
707 |
+
{
|
708 |
+
'name' : 'Miscellaneous',
|
709 |
+
'sub-menu' : [
|
710 |
+
{
|
711 |
+
'name' : 'Simplify factorials',
|
712 |
+
'snippet' : [
|
713 |
+
'expr = factorial(n)/factorial(n - 3)',
|
714 |
+
'expr = combsimp(expr)',
|
715 |
+
],
|
716 |
+
},
|
717 |
+
{
|
718 |
+
'name' : 'Simplify binomials',
|
719 |
+
'snippet' : [
|
720 |
+
'expr = binomial(n+1, k+1)/binomial(n, k)',
|
721 |
+
'expr = combsimp(expr)',
|
722 |
+
],
|
723 |
+
},
|
724 |
+
{
|
725 |
+
'name' : 'Simplify numerical expressions to exact values',
|
726 |
+
'snippet' : [
|
727 |
+
'nsimplify(4.0/(1+sqrt(5.0)), constants=[GoldenRatio,])',
|
728 |
+
],
|
729 |
+
},
|
730 |
+
{
|
731 |
+
'name' : 'Expand gamma functions',
|
732 |
+
'snippet' : [
|
733 |
+
'expr = gamma(z+3)',
|
734 |
+
'expr = expand_func(expr)',
|
735 |
+
],
|
736 |
+
},
|
737 |
+
{
|
738 |
+
'name' : 'Simplify Bessel functions',
|
739 |
+
'snippet' : [
|
740 |
+
'expr = besselj(x, z*polar_lift(-1))',
|
741 |
+
'expr = besselsimp(expr)',
|
742 |
+
],
|
743 |
+
},
|
744 |
+
],
|
745 |
+
},
|
746 |
+
],
|
747 |
+
},
|
748 |
+
],
|
749 |
+
};
|
750 |
+
});
|
.local/share/jupyter/nbextensions/toc2/toc2.js
ADDED
@@ -0,0 +1,826 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
(requirejs.specified('base/js/namespace') ? define : function(deps, callback) {
|
2 |
+
"use strict";
|
3 |
+
// if here, the Jupyter namespace hasn't been specified to be loaded.
|
4 |
+
// This means that we're probably embedded in a page, so we need to make
|
5 |
+
// our definition with a specific module name
|
6 |
+
return define('nbextensions/toc2/toc2', deps, callback);
|
7 |
+
})(['jquery', 'require'], function($, requirejs) {
|
8 |
+
"use strict";
|
9 |
+
|
10 |
+
// Module-level state shared by the functions in this file.
// IPython and events start out undefined; liveNotebook starts out false.
var IPython,
    events,
    liveNotebook = false,
    // headers found under #notebook at load time
    all_headers = $("#notebook").find(":header");
|
14 |
+
|
15 |
+
// Default values for the system-wide configurable parameters.
var default_cfg = {
    // colour values, stored as CSS hex strings
    colors: {
        hover_highlight: "#DAA520",
        selected_highlight: "#FFD700",
        running_highlight: "#FF0000",
        wrapper_background: "#FFFFFF",
        sidebar_border: "#EEEEEE",
        navigate_text: "#333333",
        navigate_num: "#000000",
        on_scroll: "#2447f0",
    },
    collapse_to_match_collapsible_headings: false,
    markTocItemOnScroll: true,
    moveMenuLeft: true,
    navigate_menu: true,
    threshold: 4,
    widenNotebook: false,
};
|
34 |
+
// Default values for the per-notebook configurable parameters.
var metadata_settings = {
    nav_menu: {},
    number_sections: true,
    sideBar: true,
    skip_h1_title: false,
    base_numbering: 1,
    title_cell: "Table of Contents",
    title_sidebar: "Contents",
    toc_cell: false,
    toc_position: {},
    toc_section_display: true,
    toc_window_display: false,
};

// Deep-merge the per-notebook defaults into default_cfg, so that
// default_cfg carries a value for every known setting.
$.extend(true, default_cfg, metadata_settings);
|
49 |
+
|
50 |
+
/**
 * Read our config from server config & notebook metadata.
 *
 * This function should only be called when both:
 *   1. the notebook (and its metadata) has fully loaded
 *   AND
 *   2. Jupyter.notebook.config.loaded has resolved
 *
 * Note that the returned object IS default_cfg (it is updated in place, not
 * copied). In a non-live notebook it is returned untouched.
 *
 * @returns {Object} the effective configuration
 */
var read_config = function () {
    var cfg = default_cfg;

    if (liveNotebook) {
        var notebook = IPython.notebook;

        // config may be specified at system level or at document level.
        // first, update defaults with config loaded from server
        $.extend(true, cfg, notebook.config.data.toc2);

        // keep the previously stored values, then reset the notebook
        // metadata so that stale keys are dropped
        var stored = notebook.metadata.toc || {};
        var fresh = notebook.metadata.toc = {};

        // for every per-notebook setting, the stored value wins over cfg;
        // the result is written both to cfg and back to the nb metadata
        // (so it can then be modified per document)
        var keys = Object.keys(metadata_settings);
        for (var k = 0; k < keys.length; k++) {
            var key = keys[k];
            var value = stored.hasOwnProperty(key) ? stored[key] : cfg[key];
            fresh[key] = value;
            cfg[key] = value;
        }
    }

    return cfg;
};
|
78 |
+
|
79 |
+
// globally-used status variables:

// true while the ToC cell itself is being inserted/rendered
var rendering_toc_cell = false;

// toc_position default also serves as the defaults for a non-live notebook
var toc_position = {
    height: 'calc(100% - 180px)',
    width: '20%',
    left: '10px',
    top: '150px'
};
|
83 |
+
|
84 |
+
// Detect whether we are running inside a live notebook by trying to
// require the Jupyter namespace and event modules synchronously.
try {
    // this will work in a live notebook because nbextensions & custom.js
    // are loaded by/after notebook.js, which requires base/js/namespace
    IPython = requirejs('base/js/namespace');
    events = requirejs('base/js/events');
    liveNotebook = true;
} catch (err) {
    // We *are* theoretically in a non-live notebook
    console.log('[toc2] working in non-live notebook'); //, err);

    // in non-live notebook, there's no event structure, so we make our own
    // (a jQuery-wrapped plain object stored on window, created only once)
    if (typeof window.events === 'undefined') {
        var Events = function() {};
        window.events = $([new Events()]);
    }
    events = window.events;
}

// alias: both names refer to the same namespace object
// (left undefined when the require calls above failed)
var Jupyter = IPython;
|
101 |
+
|
102 |
+
/**
 * Store `value` under `key` in the notebook's `toc` metadata and mark the
 * notebook dirty when the stored value actually changed.
 *
 * In a non-live notebook nothing is stored; the value is just passed through.
 *
 * @param {string} key - metadata key inside notebook.metadata.toc
 * @param {*} value - value to store
 * @returns {*} `value`, unchanged, so calls can be used inline in assignments
 */
var setMd = function(key, value) {
    if (liveNotebook) {
        var md = IPython.notebook.metadata.toc;
        if (md === undefined) {
            md = IPython.notebook.metadata.toc = {};
        }
        var old_val = md[key];
        md[key] = value;

        // `typeof` yields a string, so it must be compared with the string
        // 'undefined'. Comparing with the value `undefined` is always true,
        // which made the fallback unreachable and threw a ReferenceError
        // whenever underscore (`_`) was not loaded.
        var changed = (typeof _ !== 'undefined') ?
            !_.isEqual(value, old_val) :  // deep comparison when underscore is available
            old_val != value;             // loose comparison otherwise
        if (changed) {
            IPython.notebook.set_dirty();
        }
    }
    return value;
};
|
116 |
+
|
117 |
+
/**
 * Increment the heading label counter at position h_idx (zero based) and
 * reset every deeper counter to 0. `ary` is modified in place.
 *
 * @param {Array} ary - one counter per heading level
 * @param {number} h_idx - index of the counter to increment
 * @returns {Array} a copy of the counters from index 0 up to and including h_idx
 */
function incr_lbl(ary, h_idx) {
    ary[h_idx]++;

    // zero out all counters after h_idx, walking from the end
    var pos = ary.length;
    while (--pos > h_idx) {
        ary[pos] = 0;
    }

    var label_len = h_idx + 1;
    return ary.slice(0, label_len);
}
|
124 |
+
|
125 |
+
/**
 * Strip link anchors and MathJax rendering artefacts from a (cloned) header.
 *
 * - direct children with class .anchor-link or .toc-mod-link are removed
 * - every <script type="math/tex"> is replaced by its text wrapped in '$'
 * - MathJax preview and rendered spans are removed
 *
 * @param {jQuery} elt - element to clean; modified in place
 * @returns {jQuery} the same `elt`
 */
function removeMathJaxPreview(elt) {
    elt.children('.anchor-link, .toc-mod-link').remove();

    elt.find("script[type='math/tex']").each(function() {
        var script = $(this);
        script.replaceWith('$' + script.text() + '$');
    });

    elt.find("span.MathJax_Preview").remove();
    elt.find("span.MathJax").remove();

    return elt;
}
|
135 |
+
|
136 |
+
/**
 * Click handler for ToC links: scroll to the linked heading, record the jump
 * in the browser history and, in a live notebook, select the matching cell.
 *
 * If the heading referenced by the link no longer exists in the document, the
 * click is ignored (previously this threw a TypeError after a history entry
 * had already been pushed).
 *
 * @param {Event} evt - the click event; evt.currentTarget is the ToC link
 */
var callback_toc_link_click = function(evt) {
    // workaround for https://github.com/jupyter/notebook/issues/699
    setTimeout(function() {
        $.ajax()
    }, 100);
    evt.preventDefault();

    // Resolve the target first, so that a stale link neither throws nor
    // leaves a dead entry in the history.
    // Use the native document method as jquery won't cope with characters
    // like . in an id.
    var trg_id = $(evt.currentTarget).attr('data-toc-modified-id');
    var target = document.getElementById(trg_id);
    if (target === null) {
        console.log('[toc2] link target not found:', trg_id);
        return;
    }

    // Each time a link is clicked in the toc, save the current position and target in the history
    var currentSection = $('#toc .highlight_on_scroll a').data('tocModifiedId');
    if (window.history.state != null) {
        if (window.history.state.back != currentSection) {
            window.history.pushState({'back': currentSection}, "", '');
        }
    }
    window.history.pushState({'back': trg_id}, "", '');
    window.history.lastjump = trg_id;

    // use native scrollIntoView method with semi-unique id
    // ! browser native click doesn't follow links on all browsers
    target.scrollIntoView(true);

    if (liveNotebook) {
        var cell = $(target).closest('.cell').data('cell');
        Jupyter.notebook.select(Jupyter.notebook.find_cell_index(cell));
        highlight_toc_item("toc_link_click", {
            cell: cell
        });
    }
};
|
166 |
+
|
167 |
+
//
|
168 |
+
// When the user navigates the browser history, scroll back to the heading
// recorded in the history entry (its id is stored in state.back) and, in a
// live notebook, select and highlight the corresponding cell.
window.addEventListener('popstate',
    function(e) {
        if (e.state == null || e.state.back == null) {
            return;
        }
        var back_id = e.state.back;

        // The heading may have been removed or renumbered since the entry
        // was pushed; ignore such entries instead of throwing a TypeError.
        var target = document.getElementById(back_id);
        if (target === null) {
            return;
        }

        target.scrollIntoView(true);
        if (liveNotebook) {
            var cell = $(target).closest('.cell').data('cell');
            Jupyter.notebook.select(Jupyter.notebook.find_cell_index(cell));
            highlight_toc_item("toc_link_click", {
                cell: cell
            });
        }
    });
|
182 |
+
|
183 |
+
/**
 * Build the <a> element used as ToC entry for a header.
 *
 * @param {jQuery} h - the header element
 * @param {string} toc_mod_id - value stored in the link's
 *     data-toc-modified-id attribute
 * @returns {jQuery} the link, with callback_toc_link_click bound to 'click'
 */
var make_link = function(h, toc_mod_id) {
    var anchor_href = h.find('.anchor-link').attr('href');
    var link = $('<a>').attr({
        'href': anchor_href,
        'data-toc-modified-id': toc_mod_id,
    });

    // get the text *excluding* the link text, whatever it may be:
    // work on a copy of the header so the document itself is untouched
    var label = removeMathJaxPreview(h.clone());
    link.html(label.html());

    link.on('click', callback_toc_link_click);
    return link;
};
|
196 |
+
|
197 |
+
/**
 * Highlight the ToC entry that belongs to a notebook cell.
 *
 * The entry is looked up through the id of the .toc-mod-link anchor found in
 * the cell or, failing that, the last one found in the preceding siblings.
 *
 * @param {Object|string} evt - only evt.type is read; 'execute' selects the
 *     "execute" highlight, anything else the "select" highlight
 * @param {Object} data - data.cell.element is the cell's DOM element
 */
function highlight_toc_item(evt, data) {
    var cell_elt = $(data.cell.element);
    if (cell_elt.length < 1) {
        return;
    }

    var own_id = cell_elt.find('.toc-mod-link').attr('id');
    var trg_id = own_id || cell_elt.prevAll().find('.toc-mod-link').eq(-1).attr('id');

    var toc_links = (trg_id === undefined) ? $() : $('.toc a').filter(function() {
        return $(this).attr('data-toc-modified-id') === trg_id;
    });

    if (evt.type === 'execute') {
        // remove the selected class and add execute class
        // if the cell is selected again, it will be highlighted as selected+running
        toc_links.removeClass('toc-item-highlight-select');
        toc_links.addClass('toc-item-highlight-execute');
        return;
    }

    // only one entry carries the "select" highlight at a time
    $('.toc .toc-item-highlight-select').removeClass('toc-item-highlight-select');
    toc_links.addClass('toc-item-highlight-select');
}
|
219 |
+
|
220 |
+
/**
 * Insert a "Navigate" dropdown after the parent of #kernel_menu, containing a
 * resizable <ul id="Navigate_menu"> that wraps a <div id="navigate_menu"
 * class="toc">.
 *
 * @param {Object} cfg - settings object; cfg.nav_menu is applied as css to
 *     the menu when truthy, and is used to remember the menu's width/height
 * @param {Function} [callback] - called once the menu has been set up
 */
var create_navigate_menu = function(cfg, callback) {
    // copy the outer menu's current size onto the inner toc div
    var sync_inner_size = function() {
        $('#navigate_menu').css('width', $('#Navigate_menu').css('width'));
        $('#navigate_menu').css('height', $('#Navigate_menu').height());
    };
    // remember the outer menu's size in cfg
    var remember_size = function() {
        cfg.nav_menu['width'] = $('#Navigate_menu').css('width');
        cfg.nav_menu['height'] = $('#Navigate_menu').css('height');
    };

    // build the dropdown: <li id="Navigate"> > <a id="Navigate_sub"> + <ul id="Navigate_menu">
    $('#kernel_menu').parent().after('<li id="Navigate"/>');
    var nav_item = $('#Navigate');
    nav_item
        .addClass('dropdown')
        .append($('<a/>').attr({href: '#', id: 'Navigate_sub'}));
    $('#Navigate_sub')
        .text('Navigate')
        .addClass('dropdown-toggle')
        .attr('data-toggle', 'dropdown');
    var inner_toc = $("<div/>").attr("id", "navigate_menu").addClass('toc');
    nav_item.append(
        $('<ul/>').attr('id', 'Navigate_menu').addClass('dropdown-menu').append(inner_toc));

    if (cfg['nav_menu']) {
        // apply the stored css
        $('#Navigate_menu').css(cfg['nav_menu']);
        sync_inner_size();
    } else {
        // nothing stored yet: record the size whenever the notebook is saved
        cfg.nav_menu = {};
        events.on("before_save.Notebook", function() {
            try {
                remember_size();
            } catch (e) {
                console.log("[toc2] Error in metadata (navigation menu) - Proceeding", e);
            }
        });
    }

    $('#Navigate_menu').resizable({
        resize: function(event, ui) {
            sync_inner_size();
        },
        stop: function(event, ui) {
            remember_size();
        }
    });

    if (callback) {
        callback();
    }
}
|
257 |
+
|
258 |
+
/**
 * Set the left margin and width of #notebook-container so that it fits next
 * to the ToC sidebar.
 *
 * - Both properties are reset ('') unless one of the rules below applies.
 * - cfg.widenNotebook: the width is set to all the available space.
 * - Visible sidebar: the notebook is shifted right by the sidebar width plus
 *   a margin, and slimmed down when it would not fit otherwise.
 *
 * @param {Object} cfg - reads cfg.sideBar and cfg.widenNotebook
 * @param {*} st - unused
 */
function setNotebookWidth(cfg, st) {
    var MARGIN = 20;
    var container = $('#notebook-container');
    var sidebar = $('#toc-wrapper');

    var sidebar_shown = cfg.sideBar && sidebar.is(':visible');
    var sidebar_width = sidebar_shown ? sidebar.outerWidth() : 0;
    var free_width = $('#notebook').width() - 2 * MARGIN - sidebar_width;

    var new_css = {
        // shift notebook rightward to fit the sidebar in
        marginLeft: sidebar_shown ? sidebar_width + MARGIN : '',
        width: cfg.widenNotebook ? free_width : '',
    };
    if (sidebar_shown && free_width <= container.outerWidth()) {
        // also slim notebook to fit sidebar
        new_css.width = free_width;
    }

    container.css(new_css);
}
|
279 |
+
|
280 |
+
/**
 * Copy the current geometry of #toc-wrapper into toc_position and store it in
 * the notebook metadata. When the wrapper has the class 'sidebar-wrapper' only
 * the width is taken; otherwise left, top, height and width.
 */
var saveTocPosition = function () {
    var wrapper = $('#toc-wrapper');

    var props;
    if (wrapper.hasClass('sidebar-wrapper')) {
        props = ['width'];
    } else {
        props = ['left', 'top', 'height', 'width'];
    }

    var current = wrapper.css(props);
    $.extend(toc_position, current);
    setMd('toc_position', toc_position);
};
|
286 |
+
|
287 |
+
/**
 * Expand or minimize the ToC wrapper according to cfg.
 *
 * The ToC counts as open when cfg.sideBar or cfg.toc_section_display is
 * truthy. When closing, the wrapper is shrunk by the height of #toc, and #toc
 * is hidden once the animation completes.
 *
 * @param {Object} cfg - reads cfg.sideBar and cfg.toc_section_display
 * @param {boolean} [animate] - use a 'fast' animation instead of duration 0
 * @returns {*} the value of `cfg.sideBar || cfg.toc_section_display`
 */
var makeUnmakeMinimized = function (cfg, animate) {
    var open = cfg.sideBar || cfg.toc_section_display;
    var wrapper = $('#toc-wrapper');
    var options = {duration: animate ? 'fast' : 0};
    var target_css;

    if (!open) {
        target_css = {
            height: wrapper.outerHeight() - wrapper.find('#toc').outerHeight(),
        };
        options.complete = function () {
            $('#toc').hide();
            $('#toc-wrapper').css('width', '');
        };
    } else {
        $('#toc').show();
        if (cfg.sideBar) {
            target_css = {};
        } else {
            // floating window: restore the remembered size
            target_css = {height: toc_position.height, width: toc_position.width};
        }
    }

    var button_title = open ? 'Hide ToC' : 'Show ToC';
    wrapper.toggleClass('closed', !open)
        .animate(target_css, options)
        .find('.hide-btn').attr('title', button_title);

    return open;
};
|
309 |
+
|
310 |
+
/**
 * Switch #toc-wrapper between sidebar mode and floating-window mode
 * according to cfg.sideBar, then re-fit the notebook width.
 *
 * @param {Object} cfg - reads cfg.sideBar
 */
var makeUnmakeSidebar = function (cfg) {
    var as_sidebar = cfg.sideBar;

    var wrapper = $('#toc-wrapper');
    wrapper.toggleClass('sidebar-wrapper', as_sidebar);
    wrapper.toggleClass('float-wrapper', !as_sidebar);
    // a sidebar can only be resized from its east edge
    wrapper.resizable('option', 'handles', as_sidebar ? 'e' : 'all');

    wrapper.children('.ui-resizable-se').toggleClass('ui-icon', !as_sidebar);
    wrapper.children('.ui-resizable-e').toggleClass('ui-icon ui-icon-grip-dotted-vertical', as_sidebar);

    if (!as_sidebar) {
        wrapper.css({height: toc_position.height});
    } else {
        // NOTE(review): `.top` is read as a plain property of the #site
        // element - confirm that this yields the intended offset.
        var sidebar_top = liveNotebook ? document.getElementById('site').top : 0;
        wrapper.css({top: sidebar_top, height: "", left: 0});
    }

    setNotebookWidth(cfg);
};
|
327 |
+
|
328 |
+
/**
 * Create the ToC container (#toc-wrapper, initially hidden), consisting of a
 * header with hide / reload / settings buttons and the #toc div, insert it
 * into the page, make it draggable and resizable, and restore its stored
 * position and display state.
 *
 * @param {Object} cfg - configuration; several display flags are updated
 * @param {*} st - passed through to table_of_contents, show_settings_dialog
 *     and setNotebookWidth
 */
var create_toc_div = function(cfg, st) {

    var callbackPageResize = function (evt) {
        setNotebookWidth(cfg);
    };

    // ---- header buttons -------------------------------------------------
    var hide_button = $('<i class="fa fa-fw hide-btn" title="Hide ToC">');
    hide_button.on('click', function (evt) {
        cfg.toc_section_display = setMd('toc_section_display', !cfg.toc_section_display);
        makeUnmakeMinimized(cfg, true);
    });

    var reload_button = $('<i class="fa fa-fw fa-refresh" title="Reload ToC">');
    reload_button.on('click', function(evt) {
        var icon = $(evt.currentTarget).addClass('fa-spin');
        table_of_contents(cfg, st);
        icon.removeClass('fa-spin');
    });

    var settings_button = $('<i class="fa fa-fw fa-cog" title="ToC settings"/>');
    settings_button.on('click', function(evt) {
        show_settings_dialog(cfg, st);
    });

    var toc_header = $('<div id="toc-header"/>')
        .append('<span class="header"/>')
        .append(hide_button)
        .append(reload_button)
        .append(settings_button);

    var toc_body = $("<div/>").attr("id", "toc").addClass('toc');

    // ---- wrapper ----------------------------------------------------------
    var toc_wrapper = $('<div id="toc-wrapper"/>')
        .css('display', 'none')
        .append(toc_header)
        .append(toc_body)
        .prependTo(liveNotebook ? '#site' : document.body);

    // enable dragging and save position on stop moving
    var drag_options = {
        drag: function(event, ui) {
            var make_sidebar = ui.position.left < 20; // 20 is snapTolerance
            if (make_sidebar) {
                ui.position.top = liveNotebook ? document.getElementById('site').top : 0;
                ui.position.left = 0;
            }
            // crossing the snap limit switches between sidebar and floating window
            if (make_sidebar !== cfg.sideBar) {
                cfg.toc_section_display = setMd('toc_section_display', true);
                cfg.sideBar = setMd('sideBar', make_sidebar);
                makeUnmakeMinimized(cfg);
                makeUnmakeSidebar(cfg);
            }
        },
        stop: saveTocPosition,
        containment: 'parent',
        snap: 'body, #site',
        snapTolerance: 20,
    };
    toc_wrapper.draggable(drag_options);

    var resize_options = {
        handles: 'all',
        resize: function(event, ui) {
            if (cfg.sideBar) {
                // unset the height set by jquery resizable
                $('#toc-wrapper').css('height', '');
                setNotebookWidth(cfg, st);
            }
        },
        start: function(event, ui) {
            // a floating window is expanded before it gets resized
            if (!cfg.sideBar) {
                cfg.toc_section_display = setMd('toc_section_display', true);
                makeUnmakeMinimized(cfg);
            }
        },
        stop: saveTocPosition,
        containment: 'parent',
        minHeight: 100,
        minWidth: 165,
    };
    toc_wrapper.resizable(resize_options);

    // On header/menu/toolbar resize, resize the toc itself
    $(window).on('resize', callbackPageResize);
    if (!liveNotebook) {
        // default to true for non-live notebook
        cfg.toc_window_display = true;
    } else {
        events.on("resize-header.Page toggle-all-headers", callbackPageResize);
        $.extend(toc_position, IPython.notebook.metadata.toc.toc_position);
    }

    // restore toc position at load
    if (cfg.sideBar) {
        toc_wrapper.css({width: toc_position.width});
    } else {
        toc_wrapper.css(toc_position);
    }

    // older toc2 versions stored string representations, so update those
    if (cfg.toc_window_display === 'none') {
        cfg.toc_window_display = setMd('toc_window_display', false);
    }
    if (cfg.toc_section_display === 'none') {
        cfg.toc_section_display = setMd('toc_section_display', false);
    }

    toc_wrapper.toggle(cfg.toc_window_display);
    makeUnmakeSidebar(cfg);
    $("#toc_button").toggleClass('active', cfg.toc_window_display);
    if (!cfg.toc_section_display) {
        makeUnmakeMinimized(cfg);
    }
};
|
431 |
+
|
432 |
+
//----------------------------------------------------------------------------
// on scroll - mark the toc item corresponding to the first header visible in
// the viewport with 'highlight_on_scroll' class
// some elements from https://stackoverflow.com/questions/20791374/jquery-check-if-element-is-visible-in-viewport
/**
 * Install the scroll handler described above. Does nothing unless
 * cfg.markTocItemOnScroll is truthy.
 *
 * @param {Object} cfg - reads cfg.markTocItemOnScroll
 * @param {*} st - unused
 */
function highlightTocItemOnScroll(cfg, st) {
    if (!cfg.markTocItemOnScroll) {
        return;
    }

    var scrolling_elt = liveNotebook ? '#site' : window;

    $(scrolling_elt).scroll(function() {
        var headerVisibleHeight = $('#header').is(':visible') ? $('#header').height() : 0;
        var headerHeight = liveNotebook ? headerVisibleHeight : 0;
        var top_of_screen = $(window).scrollTop() + headerHeight;
        var bottom_of_screen = $(window).scrollTop() + $(scrolling_elt).height() + headerHeight;

        // loop over all headers; returning false stops the loop
        all_headers.each(function(i, h) {
            var header = $(h);
            var top_of_element = header.offset().top;
            var is_visible = (bottom_of_screen > top_of_element) && (top_of_screen < top_of_element);

            if (!is_visible) {
                // If the current header is already below the viewport then
                // break, otherwise go on with the next header
                return (bottom_of_screen < top_of_element) ? false : undefined;
            }

            // The element is visible
            var trg_id = header.attr('data-toc-modified-id');
            if (trg_id !== undefined) {
                var toc_links = $('#toc a').filter(function() {
                    return $(this).attr('data-toc-modified-id') === trg_id;
                });
                $('#toc .highlight_on_scroll').removeClass('highlight_on_scroll');
                toc_links.parent().addClass('highlight_on_scroll');
            }
            return false;
        });
    });
}
|
469 |
+
//----------------------------------------------------------------------------
// TOC CELL -- if cfg.toc_cell=true, add and update a toc cell in the notebook.
//             This cell, initially at the very beginning, can be moved.
//             Its contents are automatically updated.
//             Optionally, the sections in the toc can be numbered.
/**
 * Create, update or delete the notebook's ToC cell.
 *
 * The cell content is an <h1> with cfg.title_cell (html-escaped, marked with
 * a tocSkip span) followed by a copy of the html of #toc.
 *
 * @param {Object} cfg - reads cfg.toc_cell and cfg.title_cell
 * @param {*} st - unused
 * @returns {*} the result of IPython.notebook.delete_cell when an unwanted
 *     ToC cell was deleted, undefined otherwise
 */
function process_cell_toc(cfg, st) {
    var title_html = $('<div>').text(cfg.title_cell).html();
    var new_html = '<h1>' +
        title_html + '<span class="tocSkip"></span></h1>\n' +
        '<div class="toc">' +
        $('#toc').html() +
        '</div>';

    // non-live notebook: update the already rendered toc cell, if wanted
    if (!liveNotebook) {
        if (cfg.toc_cell) {
            $('.cell > .toc').parent(':has(.tocSkip)')
                .html(new_html)
                .find('.toc-item li a')
                .on('click', callback_toc_link_click);
        }
        return;
    }

    // look for a possible toc cell (the first cell flagged in its metadata)
    var cells = IPython.notebook.get_cells();
    var toc_idx = -1;
    for (var idx = 0; idx < cells.length; idx++) {
        if (cells[idx].metadata.toc) {
            toc_idx = idx;
            break;
        }
    }

    if (!cfg.toc_cell) {
        // delete if we don't want it
        if (toc_idx >= 0) {
            return IPython.notebook.delete_cell(toc_idx);
        }
        return;
    }

    // toc_cell=true, so we want a cell_toc.
    // If it does not exist, create it at the beginning of the notebook
    var cell_toc = (toc_idx >= 0) ? cells[toc_idx] : undefined;
    if (cell_toc === undefined) {
        // set rendering_toc_cell flag to avoid loop on insert_cell_above
        rendering_toc_cell = true;
        cell_toc = IPython.notebook.insert_cell_above('markdown', 0);
        cell_toc.metadata.toc = true;
        rendering_toc_cell = false;
    }

    // set rendering_toc_cell flag to avoid loop on render
    rendering_toc_cell = true;
    cell_toc.set_text(new_html);
    cell_toc.render();
    rendering_toc_cell = false;

    cell_toc.element.find('.toc-item li a').on('click', callback_toc_link_click);
} //end function process_cell_toc --------------------------
|
522 |
+
|
523 |
+
/**
 * Collapse or expand the ToC sub-list of the entry identified by trg_id.
 *
 * @param {string} trg_id - data-toc-modified-id of the ToC entry
 * @param {boolean} show - true to expand (slide down), false to collapse
 * @param {boolean} [trigger_event] - unless exactly false, a
 *     'collapse.Toc' / 'uncollapse.Toc' event is triggered afterwards
 */
var collapse_by_id = function(trg_id, show, trigger_event) {
    var links = $('.toc .toc-item > li > span > a').filter(function() {
        return $(this).attr('data-toc-modified-id') === trg_id;
    });

    // flip the caret icon next to the link
    var carets = links.siblings('i');
    carets.toggleClass('fa-caret-right', !show);
    carets.toggleClass('fa-caret-down', show);

    var sublists = links.parent().siblings('ul');
    if (show) {
        sublists.slideDown('fast');
    } else {
        sublists.slideUp('fast');
    }

    if (trigger_event === false) {
        return;
    }
    // fire event for collapsible_heading to catch
    var cell = $(document.getElementById(trg_id)).closest('.cell').data('cell');
    var event_name = (show ? 'un' : '') + 'collapse.Toc';
    events.trigger(event_name, {
        cell: cell
    });
};
|
539 |
+
|
540 |
+
/**
 * Event handler mirroring a heading collapse/uncollapse in the ToC.
 *
 * @param {Object} evt - an event whose type containing 'un' means "expand"
 * @param {Object} data - data.cell.element is searched for a header that
 *     carries a data-toc-modified-id attribute
 */
var callback_toc2_collapsible_headings = function(evt, data) {
    var tagged_headers = data.cell.element.find(':header').filter(function() {
        return Boolean($(this).attr('data-toc-modified-id'));
    });
    var trg_id = tagged_headers.attr('data-toc-modified-id');

    var show = evt.type.indexOf('un') >= 0;

    // use trigger_event false to avoid re-triggering collapsible_headings
    collapse_by_id(trg_id, show, false);
};
|
548 |
+
|
549 |
+
/**
 * Click handler for the caret icon of a ToC entry: toggles the entry's
 * sub-list. A right-pointing caret means the entry gets expanded.
 *
 * @param {Event} evt - the click event; evt.currentTarget is the icon
 */
var callback_collapser = function(evt) {
    var caret = $(evt.currentTarget);
    var expand = caret.hasClass('fa-caret-right');
    var trg_id = caret.siblings('a').attr('data-toc-modified-id');
    collapse_by_id(trg_id, expand);
};
|
555 |
+
|
556 |
+
// Table of Contents =================================================================
/**
 * (Re)build the table of contents from the headers of the rendered text cells.
 *
 * Steps: create the ToC window on first use, rebuild the nested list in #toc
 * (numbering the headers on the way), refresh the Navigate menu and the ToC
 * cell, add the collapse controls, and (un)subscribe from the
 * collapsible-heading events.
 *
 * @param {Object} cfg - configuration
 * @param {*} st - passed through to the helpers that accept it
 */
var table_of_contents = function(cfg, st) {

    // if this call is a result of toc_cell rendering, do nothing to avoid
    // looping, as we're already in a table_of_contents call
    if (rendering_toc_cell) {
        return;
    }

    // In a live notebook, read_config will have been called already, but
    // in non-live notebooks, ensure that all config values are defined.
    if (!liveNotebook) {
        cfg = $.extend(true, {}, default_cfg, cfg);
    }

    if ($("#toc-wrapper").length === 0) { // toc window doesn't exist at all
        create_toc_div(cfg, st); // create it
        highlightTocItemOnScroll(cfg, st); // initialize highlighting on scroll
    }

    // `ul` always points at the list that receives the next entry
    var ul = $('<ul/>').addClass('toc-item');

    // update sidebar/window title
    $('#toc-header > .header').text(cfg.title_sidebar + ' ');

    // update toc element
    $("#toc").empty().append(ul);

    // update all headers with id that are in rendered text cell outputs,
    // excepting any header which contains an html tag with class 'tocSkip'
    // eg in ## title <a class='tocSkip'>,
    // or the ToC cell.
    all_headers = $('.text_cell_render').find('[id]:header:not(:has(.tocSkip))');

    // smallest heading level in use (h1 is ignored with cfg.skip_h1_title)
    var min_lvl = cfg.skip_h1_title ? 2 : 1;
    while (min_lvl <= 6 && !all_headers.is('h' + min_lvl)) {
        min_lvl++;
    }

    // one counter per remaining heading level
    var lbl_ary = [cfg.base_numbering - 1]; // begin numbering at base_numbering
    for (var slot = 1; slot <= 6 - min_lvl; slot++) {
        lbl_ary[slot] = 0;
    }

    var depth = 1;

    //loop over all headers
    all_headers.each(function(i, h) {
        var header = $(h);

        // remove pre-existing number
        header.children('.toc-item-num').remove();

        var level = parseInt(h.tagName.slice(1), 10) - min_lvl + 1;
        // skip below threshold, or h1 ruled out by cfg.skip_h1_title
        if (level < 1 || level > cfg.threshold) {
            return;
        }

        // numbered heading labels
        var num_str = incr_lbl(lbl_ary, level - 1).join('.');
        if (cfg.number_sections) {
            $('<span>')
                .text(num_str + '\u00a0\u00a0')
                .addClass('toc-item-num')
                .prependTo(header);
        }

        // walk down levels: nest a new list in the last entry
        while (depth < level) {
            var last_li = ul.children('li:last-child');
            if (last_li.length < 1) {
                last_li = $('<li>').appendTo(ul);
            }
            ul = $('<ul class="toc-item">').appendTo(last_li);
            depth++;
        }
        // walk up levels
        while (depth > level) {
            ul = ul.parent().closest('.toc-item');
            depth--;
        }

        var toc_mod_id = header.attr('id') + '-' + num_str;
        header.attr('data-toc-modified-id', toc_mod_id);
        // add an anchor with modified id (if it doesn't already exist)
        header.children('.toc-mod-link').remove();
        $('<a>').addClass('toc-mod-link').attr('id', toc_mod_id).prependTo(header);

        // Create toc entry, append <li> tag to the current list.
        var entry = $('<li>').append(
            $('<span>').append(make_link(header, toc_mod_id)));
        ul.append(entry);
    });

    // update navigation menu
    var nav_menu_exists;
    if (cfg.navigate_menu) {
        var pop_nav = function() { //callback for create_nav_menu
            $('#navigate_menu').empty().append($('#toc > .toc-item').clone());
        };
        nav_menu_exists = $('#Navigate_menu').length != 0;
        if (nav_menu_exists) {
            pop_nav();
        } else {
            create_navigate_menu((liveNotebook ? IPython.notebook.metadata.toc : cfg), pop_nav);
        }
    } else {
        // If navigate_menu is false but the menu already exists, then remove it
        nav_menu_exists = $('#Navigate_menu').length > 0;
        if (nav_menu_exists) {
            $('#Navigate_sub').remove();
        }
    }

    // if cfg.toc_cell=true, find/add and update a toc cell in the notebook.
    process_cell_toc(cfg, st);

    // add collapse controls
    $('<i>')
        .addClass('fa fa-fw fa-caret-down')
        .on('click', callback_collapser) // callback
        .prependTo('.toc li:has(ul) > span'); // only if li has descendants
    // otherwise still add <i> to keep things aligned
    $('<i>').addClass('fa fa-fw ').prependTo('.toc li:not(:has(ul)) > span');

    var subscription = cfg.collapse_to_match_collapsible_headings ? 'on' : 'off';
    events[subscription](
        'collapse.CollapsibleHeading uncollapse.CollapsibleHeading', callback_toc2_collapsible_headings);
};
|
675 |
+
|
676 |
+
/**
 * Show the ToC window when it is hidden and hide it otherwise, store the new
 * state, then re-fit the notebook width and rebuild the ToC.
 *
 * @param {Object} cfg - configuration; cfg.toc_window_display is updated
 * @param {*} st - unused
 */
var toggle_toc = function(cfg, st) {
    // toggle draw (first because of first-click behavior)
    var wrapper = $("#toc-wrapper");
    var becomes_visible = wrapper.is(':hidden');
    wrapper.toggle(becomes_visible);

    cfg['toc_window_display'] = setMd('toc_window_display', becomes_visible);

    setNotebookWidth(cfg);
    table_of_contents(cfg);

    $("#toc_button").toggleClass('active', becomes_visible);
};
|
686 |
+
|
687 |
+
/**
 * Build and open the "ToC2 settings" modal dialog.
 *
 * Every input is bound to one metadata key: changing it stores the new value
 * through `setMd`, mirrors the result into `cfg`, and re-renders the table of
 * contents. Bootstrap's modal plugin is used when available; otherwise a
 * minimal hand-rolled backdrop/show/hide sequence is used.
 *
 * @param {Object} cfg - toc2 configuration, updated in place on each change.
 * @param {Object} st - passed through unchanged to `table_of_contents`.
 * @returns {jQuery} the modal element (the return value of `modal.modal(...)`
 *     on the bootstrap path, the bare modal element on the fallback path).
 */
var show_settings_dialog = function (cfg, st) {

    // 'change' handler shared by all inputs: checkbox -> boolean, else raw value.
    var callback_setting_change = function (evt) {
        var input = $(evt.currentTarget);
        var md_key = input.attr('tocMdKey');
        cfg[md_key] = setMd(md_key, input.attr('type') == 'checkbox' ? Boolean(input.prop('checked')) : input.val());
        table_of_contents(cfg, st);
    };

    // Build one labelled input for `md_key`; `input_type` defaults to 'text'.
    var build_setting_input = function (md_key, md_label, input_type) {
        // Initial values come from notebook metadata when live, else from cfg.
        var opts = liveNotebook ? IPython.notebook.metadata.toc : cfg;
        var id = 'toc-settings-' + md_key;
        var fg = $('<div>').append(
            $('<label>').text(md_label).attr('for', id));
        var input = $('<input/>').attr({
            type: input_type || 'text', id: id, tocMdKey: md_key,
        }).on('change', callback_setting_change);
        if (input_type == 'checkbox') {
            // Checkboxes are placed inside their label, before the text.
            fg.addClass('checkbox');
            input
                .prop('checked', opts[md_key])
                .prependTo(fg.children('label'));
        }
        else {
            fg.addClass('form-group');
            input
                .addClass('form-control')
                .val(opts[md_key])
                .appendTo(fg);
        }
        return fg;
    };

    var modal = $('<div class="modal fade" role="dialog"/>');
    var dialog_content = $("<div/>")
        .addClass("modal-content")
        .appendTo($('<div class="modal-dialog">').appendTo(modal));
    $('<div class="modal-header">')
        .append('<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>')
        .append('<h4 class="modal-title">ToC2 settings</h4>')
        .on('mousedown', function() { $('.modal').draggable({handle: '.modal-header'});})
        .appendTo(dialog_content);
    $('<div>')
        .addClass('modal-body')
        .append([
            $('<div>').text(
                'These settings apply to this notebook only, and are stored in its metadata. ' +
                // The parentheses are required: `+` binds tighter than `?:`, so
                // without them the concatenated string becomes the (always
                // truthy) condition and the first sentence is lost.
                (liveNotebook ? 'The defaults for new notebooks can be edited from the nbextensions configurator.' :
                'The settings won\'t persist in non-live notebooks though.')),
            build_setting_input('number_sections', 'Automatically number headings', 'checkbox'),
            build_setting_input('skip_h1_title', 'Leave h1 items out of ToC', 'checkbox'),
            build_setting_input('base_numbering', 'Begin numbering at'),
            build_setting_input('toc_cell', 'Add notebook ToC cell', 'checkbox'),
            build_setting_input('title_cell', 'ToC cell title'),
            build_setting_input('title_sidebar', 'Sidebar/window title'),
            build_setting_input('sideBar', 'Display as a sidebar (otherwise as a floating window)', 'checkbox'),
            build_setting_input('toc_window_display', 'Display ToC window/sidebar at startup', 'checkbox'),
            build_setting_input('toc_section_display', 'Expand window/sidebar at startup', 'checkbox'),
        ])
        .appendTo(dialog_content);
    $('<div class="modal-footer">')
        .append('<button class="btn btn-default btn-sm btn-primary" data-dismiss="modal">Ok</button>')
        .appendTo(dialog_content);
    // focus button on open
    modal.on('shown.bs.modal', function () {
        setTimeout(function () {
            dialog_content.find('.modal-footer button').last().focus();
        }, 0);
    });

    if (liveNotebook) {
        // Suspend notebook keyboard handling while the dialog is open and
        // restore it (in command mode) once the dialog is hidden.
        Jupyter.notebook.keyboard_manager.disable();
        modal.on('hidden.bs.modal', function () {
            modal.remove(); // destroy modal on hide
            Jupyter.notebook.keyboard_manager.enable();
            Jupyter.notebook.keyboard_manager.command_mode();
            var cell = Jupyter.notebook.get_selected_cell();
            if (cell) cell.select();
        });
    }

    // Try to use bootstrap modal, but bootstrap's js may not be available
    // (e.g. as in non-live notebook), so we provide a poor-man's version
    try {
        return modal.modal({backdrop: 'static'});
    }
    catch (err) {
        // show the backdrop
        $(document.body).addClass('modal-open');
        var $backdrop = $('<div class="modal-backdrop fade">').appendTo($(document.body));
        $backdrop[0].offsetWidth; // force reflow
        $backdrop.addClass('in');
        // hook up removals
        modal.on('click', '[data-dismiss="modal"]', function modal_close() {
            // hide the modal foreground
            modal.removeClass('in');
            setTimeout(function on_foreground_hidden() {
                modal.remove();
                // now hide the backdrop
                $backdrop.removeClass('in');
                // wait for transition
                setTimeout(function on_backdrop_hidden() {
                    $(document.body).removeClass('modal-open');
                    $backdrop.remove();
                }, 150);
            }, 300);
        });
        // wait for transition
        setTimeout(function () {
            // now show the modal foreground
            modal.appendTo(document.body).show().scrollTop(0);
            modal[0].offsetWidth; // force reflow
            modal.addClass('in');
            // wait for transition, then trigger callbacks
            setTimeout(function on_foreground_shown() {
                modal.trigger('shown.bs.modal');
            }, 300);
        }, 150);
        return modal;
    }
};
|
807 |
+
|
808 |
+
return {
|
809 |
+
highlight_toc_item: highlight_toc_item,
|
810 |
+
table_of_contents: table_of_contents,
|
811 |
+
toggle_toc: toggle_toc,
|
812 |
+
read_config: read_config,
|
813 |
+
};
|
814 |
+
});
|
815 |
+
// export table_of_contents to global namespace for backwards compatibility
|
816 |
+
// Do export synchronously, so that it's defined as soon as this file is loaded
|
817 |
+
// Only install the global shim when 'base/js/namespace' has not been
// specified to requirejs.
if (!requirejs.specified('base/js/namespace')) {
    window.table_of_contents = function(cfg, st) {
        "use strict";
        // Defer the real call until the toc2 module has been loaded, then
        // forward the arguments unchanged.
        var on_toc2_loaded = function(toc2) {
            toc2.table_of_contents(cfg, st);
        };
        requirejs(['nbextensions/toc2/toc2'], on_toc2_loaded);
    };
}
|
.local/share/jupyter/nbextensions/toc2/toc2.yaml
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Type: Jupyter Notebook Extension
|
2 |
+
Name: Table of Contents (2)
|
3 |
+
Description: The toc2 extension collects all running headers and displays them in a floating window, as a sidebar, or in a navigation menu. The extension is also draggable, resizable, collapsible, dockable and features automatic numbering with unique link ids, and an optional toc cell.
|
4 |
+
Link: README.md
|
5 |
+
Icon: icon.png
|
6 |
+
Main: main.js
|
7 |
+
Compatibility: 4.x, 5.x
|
8 |
+
Parameters:
|
9 |
+
- name: toc2.number_sections
|
10 |
+
description: Automatically number notebook's sections
|
11 |
+
input_type: checkbox
|
12 |
+
default: true
|
13 |
+
- name: toc2.threshold
|
14 |
+
description: Maximum level of nested sections to display on the tables of contents
|
15 |
+
input_type: number
|
16 |
+
min: -1
|
17 |
+
step: 1
|
18 |
+
default: 4
|
19 |
+
|
20 |
+
- name: toc2.skip_h1_title
|
21 |
+
description: |
|
22 |
+
Skip h1 headings from numbering, so that they can serve as a notebook title.
|
23 |
+
See the README for details, caveats and alternatives
|
24 |
+
input_type: checkbox
|
25 |
+
default: false
|
26 |
+
|
27 |
+
- name: toc2.toc_cell
|
28 |
+
description: Add a Table of Contents cell at the top of the notebook
|
29 |
+
input_type: checkbox
|
30 |
+
default: false
|
31 |
+
|
32 |
+
- name: toc2.title_cell
|
33 |
+
description: Default heading used for ToC cell (can also be set per-notebook)
|
34 |
+
default: 'Table of Contents'
|
35 |
+
|
36 |
+
- name: toc2.title_sidebar
|
37 |
+
description: Default title used for ToC sidebar/window (can also be set per-notebook)
|
38 |
+
default: 'Contents'
|
39 |
+
|
40 |
+
- name: toc2.toc_window_display
|
41 |
+
description: Display toc window/sidebar at startup
|
42 |
+
input_type: checkbox
|
43 |
+
default: false
|
44 |
+
- name: toc2.sideBar
|
45 |
+
description: Display Table of Contents as a sidebar (otherwise as a floating window)
|
46 |
+
input_type: checkbox
|
47 |
+
default: true
|
48 |
+
- name: toc2.markTocItemOnScroll
|
49 |
+
description: Mark toc item of header in viewport when scrolling
|
50 |
+
input_type: checkbox
|
51 |
+
default: true
|
52 |
+
- name: toc2.widenNotebook
|
53 |
+
description: Widen the display area to fit the browser window (may be useful with sidebar option)
|
54 |
+
input_type: checkbox
|
55 |
+
default: true
|
56 |
+
- name: toc2.navigate_menu
|
57 |
+
description: Display Table of Contents as a navigation menu
|
58 |
+
input_type: checkbox
|
59 |
+
default: true
|
60 |
+
- name: toc2.moveMenuLeft
|
61 |
+
description: Move the notebook's title and menu to the left instead of centering them -- this gives a better look when the toc/sidebar is present
|
62 |
+
input_type: checkbox
|
63 |
+
default: true
|
64 |
+
|
65 |
+
- name: toc2.collapse_to_match_collapsible_headings
|
66 |
+
input_type: checkbox
|
67 |
+
default: false
|
68 |
+
description: |
|
69 |
+
Collapse/uncollapse ToC sections when the collapsible_headings nbextension
|
70 |
+
is used to collapse/uncollapse sections in the notebook. For the inverse
|
71 |
+
behaviour, see collapsible_headings' configuration
|
72 |
+
|
73 |
+
- name: toc2.colors.hover_highlight
|
74 |
+
input_type: color
|
75 |
+
description: Hover color in toc
|
76 |
+
default: "#DAA520"
|
77 |
+
- name: toc2.colors.on_scroll
|
78 |
+
input_type: color
|
79 |
+
description: Color of highlight mark on scrolling
|
80 |
+
default: '#2447f0'
|
81 |
+
- name: toc2.colors.selected_highlight
|
82 |
+
input_type: color
|
83 |
+
description: Color of sections with selected elements
|
84 |
+
default: "#FFD700"
|
85 |
+
- name: toc2.colors.running_highlight
|
86 |
+
input_type: color
|
87 |
+
description: Color of sections with running cells
|
88 |
+
default: "#FF0000"
|
89 |
+
- name: toc2.colors.wrapper_background
|
90 |
+
input_type: color
|
91 |
+
description: Color of wrapper window background
|
92 |
+
default: "#FFFFFF"
|
93 |
+
- name: toc2.colors.sidebar_border
|
94 |
+
input_type: color
|
95 |
+
description: Color of sidebar border
|
96 |
+
default: "#EEEEEE"
|
97 |
+
- name: toc2.colors.navigate_text
|
98 |
+
input_type: color
|
99 |
+
description: Color of navigate text
|
100 |
+
default: "#333333"
|
101 |
+
- name: toc2.colors.navigate_num
|
102 |
+
input_type: color
|
103 |
+
description: Color of navigate number
|
104 |
+
default: "#000000"
|
.local/share/jupyter/nbextensions/toggle_all_line_numbers/main.js
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// toggle on/off linenumber display in all codecells
|
2 |
+
|
3 |
+
/**
 * Notebook extension module: registers a 'toggle-all-line-numbers' action,
 * adds a toolbar button for it and, when enabled by configuration, binds a
 * hotkey to it in both edit and command mode.
 *
 * Exports `load_ipython_extension`, which initializes the extension once the
 * notebook config has been loaded.
 */
define([
    'jquery',
    'base/js/namespace'
], function(
    $,
    Jupyter
) {
    "use strict";

    // define default values for config parameters
    var params = {
        toggle_all_linenumbers_hotkey : 'Alt-N',
        toggle_all_linenumbers_enable_hotkey : true
    };

    // to be called once config is loaded, this updates default config vals
    // with the ones specified by the server's config file
    var update_params = function() {
        var config = Jupyter.notebook.config;
        for (var key in params) {
            // Borrow hasOwnProperty from Object.prototype so that a config
            // entry named 'hasOwnProperty' cannot shadow the method.
            if (Object.prototype.hasOwnProperty.call(config.data, key)) {
                params[key] = config.data[key];
            }
        }
    };

    // Action handler: flip the toolbar button state and toggle line numbers
    // on every cell returned by the notebook.
    var toggle_all = function() {
        var toolbar_button = $('#toggle_all_linenumbers');
        toolbar_button.toggleClass('active', !toolbar_button.hasClass('active'));
        var cells = Jupyter.notebook.get_cells();
        // Indexed loop rather than for-in: for-in on an array also visits
        // enumerable properties that are not cell indices.
        for (var i = 0; i < cells.length; i++) {
            cells[i].toggle_line_numbers();
        }
    };

    // define action, register with ActionHandler instance
    var prefix = 'auto';
    var action_name = 'toggle-all-line-numbers';
    var action = {
        icon: 'fa-list-ol',
        help: 'Toggle linenumbers in all codecells',
        help_index : 'zz',
        id: 'toggle_all_linenumbers',
        handler: toggle_all
    };
    var action_full_name; // will be set on registration

    var initialize = function () {
        // update default config vals with the newly loaded ones
        update_params();

        // register actions with ActionHandler instance
        action_full_name = Jupyter.keyboard_manager.actions.register(action, action_name, prefix);

        // create toolbar button
        Jupyter.toolbar.add_buttons_group([action_full_name]);

        // (maybe) define hotkey
        if (params.toggle_all_linenumbers_enable_hotkey &&
                params.toggle_all_linenumbers_hotkey) {

            console.log('toggle_all_linenumbers enabling hotkey:',
                params.toggle_all_linenumbers_hotkey);

            Jupyter.keyboard_manager.edit_shortcuts.add_shortcut(
                params.toggle_all_linenumbers_hotkey, action_full_name);
            Jupyter.keyboard_manager.command_shortcuts.add_shortcut(
                params.toggle_all_linenumbers_hotkey, action_full_name);
        }
    };

    // Entry point called by the notebook: defer setup until config is loaded.
    var load_ipython_extension = function() {
        return Jupyter.notebook.config.loaded.then(initialize);
    };

    var extension = {
        load_ipython_extension : load_ipython_extension
    };
    return extension;
});
|
.local/share/jupyter/nbextensions/toggle_all_line_numbers/main.yaml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Type: IPython Notebook Extension
|
2 |
+
Name: Toggle all line numbers
|
3 |
+
Description: "Add a toolbar button and hotkey to toggle all cells' line numbers on or off"
|
4 |
+
Icon: icon.png
|
5 |
+
Link: readme.md
|
6 |
+
Main: main.js
|
7 |
+
Compatibility: 3.x, 4.x, 5.x
|
8 |
+
Parameters:
|
9 |
+
- name: toggle_all_linenumbers_hotkey
|
10 |
+
description: Hotkey combination to which to bind linenumber-toggling
|
11 |
+
input_type: hotkey
|
12 |
+
default: Alt-N
|
13 |
+
- name: toggle_all_linenumbers_enable_hotkey
|
14 |
+
description: Enable the hotkey
|
15 |
+
input_type: checkbox
|
16 |
+
default: true
|
.local/share/jupyter/nbextensions/toggle_all_line_numbers/readme.md
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Toggle all line numbers
|
2 |
+
=======================
|
3 |
+
This extension adds a toolbar button, along with an optional hotkey,
|
4 |
+
to toggle all cells' line numbers on or off in one action.
|
5 |
+
|
.local/share/jupyter/nbextensions/tree-filter/demo.gif
ADDED
![]() |
.local/share/jupyter/nbextensions/varInspector/__pycache__/var_list.cpython-310.pyc
ADDED
Binary file (2.02 kB). View file
|
|
.local/share/jupyter/nbextensions/varInspector/demo.gif
ADDED
![]() |
.local/share/jupyter/nbextensions/varInspector/jquery.tablesorter.min.js
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
!function(a){"function"==typeof define&&define.amd?define(["jquery"],a):"object"==typeof module&&"object"==typeof module.exports?module.exports=a(require("jquery")):a(jQuery)}(function(a){return function(a){"use strict";var b=a.tablesorter={version:"2.25.7",parsers:[],widgets:[],defaults:{theme:"default",widthFixed:!1,showProcessing:!1,headerTemplate:"{content}",onRenderTemplate:null,onRenderHeader:null,cancelSelection:!0,tabIndex:!0,dateFormat:"mmddyyyy",sortMultiSortKey:"shiftKey",sortResetKey:"ctrlKey",usNumberFormat:!0,delayInit:!1,serverSideSorting:!1,resort:!0,headers:{},ignoreCase:!0,sortForce:null,sortList:[],sortAppend:null,sortStable:!1,sortInitialOrder:"asc",sortLocaleCompare:!1,sortReset:!1,sortRestart:!1,emptyTo:"bottom",stringTo:"max",duplicateSpan:!0,textExtraction:"basic",textAttribute:"data-text",textSorter:null,numberSorter:null,widgets:[],widgetOptions:{zebra:["even","odd"]},initWidgets:!0,widgetClass:"widget-{name}",initialized:null,tableClass:"",cssAsc:"",cssDesc:"",cssNone:"",cssHeader:"",cssHeaderRow:"",cssProcessing:"",cssChildRow:"tablesorter-childRow",cssInfoBlock:"tablesorter-infoOnly",cssNoSort:"tablesorter-noSort",cssIgnoreRow:"tablesorter-ignoreRow",cssIcon:"tablesorter-icon",cssIconNone:"",cssIconAsc:"",cssIconDesc:"",pointerClick:"click",pointerDown:"mousedown",pointerUp:"mouseup",selectorHeaders:"> thead th, > thead td",selectorSort:"th, td",selectorRemove:".remove-me",debug:!1,headerList:[],empties:{},strings:{},parsers:[]},css:{table:"tablesorter",cssHasChild:"tablesorter-hasChildRow",childRow:"tablesorter-childRow",colgroup:"tablesorter-colgroup",header:"tablesorter-header",headerRow:"tablesorter-headerRow",headerIn:"tablesorter-header-inner",icon:"tablesorter-icon",processing:"tablesorter-processing",sortAsc:"tablesorter-headerAsc",sortDesc:"tablesorter-headerDesc",sortNone:"tablesorter-headerUnSorted"},language:{sortAsc:"Ascending sort applied, ",sortDesc:"Descending sort applied, ",sortNone:"No sort applied, 
",sortDisabled:"sorting is disabled",nextAsc:"activate to apply an ascending sort",nextDesc:"activate to apply a descending sort",nextNone:"activate to remove the sort"},regex:{templateContent:/\{content\}/g,templateIcon:/\{icon\}/g,templateName:/\{name\}/i,spaces:/\s+/g,nonWord:/\W/g,formElements:/(input|select|button|textarea)/i,chunk:/(^([+\-]?(?:\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?)?$|^0x[0-9a-f]+$|\d+)/gi,chunks:/(^\\0|\\0$)/,hex:/^0x[0-9a-f]+$/i,comma:/,/g,digitNonUS:/[\s|\.]/g,digitNegativeTest:/^\s*\([.\d]+\)/,digitNegativeReplace:/^\s*\(([.\d]+)\)/,digitTest:/^[\-+(]?\d+[)]?$/,digitReplace:/[,.'"\s]/g},string:{max:1,min:-1,emptymin:1,emptymax:-1,zero:0,none:0,"null":0,top:!0,bottom:!1},keyCodes:{enter:13},dates:{},instanceMethods:{},setup:function(c,d){if(!c||!c.tHead||0===c.tBodies.length||c.hasInitialized===!0)return void(d.debug&&(c.hasInitialized?console.warn("Stopping initialization. Tablesorter has already been initialized"):console.error("Stopping initialization! No table, thead or tbody",c)));var e="",f=a(c),g=a.metadata;c.hasInitialized=!1,c.isProcessing=!0,c.config=d,a.data(c,"tablesorter",d),d.debug&&(console[console.group?"group":"log"]("Initializing tablesorter"),a.data(c,"startoveralltimer",new Date)),d.supportsDataObject=function(a){return a[0]=parseInt(a[0],10),a[0]>1||1===a[0]&&parseInt(a[1],10)>=4}(a.fn.jquery.split(".")),d.emptyTo=d.emptyTo.toLowerCase(),d.stringTo=d.stringTo.toLowerCase(),d.last={sortList:[],clickedIndex:-1},/tablesorter\-/.test(f.attr("class"))||(e=""!==d.theme?" 
tablesorter-"+d.theme:""),d.table=c,d.$table=f.addClass(b.css.table+" "+d.tableClass+e).attr("role","grid"),d.$headers=f.find(d.selectorHeaders),d.namespace?d.namespace="."+d.namespace.replace(b.regex.nonWord,""):d.namespace=".tablesorter"+Math.random().toString(16).slice(2),d.$table.children().children("tr").attr("role","row"),d.$tbodies=f.children("tbody:not(."+d.cssInfoBlock+")").attr({"aria-live":"polite","aria-relevant":"all"}),d.$table.children("caption").length&&(e=d.$table.children("caption")[0],e.id||(e.id=d.namespace.slice(1)+"caption"),d.$table.attr("aria-labelledby",e.id)),d.widgetInit={},d.textExtraction=d.$table.attr("data-text-extraction")||d.textExtraction||"basic",b.buildHeaders(d),b.fixColumnWidth(c),b.addWidgetFromClass(c),b.applyWidgetOptions(c),b.setupParsers(d),d.totalRows=0,d.delayInit||b.buildCache(d),b.bindEvents(c,d.$headers,!0),b.bindMethods(d),d.supportsDataObject&&"undefined"!=typeof f.data().sortlist?d.sortList=f.data().sortlist:g&&f.metadata()&&f.metadata().sortlist&&(d.sortList=f.metadata().sortlist),b.applyWidget(c,!0),d.sortList.length>0?b.sortOn(d,d.sortList,{},!d.initWidgets):(b.setHeadersCss(d),d.initWidgets&&b.applyWidget(c,!1)),d.showProcessing&&f.unbind("sortBegin"+d.namespace+" sortEnd"+d.namespace).bind("sortBegin"+d.namespace+" sortEnd"+d.namespace,function(a){clearTimeout(d.timerProcessing),b.isProcessing(c),"sortBegin"===a.type&&(d.timerProcessing=setTimeout(function(){b.isProcessing(c,!0)},500))}),c.hasInitialized=!0,c.isProcessing=!1,d.debug&&(console.log("Overall initialization time: "+b.benchmark(a.data(c,"startoveralltimer"))),d.debug&&console.groupEnd&&console.groupEnd()),f.triggerHandler("tablesorter-initialized",c),"function"==typeof d.initialized&&d.initialized(c)},bindMethods:function(c){var d=c.$table,e=c.namespace,f="sortReset update updateRows updateAll updateHeaders addRows updateCell updateComplete sorton appendCache updateCache applyWidgetId applyWidgets refreshWidgets destroy mouseup mouseleave ".split(" 
").join(e+" ");d.unbind(f.replace(b.regex.spaces," ")).bind("sortReset"+e,function(a,c){a.stopPropagation(),b.sortReset(this.config,c)}).bind("updateAll"+e,function(a,c,d){a.stopPropagation(),b.updateAll(this.config,c,d)}).bind("update"+e+" updateRows"+e,function(a,c,d){a.stopPropagation(),b.update(this.config,c,d)}).bind("updateHeaders"+e,function(a,c){a.stopPropagation(),b.updateHeaders(this.config,c)}).bind("updateCell"+e,function(a,c,d,e){a.stopPropagation(),b.updateCell(this.config,c,d,e)}).bind("addRows"+e,function(a,c,d,e){a.stopPropagation(),b.addRows(this.config,c,d,e)}).bind("updateComplete"+e,function(){this.isUpdating=!1}).bind("sorton"+e,function(a,c,d,e){a.stopPropagation(),b.sortOn(this.config,c,d,e)}).bind("appendCache"+e,function(c,d,e){c.stopPropagation(),b.appendCache(this.config,e),a.isFunction(d)&&d(this)}).bind("updateCache"+e,function(a,c,d){a.stopPropagation(),b.updateCache(this.config,c,d)}).bind("applyWidgetId"+e,function(a,c){a.stopPropagation(),b.applyWidgetId(this,c)}).bind("applyWidgets"+e,function(a,c){a.stopPropagation(),b.applyWidget(this,c)}).bind("refreshWidgets"+e,function(a,c,d){a.stopPropagation(),b.refreshWidgets(this,c,d)}).bind("removeWidget"+e,function(a,c,d){a.stopPropagation(),b.removeWidget(this,c,d)}).bind("destroy"+e,function(a,c,d){a.stopPropagation(),b.destroy(this,c,d)}).bind("resetToLoadState"+e,function(d){d.stopPropagation(),b.removeWidget(this,!0,!1),c=a.extend(!0,b.defaults,c.originalSettings),this.hasInitialized=!1,b.setup(this,c)})},bindEvents:function(c,d,e){c=a(c)[0];var f,g=c.config,h=g.namespace,i=null;e!==!0&&(d.addClass(h.slice(1)+"_extra_headers"),f=a.fn.closest?d.closest("table")[0]:d.parents("table")[0],f&&"TABLE"===f.nodeName&&f!==c&&a(f).addClass(h.slice(1)+"_extra_table")),f=(g.pointerDown+" "+g.pointerUp+" "+g.pointerClick+" sort keyup ").replace(b.regex.spaces," ").split(" ").join(h+" "),d.find(g.selectorSort).add(d.filter(g.selectorSort)).unbind(f).bind(f,function(c,e){var 
f,h,j,k=a(c.target),l=" "+c.type+" ";if(!(1!==(c.which||c.button)&&!l.match(" "+g.pointerClick+" | sort | keyup ")||" keyup "===l&&c.which!==b.keyCodes.enter||l.match(" "+g.pointerClick+" ")&&"undefined"!=typeof c.which||l.match(" "+g.pointerUp+" ")&&i!==c.target&&e!==!0)){if(l.match(" "+g.pointerDown+" "))return i=c.target,j=k.jquery.split("."),void("1"===j[0]&&j[1]<4&&c.preventDefault());if(i=null,b.regex.formElements.test(c.target.nodeName)||k.hasClass(g.cssNoSort)||k.parents("."+g.cssNoSort).length>0||k.parents("button").length>0)return!g.cancelSelection;g.delayInit&&b.isEmptyObject(g.cache)&&b.buildCache(g),f=a.fn.closest?a(this).closest("th, td"):/TH|TD/.test(this.nodeName)?a(this):a(this).parents("th, td"),j=d.index(f),g.last.clickedIndex=0>j?f.attr("data-column"):j,h=g.$headers[g.last.clickedIndex],h&&!h.sortDisabled&&b.initSort(g,h,c)}}),g.cancelSelection&&d.attr("unselectable","on").bind("selectstart",!1).css({"user-select":"none",MozUserSelect:"none"})},buildHeaders:function(c){var d,e,f,g;for(c.headerList=[],c.headerContent=[],c.sortVars=[],c.debug&&(f=new Date),c.columns=b.computeColumnIndex(c.$table.children("thead, tfoot").children("tr")),e=c.cssIcon?'<i class="'+(c.cssIcon===b.css.icon?b.css.icon:c.cssIcon+" "+b.css.icon)+'"></i>':"",c.$headers=a(a.map(c.$table.find(c.selectorHeaders),function(d,f){var g,h,i,j,k,l=a(d);if(!l.parent().hasClass(c.cssIgnoreRow))return g=b.getColumnData(c.table,c.headers,f,!0),c.headerContent[f]=l.html(),""===c.headerTemplate||l.find("."+b.css.headerIn).length||(j=c.headerTemplate.replace(b.regex.templateContent,l.html()).replace(b.regex.templateIcon,l.find("."+b.css.icon).length?"":e),c.onRenderTemplate&&(h=c.onRenderTemplate.apply(l,[f,j]),h&&"string"==typeof h&&(j=h)),l.html('<div 
class="'+b.css.headerIn+'">'+j+"</div>")),c.onRenderHeader&&c.onRenderHeader.apply(l,[f,c,c.$table]),i=parseInt(l.attr("data-column"),10),d.column=i,k=b.getData(l,g,"sortInitialOrder")||c.sortInitialOrder,c.sortVars[i]={count:-1,order:b.getOrder(k)?[1,0,2]:[0,1,2],lockedOrder:!1},k=b.getData(l,g,"lockedOrder")||!1,"undefined"!=typeof k&&k!==!1&&(c.sortVars[i].lockedOrder=!0,c.sortVars[i].order=b.getOrder(k)?[1,1,1]:[0,0,0]),c.headerList[f]=d,l.addClass(b.css.header+" "+c.cssHeader).parent().addClass(b.css.headerRow+" "+c.cssHeaderRow).attr("role","row"),c.tabIndex&&l.attr("tabindex",0),d})),c.$headerIndexed=[],g=0;g<c.columns;g++)b.isEmptyObject(c.sortVars[g])&&(c.sortVars[g]={}),d=c.$headers.filter('[data-column="'+g+'"]'),c.$headerIndexed[g]=d.length?d.not(".sorter-false").length?d.not(".sorter-false").filter(":last"):d.filter(":last"):a();c.$table.find(c.selectorHeaders).attr({scope:"col",role:"columnheader"}),b.updateHeader(c),c.debug&&(console.log("Built headers:"+b.benchmark(f)),console.log(c.$headers))},addInstanceMethods:function(c){a.extend(b.instanceMethods,c)},setupParsers:function(a,c){var d,e,f,g,h,i,j,k,l,m,n,o,p,q,r=a.table,s=0,t={};if(a.$tbodies=a.$table.children("tbody:not(."+a.cssInfoBlock+")"),p="undefined"==typeof c?a.$tbodies:c,q=p.length,0===q)return a.debug?console.warn("Warning: *Empty table!* Not building a parser cache"):"";for(a.debug&&(o=new Date,console[console.group?"group":"log"]("Detecting parsers for each 
column")),e={extractors:[],parsers:[]};q>s;){if(d=p[s].rows,d.length)for(h=0,g=a.columns,i=0;g>i;i++){if(j=a.$headerIndexed[h],j&&j.length&&(k=b.getColumnData(r,a.headers,h),n=b.getParserById(b.getData(j,k,"extractor")),m=b.getParserById(b.getData(j,k,"sorter")),l="false"===b.getData(j,k,"parser"),a.empties[h]=(b.getData(j,k,"empty")||a.emptyTo||(a.emptyToBottom?"bottom":"top")).toLowerCase(),a.strings[h]=(b.getData(j,k,"string")||a.stringTo||"max").toLowerCase(),l&&(m=b.getParserById("no-parser")),n||(n=!1),m||(m=b.detectParserForColumn(a,d,-1,h)),a.debug&&(t["("+h+") "+j.text()]={parser:m.id,extractor:n?n.id:"none",string:a.strings[h],empty:a.empties[h]}),e.parsers[h]=m,e.extractors[h]=n,f=j[0].colSpan-1,f>0))for(h+=f,g+=f;f+1>0;)e.parsers[h-f]=m,e.extractors[h-f]=n,f--;h++}s+=e.parsers.length?q:1}a.debug&&(b.isEmptyObject(t)?console.warn(" No parsers detected!"):console[console.table?"table":"log"](t),console.log("Completed detecting parsers"+b.benchmark(o)),console.groupEnd&&console.groupEnd()),a.parsers=e.parsers,a.extractors=e.extractors},addParser:function(a){var c,d=b.parsers.length,e=!0;for(c=0;d>c;c++)b.parsers[c].id.toLowerCase()===a.id.toLowerCase()&&(e=!1);e&&(b.parsers[b.parsers.length]=a)},getParserById:function(a){if("false"==a)return!1;var c,d=b.parsers.length;for(c=0;d>c;c++)if(b.parsers[c].id.toLowerCase()===a.toString().toLowerCase())return b.parsers[c];return!1},detectParserForColumn:function(c,d,e,f){for(var g,h,i,j=b.parsers.length,k=!1,l="",m=!0;""===l&&m;)e++,i=d[e],i&&50>e?i.className.indexOf(b.cssIgnoreRow)<0&&(k=d[e].cells[f],l=b.getElementText(c,k,f),h=a(k),c.debug&&console.log("Checking if value was empty on row "+e+", column: "+f+': "'+l+'"')):m=!1;for(;--j>=0;)if(g=b.parsers[j],g&&"text"!==g.id&&g.is&&g.is(l,c.table,k,h))return g;return b.getParserById("text")},getElementText:function(c,d,e){if(!d)return"";var f,g=c.textExtraction||"",h=d.jquery?d:a(d);return"string"==typeof 
g?"basic"===g&&"undefined"!=typeof(f=h.attr(c.textAttribute))?a.trim(f):a.trim(d.textContent||h.text()):"function"==typeof g?a.trim(g(h[0],c.table,e)):"function"==typeof(f=b.getColumnData(c.table,g,e))?a.trim(f(h[0],c.table,e)):a.trim(h[0].textContent||h.text())},getParsedText:function(a,c,d,e){"undefined"==typeof e&&(e=b.getElementText(a,c,d));var f=""+e,g=a.parsers[d],h=a.extractors[d];return g&&(h&&"function"==typeof h.format&&(e=h.format(e,a.table,c,d)),f="no-parser"===g.id?"":g.format(""+e,a.table,c,d),a.ignoreCase&&"string"==typeof f&&(f=f.toLowerCase())),f},buildCache:function(c,d,e){var f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,A,B=c.table,C=c.parsers;if(c.$tbodies=c.$table.children("tbody:not(."+c.cssInfoBlock+")"),l="undefined"==typeof e?c.$tbodies:e,c.cache={},c.totalRows=0,!C)return c.debug?console.warn("Warning: *Empty table!* Not building a cache"):"";for(c.debug&&(q=new Date),c.showProcessing&&b.isProcessing(B,!0),k=0;k<l.length;k++){for(u=[],f=c.cache[k]={normalized:[]},r=l[k]&&l[k].rows.length||0,i=0;r>i;++i)if(s={child:[],raw:[]},m=a(l[k].rows[i]),n=[],m.hasClass(c.cssChildRow)&&0!==i)for(z=f.normalized.length-1,t=f.normalized[z][c.columns],t.$row=t.$row.add(m),m.prev().hasClass(c.cssChildRow)||m.prev().addClass(b.css.cssHasChild),o=m.children("th, td"),z=t.child.length,t.child[z]=[],w=0,y=c.columns,j=0;y>j;j++)p=o[j],p&&(t.child[z][j]=b.getParsedText(c,p,j),v=o[j].colSpan-1,v>0&&(w+=v,y+=v)),w++;else{for(s.$row=m,s.order=i,w=0,y=c.columns,j=0;y>j;++j){if(p=m[0].cells[j],p&&w<c.columns&&(x="undefined"!=typeof C[w],!x&&c.debug&&console.warn("No parser found for row: "+i+", column: "+j+'; cell containing: "'+a(p).text()+'"; does it have a 
header?'),g=b.getElementText(c,p,w),s.raw[w]=g,h=b.getParsedText(c,p,w,g),n[w]=h,x&&"numeric"===(C[w].type||"").toLowerCase()&&(u[w]=Math.max(Math.abs(h)||0,u[w]||0)),v=p.colSpan-1,v>0)){for(A=0;v>=A;)s.raw[w+A]=c.duplicateSpan||0===A?g:"",n[w+A]=c.duplicateSpan||0===A?g:"",A++;w+=v,y+=v}w++}n[c.columns]=s,f.normalized[f.normalized.length]=n}f.colMax=u,c.totalRows+=f.normalized.length}if(c.showProcessing&&b.isProcessing(B),c.debug){for(z=Math.min(5,c.cache[0].normalized.length),console[console.group?"group":"log"]("Building cache for "+c.totalRows+" rows (showing "+z+" rows in log)"+b.benchmark(q)),g={},j=0;j<c.columns;j++)for(w=0;z>w;w++)g["row: "+w]||(g["row: "+w]={}),g["row: "+w][c.$headerIndexed[j].text()]=c.cache[0].normalized[w][j];console[console.table?"table":"log"](g),console.groupEnd&&console.groupEnd()}a.isFunction(d)&&d(B)},getColumnText:function(c,d,e,f){c=a(c)[0];var g,h,i,j,k,l,m,n,o,p,q="function"==typeof e,r="all"===d,s={raw:[],parsed:[],$cell:[]},t=c.config;if(!b.isEmptyObject(t)){for(k=t.$tbodies.length,g=0;k>g;g++)for(i=t.cache[g].normalized,l=i.length,h=0;l>h;h++)j=i[h],f&&!j[t.columns].$row.is(f)||(p=!0,n=r?j.slice(0,t.columns):j[d],j=j[t.columns],m=r?j.raw:j.raw[d],o=r?j.$row.children():j.$row.children().eq(d),q&&(p=e({tbodyIndex:g,rowIndex:h,parsed:n,raw:m,$row:j.$row,$cell:o})),p!==!1&&(s.parsed[s.parsed.length]=n,s.raw[s.raw.length]=m,s.$cell[s.$cell.length]=o));return s}t.debug&&console.warn("No cache found - aborting getColumnText function!")},setHeadersCss:function(c){var d,e,f,g=c.sortList,h=g.length,i=b.css.sortNone+" "+c.cssNone,j=[b.css.sortAsc+" "+c.cssAsc,b.css.sortDesc+" "+c.cssDesc],k=[c.cssIconAsc,c.cssIconDesc,c.cssIconNone],l=["ascending","descending"],m=c.$table.find("tfoot tr").children("td, th").add(a(c.namespace+"_extra_headers")).removeClass(j.join(" "));for(c.$headers.removeClass(j.join(" ")).addClass(i).attr("aria-sort","none").find("."+b.css.icon).removeClass(k.join(" 
")).addClass(k[2]),e=0;h>e;e++)if(2!==g[e][1]&&(d=c.$headers.filter(function(a){for(var d=!0,e=c.$headers.eq(a),f=parseInt(e.attr("data-column"),10),g=f+c.$headers[a].colSpan;g>f;f++)d=d?d||b.isValueInArray(f,c.sortList)>-1:!1;return d}),d=d.not(".sorter-false").filter('[data-column="'+g[e][0]+'"]'+(1===h?":last":"")),d.length)){for(f=0;f<d.length;f++)d[f].sortDisabled||d.eq(f).removeClass(i).addClass(j[g[e][1]]).attr("aria-sort",l[g[e][1]]).find("."+b.css.icon).removeClass(k[2]).addClass(k[g[e][1]]);m.length&&m.filter('[data-column="'+g[e][0]+'"]').removeClass(i).addClass(j[g[e][1]])}for(h=c.$headers.length,e=0;h>e;e++)b.setColumnAriaLabel(c,c.$headers.eq(e))},setColumnAriaLabel:function(c,d,e){if(d.length){var f=parseInt(d.attr("data-column"),10),g=d.hasClass(b.css.sortAsc)?"sortAsc":d.hasClass(b.css.sortDesc)?"sortDesc":"sortNone",h=a.trim(d.text())+": "+b.language[g];d.hasClass("sorter-false")||e===!1?h+=b.language.sortDisabled:(e=c.sortVars[f].order[(c.sortVars[f].count+1)%(c.sortReset?3:2)],h+=b.language[0===e?"nextAsc":1===e?"nextDesc":"nextNone"]),d.attr("aria-label",h)}},updateHeader:function(a){var c,d,e,f,g=a.table,h=a.$headers.length;for(c=0;h>c;c++)e=a.$headers.eq(c),f=b.getColumnData(g,a.headers,c,!0),d="false"===b.getData(e,f,"sorter")||"false"===b.getData(e,f,"parser"),b.setColumnSort(a,e,d)},setColumnSort:function(a,b,c){var d=a.table.id;b[0].sortDisabled=c,b[c?"addClass":"removeClass"]("sorter-false").attr("aria-disabled",""+c),a.tabIndex&&(c?b.removeAttr("tabindex"):b.attr("tabindex","0")),d&&(c?b.removeAttr("aria-controls"):b.attr("aria-controls",d))},updateHeaderSortCount:function(c,d){var 
e,f,g,h,i,j,k,l,m=d||c.sortList,n=m.length;for(c.sortList=[],h=0;n>h;h++)if(k=m[h],e=parseInt(k[0],10),e<c.columns){switch(c.sortVars[e].order||(l=c.sortVars[e].order=b.getOrder(c.sortInitialOrder)?[1,0,2]:[0,1,2],c.sortVars[e].count=0),l=c.sortVars[e].order,f=(""+k[1]).match(/^(1|d|s|o|n)/),f=f?f[0]:""){case"1":case"d":f=1;break;case"s":f=i||0;break;case"o":j=l[(i||0)%(c.sortReset?3:2)],f=0===j?1:1===j?0:2;break;case"n":f=l[++c.sortVars[e].count%(c.sortReset?3:2)];break;default:f=0}i=0===h?f:i,g=[e,parseInt(f,10)||0],c.sortList[c.sortList.length]=g,f=a.inArray(g[1],l),c.sortVars[e].count=f>=0?f:g[1]%(c.sortReset?3:2)}},updateAll:function(a,c,d){var e=a.table;e.isUpdating=!0,b.refreshWidgets(e,!0,!0),b.buildHeaders(a),b.bindEvents(e,a.$headers,!0),b.bindMethods(a),b.commonUpdate(a,c,d)},update:function(a,c,d){var e=a.table;e.isUpdating=!0,b.updateHeader(a),b.commonUpdate(a,c,d)},updateHeaders:function(a,c){a.table.isUpdating=!0,b.buildHeaders(a),b.bindEvents(a.table,a.$headers,!0),b.resortComplete(a,c)},updateCell:function(c,d,e,f){if(b.isEmptyObject(c.cache))return b.updateHeader(c),void b.commonUpdate(c,e,f);c.table.isUpdating=!0,c.$table.find(c.selectorRemove).remove();var g,h,i,j,k,l,m=c.$tbodies,n=a(d),o=m.index(a.fn.closest?n.closest("tbody"):n.parents("tbody").filter(":first")),p=c.cache[o],q=a.fn.closest?n.closest("tr"):n.parents("tr").filter(":first");if(d=n[0],m.length&&o>=0){if(i=m.eq(o).find("tr").index(q),k=p.normalized[i],l=q[0].cells.length,l!==c.columns)for(j=0,g=!1,h=0;l>h;h++)g||q[0].cells[h]===d?g=!0:j+=q[0].cells[h].colSpan;else j=n.index();g=b.getElementText(c,d,j),k[c.columns].raw[j]=g,g=b.getParsedText(c,d,j,g),k[j]=g,k[c.columns].$row=q,"numeric"===(c.parsers[j].type||"").toLowerCase()&&(p.colMax[j]=Math.max(Math.abs(g)||0,p.colMax[j]||0)),g="undefined"!==e?e:c.resort,g!==!1?b.checkResort(c,g,f):b.resortComplete(c,f)}else c.debug&&console.error("updateCell aborted, tbody missing or not within the indicated 
table"),c.table.isUpdating=!1},addRows:function(c,d,e,f){var g,h,i,j,k,l,m,n,o,p,q,r,s,t="string"==typeof d&&1===c.$tbodies.length&&/<tr/.test(d||""),u=c.table;if(t)d=a(d),c.$tbodies.append(d);else if(!(d&&d instanceof jQuery&&(a.fn.closest?d.closest("table")[0]:d.parents("table")[0])===c.table))return c.debug&&console.error("addRows method requires (1) a jQuery selector reference to rows that have already been added to the table, or (2) row HTML string to be added to a table with only one tbody"),!1;if(u.isUpdating=!0,b.isEmptyObject(c.cache))b.updateHeader(c),b.commonUpdate(c,e,f);else{for(k=d.filter("tr").attr("role","row").length,i=c.$tbodies.index(d.parents("tbody").filter(":first")),c.parsers&&c.parsers.length||b.setupParsers(c),j=0;k>j;j++){for(o=0,m=d[j].cells.length,n=c.cache[i].normalized.length,q=[],p={child:[],raw:[],$row:d.eq(j),order:n},l=0;m>l;l++)r=d[j].cells[l],g=b.getElementText(c,r,o),p.raw[o]=g,h=b.getParsedText(c,r,o,g),q[o]=h,"numeric"===(c.parsers[o].type||"").toLowerCase()&&(c.cache[i].colMax[o]=Math.max(Math.abs(h)||0,c.cache[i].colMax[o]||0)),s=r.colSpan-1,s>0&&(o+=s),o++;q[c.columns]=p,c.cache[i].normalized[n]=q}b.checkResort(c,e,f)}},updateCache:function(a,c,d){a.parsers&&a.parsers.length||b.setupParsers(a,d),b.buildCache(a,c,d)},appendCache:function(a,c){var d,e,f,g,h,i,j,k=a.table,l=a.widgetOptions,m=a.$tbodies,n=[],o=a.cache;if(b.isEmptyObject(o))return a.appender?a.appender(k,n):k.isUpdating?a.$table.triggerHandler("updateComplete",k):"";for(a.debug&&(j=new Date),i=0;i<m.length;i++)if(f=m.eq(i),f.length){for(g=b.processTbody(k,f,!0),d=o[i].normalized,e=d.length,h=0;e>h;h++)n[n.length]=d[h][a.columns].$row,a.appender&&(!a.pager||a.pager.removeRows&&l.pager_removeRows||a.pager.ajax)||g.append(d[h][a.columns].$row);b.processTbody(k,g,!1)}a.appender&&a.appender(k,n),a.debug&&console.log("Rebuilt 
table"+b.benchmark(j)),c||a.appender||b.applyWidget(k),k.isUpdating&&a.$table.triggerHandler("updateComplete",k)},commonUpdate:function(a,c,d){a.$table.find(a.selectorRemove).remove(),b.setupParsers(a),b.buildCache(a),b.checkResort(a,c,d)},initSort:function(c,d,e){if(c.table.isUpdating)return setTimeout(function(){b.initSort(c,d,e)},50);var f,g,h,i,j,k,l,m=!e[c.sortMultiSortKey],n=c.table,o=c.$headers.length,p=parseInt(a(d).attr("data-column"),10),q=c.sortVars[p].order;if(c.$table.triggerHandler("sortStart",n),c.sortVars[p].count=e[c.sortResetKey]?2:(c.sortVars[p].count+1)%(c.sortReset?3:2),c.sortRestart)for(h=0;o>h;h++)l=c.$headers.eq(h),k=parseInt(l.attr("data-column"),10),p!==k&&(m||l.hasClass(b.css.sortNone))&&(c.sortVars[k].count=-1);if(m){if(c.sortList=[],c.last.sortList=[],null!==c.sortForce)for(f=c.sortForce,g=0;g<f.length;g++)f[g][0]!==p&&(c.sortList[c.sortList.length]=f[g]);if(i=q[c.sortVars[p].count],2>i&&(c.sortList[c.sortList.length]=[p,i],d.colSpan>1))for(g=1;g<d.colSpan;g++)c.sortList[c.sortList.length]=[p+g,i],c.sortVars[p+g].count=a.inArray(i,q)}else if(c.sortList=a.extend([],c.last.sortList),b.isValueInArray(p,c.sortList)>=0)for(g=0;g<c.sortList.length;g++)k=c.sortList[g],k[0]===p&&(k[1]=q[c.sortVars[p].count],2===k[1]&&(c.sortList.splice(g,1),c.sortVars[p].count=-1));else 
if(i=q[c.sortVars[p].count],2>i&&(c.sortList[c.sortList.length]=[p,i],d.colSpan>1))for(g=1;g<d.colSpan;g++)c.sortList[c.sortList.length]=[p+g,i],c.sortVars[p+g].count=a.inArray(i,q);if(c.last.sortList=a.extend([],c.sortList),c.sortList.length&&c.sortAppend&&(f=a.isArray(c.sortAppend)?c.sortAppend:c.sortAppend[c.sortList[0][0]],!b.isEmptyObject(f)))for(g=0;g<f.length;g++)if(f[g][0]!==p&&b.isValueInArray(f[g][0],c.sortList)<0){if(i=f[g][1],j=(""+i).match(/^(a|d|s|o|n)/))switch(k=c.sortList[0][1],j[0]){case"d":i=1;break;case"s":i=k;break;case"o":i=0===k?1:0;break;case"n":i=(k+1)%(c.sortReset?3:2);break;default:i=0}c.sortList[c.sortList.length]=[f[g][0],i]}c.$table.triggerHandler("sortBegin",n),setTimeout(function(){b.setHeadersCss(c),b.multisort(c),b.appendCache(c),c.$table.triggerHandler("sortBeforeEnd",n),c.$table.triggerHandler("sortEnd",n)},1)},multisort:function(a){var c,d,e,f,g=a.table,h=0,i=a.textSorter||"",j=a.sortList,k=j.length,l=a.$tbodies.length;if(!a.serverSideSorting&&!b.isEmptyObject(a.cache)){for(a.debug&&(d=new Date),c=0;l>c;c++)e=a.cache[c].colMax,f=a.cache[c].normalized,f.sort(function(c,d){var f,l,m,n,o,p,q;for(f=0;k>f;f++){if(m=j[f][0],n=j[f][1],h=0===n,a.sortStable&&c[m]===d[m]&&1===k)return c[a.columns].order-d[a.columns].order;if(l=/n/i.test(b.getSortType(a.parsers,m)),l&&a.strings[m]?(l="boolean"==typeof b.string[a.strings[m]]?(h?1:-1)*(b.string[a.strings[m]]?-1:1):a.strings[m]?b.string[a.strings[m]]||0:0,o=a.numberSorter?a.numberSorter(c[m],d[m],h,e[m],g):b["sortNumeric"+(h?"Asc":"Desc")](c[m],d[m],l,e[m],m,a)):(p=h?c:d,q=h?d:c,o="function"==typeof i?i(p[m],q[m],h,m,g):"object"==typeof i&&i.hasOwnProperty(m)?i[m](p[m],q[m],h,m,g):b["sortNatural"+(h?"Asc":"Desc")](c[m],d[m],m,a)),o)return o}return c[a.columns].order-d[a.columns].order});a.debug&&console.log("Applying sort 
"+j.toString()+b.benchmark(d))}},resortComplete:function(b,c){b.table.isUpdating&&b.$table.triggerHandler("updateComplete",b.table),a.isFunction(c)&&c(b.table)},checkResort:function(c,d,e){var f=a.isArray(d)?d:c.sortList,g="undefined"==typeof d?c.resort:d;g===!1||c.serverSideSorting||c.table.isProcessing?(b.resortComplete(c,e),b.applyWidget(c.table,!1)):f.length?b.sortOn(c,f,function(){b.resortComplete(c,e)},!0):b.sortReset(c,function(){b.resortComplete(c,e),b.applyWidget(c.table,!1)})},sortOn:function(c,d,e,f){var g=c.table;c.$table.triggerHandler("sortStart",g),b.updateHeaderSortCount(c,d),b.setHeadersCss(c),c.delayInit&&b.isEmptyObject(c.cache)&&b.buildCache(c),c.$table.triggerHandler("sortBegin",g),b.multisort(c),b.appendCache(c,f),c.$table.triggerHandler("sortBeforeEnd",g),c.$table.triggerHandler("sortEnd",g),b.applyWidget(g),a.isFunction(e)&&e(g)},sortReset:function(c,d){c.sortList=[],b.setHeadersCss(c),b.multisort(c),b.appendCache(c),a.isFunction(d)&&d(c.table)},getSortType:function(a,b){return a&&a[b]?a[b].type||"":""},getOrder:function(a){return/^d/i.test(a)||1===a},sortNatural:function(a,c){if(a===c)return 0;var d,e,f,g,h,i,j=b.regex;if(j.hex.test(c)){if(d=parseInt(a.match(j.hex),16),e=parseInt(c.match(j.hex),16),e>d)return-1;if(d>e)return 1}for(d=a.replace(j.chunk,"\\0$1\\0").replace(j.chunks,"").split("\\0"),e=c.replace(j.chunk,"\\0$1\\0").replace(j.chunks,"").split("\\0"),i=Math.max(d.length,e.length),h=0;i>h;h++){if(f=isNaN(d[h])?d[h]||0:parseFloat(d[h])||0,g=isNaN(e[h])?e[h]||0:parseFloat(e[h])||0,isNaN(f)!==isNaN(g))return isNaN(f)?1:-1;if(typeof f!=typeof g&&(f+="",g+=""),g>f)return-1;if(f>g)return 1}return 0},sortNaturalAsc:function(a,c,d,e){if(a===c)return 0;var f=b.string[e.empties[d]||e.emptyTo];return""===a&&0!==f?"boolean"==typeof f?f?-1:1:-f||-1:""===c&&0!==f?"boolean"==typeof f?f?1:-1:f||1:b.sortNatural(a,c)},sortNaturalDesc:function(a,c,d,e){if(a===c)return 0;var f=b.string[e.empties[d]||e.emptyTo];return""===a&&0!==f?"boolean"==typeof 
f?f?-1:1:f||1:""===c&&0!==f?"boolean"==typeof f?f?1:-1:-f||-1:b.sortNatural(c,a)},sortText:function(a,b){return a>b?1:b>a?-1:0},getTextValue:function(a,b,c){if(c){var d,e=a?a.length:0,f=c+b;for(d=0;e>d;d++)f+=a.charCodeAt(d);return b*f}return 0},sortNumericAsc:function(a,c,d,e,f,g){if(a===c)return 0;var h=b.string[g.empties[f]||g.emptyTo];return""===a&&0!==h?"boolean"==typeof h?h?-1:1:-h||-1:""===c&&0!==h?"boolean"==typeof h?h?1:-1:h||1:(isNaN(a)&&(a=b.getTextValue(a,d,e)),isNaN(c)&&(c=b.getTextValue(c,d,e)),a-c)},sortNumericDesc:function(a,c,d,e,f,g){if(a===c)return 0;var h=b.string[g.empties[f]||g.emptyTo];return""===a&&0!==h?"boolean"==typeof h?h?-1:1:h||1:""===c&&0!==h?"boolean"==typeof h?h?1:-1:-h||-1:(isNaN(a)&&(a=b.getTextValue(a,d,e)),isNaN(c)&&(c=b.getTextValue(c,d,e)),c-a)},sortNumeric:function(a,b){return a-b},addWidget:function(a){a.id&&!b.isEmptyObject(b.getWidgetById(a.id))&&console.warn('"'+a.id+'" widget was loaded more than once!'),b.widgets[b.widgets.length]=a},hasWidget:function(b,c){return b=a(b),b.length&&b[0].config&&b[0].config.widgetInit[c]||!1},getWidgetById:function(a){var c,d,e=b.widgets.length;for(c=0;e>c;c++)if(d=b.widgets[c],d&&d.id&&d.id.toLowerCase()===a.toLowerCase())return d},applyWidgetOptions:function(c){var d,e,f=c.config,g=f.widgets.length;if(g)for(d=0;g>d;d++)e=b.getWidgetById(f.widgets[d]),e&&e.options&&(f.widgetOptions=a.extend(!0,{},e.options,f.widgetOptions))},addWidgetFromClass:function(a){var c,d,e=a.config,f="^"+e.widgetClass.replace(b.regex.templateName,"(\\S+)+")+"$",g=new RegExp(f,"g"),h=(a.className||"").split(b.regex.spaces);if(h.length)for(c=h.length,d=0;c>d;d++)h[d].match(g)&&(e.widgets[e.widgets.length]=h[d].replace(g,"$1"))},applyWidgetId:function(c,d,e){c=a(c)[0];var f,g,h,i=c.config,j=i.widgetOptions,k=b.getWidgetById(d);k&&(h=k.id,f=!1,a.inArray(h,i.widgets)<0&&(i.widgets[i.widgets.length]=h),i.debug&&(g=new 
Date),!e&&i.widgetInit[h]||(i.widgetInit[h]=!0,c.hasInitialized&&b.applyWidgetOptions(c),"function"==typeof k.init&&(f=!0,i.debug&&console[console.group?"group":"log"]("Initializing "+h+" widget"),k.init(c,k,i,j))),e||"function"!=typeof k.format||(f=!0,i.debug&&console[console.group?"group":"log"]("Updating "+h+" widget"),k.format(c,i,j,!1)),i.debug&&f&&(console.log("Completed "+(e?"initializing ":"applying ")+h+" widget"+b.benchmark(g)),console.groupEnd&&console.groupEnd()))},applyWidget:function(c,d,e){c=a(c)[0];var f,g,h,i,j,k=c.config,l=[];if(d===!1||!c.hasInitialized||!c.isApplyingWidgets&&!c.isUpdating){if(k.debug&&(j=new Date),b.addWidgetFromClass(c),clearTimeout(k.timerReady),k.widgets.length){for(c.isApplyingWidgets=!0,k.widgets=a.grep(k.widgets,function(b,c){return a.inArray(b,k.widgets)===c}),h=k.widgets||[],g=h.length,f=0;g>f;f++)i=b.getWidgetById(h[f]),i&&i.id?(i.priority||(i.priority=10),l[f]=i):k.debug&&console.warn('"'+h[f]+'" widget code does not exist!');for(l.sort(function(a,b){return a.priority<b.priority?-1:a.priority===b.priority?0:1}),g=l.length,k.debug&&console[console.group?"group":"log"]("Start "+(d?"initializing":"applying")+" widgets"),f=0;g>f;f++)i=l[f],i&&i.id&&b.applyWidgetId(c,i.id,d);k.debug&&console.groupEnd&&console.groupEnd(),d||"function"!=typeof e||e(c)}k.timerReady=setTimeout(function(){c.isApplyingWidgets=!1,a.data(c,"lastWidgetApplication",new Date),k.$table.triggerHandler("tablesorter-ready")},10),k.debug&&(i=k.widgets.length,console.log("Completed "+(d===!0?"initializing ":"applying ")+i+" widget"+(1!==i?"s":"")+b.benchmark(j)))}},removeWidget:function(c,d,e){c=a(c)[0];var f,g,h,i,j=c.config;if(d===!0)for(d=[],i=b.widgets.length,h=0;i>h;h++)g=b.widgets[h],g&&g.id&&(d[d.length]=g.id);else 
d=(a.isArray(d)?d.join(","):d||"").toLowerCase().split(/[\s,]+/);for(i=d.length,f=0;i>f;f++)g=b.getWidgetById(d[f]),h=a.inArray(d[f],j.widgets),h>=0&&e!==!0&&j.widgets.splice(h,1),g&&g.remove&&(j.debug&&console.log((e?"Refreshing":"Removing")+' "'+d[f]+'" widget'),g.remove(c,j,j.widgetOptions,e),j.widgetInit[d[f]]=!1)},refreshWidgets:function(c,d,e){c=a(c)[0];var f,g,h=c.config,i=h.widgets,j=b.widgets,k=j.length,l=[],m=function(b){a(b).triggerHandler("refreshComplete")};for(f=0;k>f;f++)g=j[f],g&&g.id&&(d||a.inArray(g.id,i)<0)&&(l[l.length]=g.id);b.removeWidget(c,l.join(","),!0),e!==!0?(b.applyWidget(c,d||!1,m),d&&b.applyWidget(c,!1,m)):m(c)},benchmark:function(a){return" ( "+((new Date).getTime()-a.getTime())+"ms )"},log:function(){console.log(arguments)},isEmptyObject:function(a){for(var b in a)return!1;return!0},isValueInArray:function(a,b){var c,d=b&&b.length||0;for(c=0;d>c;c++)if(b[c][0]===a)return c;return-1},formatFloat:function(c,d){if("string"!=typeof c||""===c)return c;var e,f=d&&d.config?d.config.usNumberFormat!==!1:"undefined"!=typeof d?d:!0;return c=f?c.replace(b.regex.comma,""):c.replace(b.regex.digitNonUS,"").replace(b.regex.comma,"."),b.regex.digitNegativeTest.test(c)&&(c=c.replace(b.regex.digitNegativeReplace,"-$1")),e=parseFloat(c),isNaN(e)?a.trim(c):e},isDigit:function(a){return isNaN(a)?b.regex.digitTest.test(a.toString().replace(b.regex.digitReplace,"")):""!==a},computeColumnIndex:function(b,c){var d,e,f,g,h,i,j,k,l,m,n=c&&c.columns||0,o=[],p=new Array(n);
|
2 |
+
for(d=0;d<b.length;d++)for(i=b[d].cells,e=0;e<i.length;e++){for(h=i[e],j=h.parentNode.rowIndex,k=h.rowSpan||1,l=h.colSpan||1,"undefined"==typeof o[j]&&(o[j]=[]),f=0;f<o[j].length+1;f++)if("undefined"==typeof o[j][f]){m=f;break}for(n&&h.cellIndex===m||(h.setAttribute?h.setAttribute("data-column",m):a(h).attr("data-column",m)),f=j;j+k>f;f++)for("undefined"==typeof o[f]&&(o[f]=[]),p=o[f],g=m;m+l>g;g++)p[g]="x"}return p.length},fixColumnWidth:function(c){c=a(c)[0];var d,e,f,g,h,i=c.config,j=i.$table.children("colgroup");if(j.length&&j.hasClass(b.css.colgroup)&&j.remove(),i.widthFixed&&0===i.$table.children("colgroup").length){for(j=a('<colgroup class="'+b.css.colgroup+'">'),d=i.$table.width(),f=i.$tbodies.find("tr:first").children(":visible"),g=f.length,h=0;g>h;h++)e=parseInt(f.eq(h).width()/d*1e3,10)/10+"%",j.append(a("<col>").css("width",e));i.$table.prepend(j)}},getData:function(b,c,d){var e,f,g="",h=a(b);return h.length?(e=a.metadata?h.metadata():!1,f=" "+(h.attr("class")||""),"undefined"!=typeof h.data(d)||"undefined"!=typeof h.data(d.toLowerCase())?g+=h.data(d)||h.data(d.toLowerCase()):e&&"undefined"!=typeof e[d]?g+=e[d]:c&&"undefined"!=typeof c[d]?g+=c[d]:" "!==f&&f.match(" "+d+"-")&&(g=f.match(new RegExp("\\s"+d+"-([\\w-]+)"))[1]||""),a.trim(g)):""},getColumnData:function(b,c,d,e,f){if("undefined"!=typeof c&&null!==c){b=a(b)[0];var g,h,i=b.config,j=f||i.$headers,k=i.$headerIndexed&&i.$headerIndexed[d]||j.filter('[data-column="'+d+'"]:last');if(c[d])return e?c[d]:c[j.index(k)];for(h in c)if("string"==typeof h&&(g=k.filter(h).add(k.find(h)),g.length))return c[h]}},isProcessing:function(c,d,e){c=a(c);var f=c[0].config,g=e||c.find("."+b.css.header);d?("undefined"!=typeof e&&f.sortList.length>0&&(g=g.filter(function(){return this.sortDisabled?!1:b.isValueInArray(parseFloat(a(this).attr("data-column")),f.sortList)>=0})),c.add(g).addClass(b.css.processing+" "+f.cssProcessing)):c.add(g).removeClass(b.css.processing+" 
"+f.cssProcessing)},processTbody:function(b,c,d){if(b=a(b)[0],d)return b.isProcessing=!0,c.before('<colgroup class="tablesorter-savemyplace"/>'),a.fn.detach?c.detach():c.remove();var e=a(b).find("colgroup.tablesorter-savemyplace");c.insertAfter(e),e.remove(),b.isProcessing=!1},clearTableBody:function(b){a(b)[0].config.$tbodies.children().detach()},characterEquivalents:{a:"áàâãäąå",A:"ÁÀÂÃÄĄÅ",c:"çćč",C:"ÇĆČ",e:"éèêëěę",E:"ÉÈÊËĚĘ",i:"íìİîïı",I:"ÍÌİÎÏ",o:"óòôõöō",O:"ÓÒÔÕÖŌ",ss:"ß",SS:"ẞ",u:"úùûüů",U:"ÚÙÛÜŮ"},replaceAccents:function(a){var c,d="[",e=b.characterEquivalents;if(!b.characterRegex){b.characterRegexArray={};for(c in e)"string"==typeof c&&(d+=e[c],b.characterRegexArray[c]=new RegExp("["+e[c]+"]","g"));b.characterRegex=new RegExp(d+"]")}if(b.characterRegex.test(a))for(c in e)"string"==typeof c&&(a=a.replace(b.characterRegexArray[c],c));return a},restoreHeaders:function(c){var d,e,f=a(c)[0].config,g=f.$table.find(f.selectorHeaders),h=g.length;for(d=0;h>d;d++)e=g.eq(d),e.find("."+b.css.headerIn).length&&e.html(f.headerContent[d])},destroy:function(c,d,e){if(c=a(c)[0],c.hasInitialized){b.removeWidget(c,!0,!1);var f,g=a(c),h=c.config,i=h.debug,j=g.find("thead:first"),k=j.find("tr."+b.css.headerRow).removeClass(b.css.headerRow+" "+h.cssHeaderRow),l=g.find("tfoot:first > tr").children("th, td");d===!1&&a.inArray("uitheme",h.widgets)>=0&&(g.triggerHandler("applyWidgetId",["uitheme"]),g.triggerHandler("applyWidgetId",["zebra"])),j.find("tr").not(k).remove(),f="sortReset update updateRows updateAll updateHeaders updateCell addRows updateComplete sorton appendCache updateCache applyWidgetId applyWidgets refreshWidgets removeWidget destroy mouseup mouseleave "+"keypress sortBegin sortEnd resetToLoadState ".split(" ").join(h.namespace+" "),g.removeData("tablesorter").unbind(f.replace(b.regex.spaces," ")),h.$headers.add(l).removeClass([b.css.header,h.cssHeader,h.cssAsc,h.cssDesc,b.css.sortAsc,b.css.sortDesc,b.css.sortNone].join(" 
")).removeAttr("data-column").removeAttr("aria-label").attr("aria-disabled","true"),k.find(h.selectorSort).unbind("mousedown mouseup keypress ".split(" ").join(h.namespace+" ").replace(b.regex.spaces," ")),b.restoreHeaders(c),g.toggleClass(b.css.table+" "+h.tableClass+" tablesorter-"+h.theme,d===!1),c.hasInitialized=!1,delete c.config.cache,"function"==typeof e&&e(c),i&&console.log("tablesorter has been removed")}}};a.fn.tablesorter=function(c){return this.each(function(){var d=this,e=a.extend(!0,{},b.defaults,c,b.instanceMethods);e.originalSettings=c,!d.hasInitialized&&b.buildTable&&"TABLE"!==this.nodeName?b.buildTable(d,e):b.setup(d,e)})},window.console&&window.console.log||(b.logs=[],console={},console.log=console.warn=console.error=console.table=function(){var a=arguments.length>1?arguments:arguments[0];b.logs[b.logs.length]={date:Date.now(),log:a}}),b.addParser({id:"no-parser",is:function(){return!1},format:function(){return""},type:"text"}),b.addParser({id:"text",is:function(){return!0},format:function(c,d){var e=d.config;return c&&(c=a.trim(e.ignoreCase?c.toLocaleLowerCase():c),c=e.sortLocaleCompare?b.replaceAccents(c):c),c},type:"text"}),b.regex.nondigit=/[^\w,. \-()]/g,b.addParser({id:"digit",is:function(a){return b.isDigit(a)},format:function(c,d){var e=b.formatFloat((c||"").replace(b.regex.nondigit,""),d);return c&&"number"==typeof e?e:c?a.trim(c&&d.config.ignoreCase?c.toLocaleLowerCase():c):c},type:"numeric"}),b.regex.currencyReplace=/[+\-,. 
]/g,b.regex.currencyTest=/^\(?\d+[\u00a3$\u20ac\u00a4\u00a5\u00a2?.]|[\u00a3$\u20ac\u00a4\u00a5\u00a2?.]\d+\)?$/,b.addParser({id:"currency",is:function(a){return a=(a||"").replace(b.regex.currencyReplace,""),b.regex.currencyTest.test(a)},format:function(c,d){var e=b.formatFloat((c||"").replace(b.regex.nondigit,""),d);return c&&"number"==typeof e?e:c?a.trim(c&&d.config.ignoreCase?c.toLocaleLowerCase():c):c},type:"numeric"}),b.regex.urlProtocolTest=/^(https?|ftp|file):\/\//,b.regex.urlProtocolReplace=/(https?|ftp|file):\/\//,b.addParser({id:"url",is:function(a){return b.regex.urlProtocolTest.test(a)},format:function(c){return c?a.trim(c.replace(b.regex.urlProtocolReplace,"")):c},parsed:!0,type:"text"}),b.regex.dash=/-/g,b.regex.isoDate=/^\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2}/,b.addParser({id:"isoDate",is:function(a){return b.regex.isoDate.test(a)},format:function(a,c){var d=a?new Date(a.replace(b.regex.dash,"/")):a;return d instanceof Date&&isFinite(d)?d.getTime():a},type:"numeric"}),b.regex.percent=/%/g,b.regex.percentTest=/(\d\s*?%|%\s*?\d)/,b.addParser({id:"percent",is:function(a){return b.regex.percentTest.test(a)&&a.length<15},format:function(a,c){return a?b.formatFloat(a.replace(b.regex.percent,""),c):a},type:"numeric"}),b.addParser({id:"image",is:function(a,b,c,d){return d.find("img").length>0},format:function(b,c,d){return a(d).find("img").attr(c.config.imgAttr||"alt")||b},parsed:!0,type:"text"}),b.regex.dateReplace=/(\S)([AP]M)$/i,b.regex.usLongDateTest1=/^[A-Z]{3,10}\.?\s+\d{1,2},?\s+(\d{4})(\s+\d{1,2}:\d{2}(:\d{2})?(\s+[AP]M)?)?$/i,b.regex.usLongDateTest2=/^\d{1,2}\s+[A-Z]{3,10}\s+\d{4}/i,b.addParser({id:"usLongDate",is:function(a){return b.regex.usLongDateTest1.test(a)||b.regex.usLongDateTest2.test(a)},format:function(a,c){var d=a?new Date(a.replace(b.regex.dateReplace,"$1 $2")):a;return d instanceof 
Date&&isFinite(d)?d.getTime():a},type:"numeric"}),b.regex.shortDateTest=/(^\d{1,2}[\/\s]\d{1,2}[\/\s]\d{4})|(^\d{4}[\/\s]\d{1,2}[\/\s]\d{1,2})/,b.regex.shortDateReplace=/[\-.,]/g,b.regex.shortDateXXY=/(\d{1,2})[\/\s](\d{1,2})[\/\s](\d{4})/,b.regex.shortDateYMD=/(\d{4})[\/\s](\d{1,2})[\/\s](\d{1,2})/,b.convertFormat=function(a,c){a=(a||"").replace(b.regex.spaces," ").replace(b.regex.shortDateReplace,"/"),"mmddyyyy"===c?a=a.replace(b.regex.shortDateXXY,"$3/$1/$2"):"ddmmyyyy"===c?a=a.replace(b.regex.shortDateXXY,"$3/$2/$1"):"yyyymmdd"===c&&(a=a.replace(b.regex.shortDateYMD,"$1/$2/$3"));var d=new Date(a);return d instanceof Date&&isFinite(d)?d.getTime():""},b.addParser({id:"shortDate",is:function(a){return a=(a||"").replace(b.regex.spaces," ").replace(b.regex.shortDateReplace,"/"),b.regex.shortDateTest.test(a)},format:function(a,c,d,e){if(a){var f=c.config,g=f.$headerIndexed[e],h=g.length&&g.data("dateFormat")||b.getData(g,b.getColumnData(c,f.headers,e),"dateFormat")||f.dateFormat;return g.length&&g.data("dateFormat",h),b.convertFormat(a,h)||a}return a},type:"numeric"}),b.regex.timeTest=/^([1-9]|1[0-2]):([0-5]\d)(\s[AP]M)$|^((?:[01]\d|[2][0-4]):[0-5]\d)$/i,b.regex.timeMatch=/([1-9]|1[0-2]):([0-5]\d)(\s[AP]M)|((?:[01]\d|[2][0-4]):[0-5]\d)/i,b.addParser({id:"time",is:function(a){return b.regex.timeTest.test(a)},format:function(a,c){var d,e=(a||"").match(b.regex.timeMatch),f=new Date(a),g=a&&(null!==e?e[0]:"00:00 AM"),h=g?new Date("2000/01/01 "+g.replace(b.regex.dateReplace,"$1 $2")):g;return h instanceof Date&&isFinite(h)?(d=f instanceof Date&&isFinite(f)?f.getTime():0,d?parseFloat(h.getTime()+"."+f.getTime()):h.getTime()):a},type:"numeric"}),b.addParser({id:"metadata",is:function(){return!1},format:function(b,c,d){var e=c.config,f=e.parserMetadataName?e.parserMetadataName:"sortValue";return a(d).metadata()[f]},type:"numeric"}),b.addWidget({id:"zebra",priority:90,format:function(b,c,d){var e,f,g,h,i,j,k,l=new 
RegExp(c.cssChildRow,"i"),m=c.$tbodies.add(a(c.namespace+"_extra_table").children("tbody:not(."+c.cssInfoBlock+")"));for(i=0;i<m.length;i++)for(g=0,e=m.eq(i).children("tr:visible").not(c.selectorRemove),k=e.length,j=0;k>j;j++)f=e.eq(j),l.test(f[0].className)||g++,h=g%2===0,f.removeClass(d.zebra[h?1:0]).addClass(d.zebra[h?0:1])},remove:function(a,c,d,e){if(!e){var f,g,h=c.$tbodies,i=(d.zebra||["even","odd"]).join(" ");for(f=0;f<h.length;f++)g=b.processTbody(a,h.eq(f),!0),g.children().removeClass(i),b.processTbody(a,g,!1)}}})}(jQuery),a.tablesorter});
|
.local/share/jupyter/nbextensions/varInspector/main.css
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
/* ------------------------------------------------------------------ */
/* Variable Inspector panel                                            */
/* ------------------------------------------------------------------ */

/* Scrollable area that holds the table of variables. */
.varInspector {
    min-height: 100px;
    max-height: 500px;
    padding: 0;
    overflow: auto;
    color: #333;
    font-size: 80%;
    font-weight: normal;
    white-space: nowrap;
}

/* Floating window wrapped around the panel. */
.varInspector-float-wrapper {
    position: fixed !important;
    top: 120px;
    right: 20px;
    z-index: 100;
    width: 350px;
    max-width: 800px;
    padding: 10px;
    overflow: hidden;
    border: thin solid rgba(0, 0, 0, 0.38);
    border-radius: 5px;
    background-color: #fff;
    opacity: 0.95;
}

/* Header buttons are stacked against the right edge. */
.hide-btn,
.reload-btn,
.kill-btn {
    float: right;
}

/* NOTE: this selector is not scoped to the inspector; it applies page-wide. */
.col-md-9 {
    width: 80%;
    margin-left: 14%;
    overflow: hidden;
}

/* Collapsed window. The transition names no duration, so it has no visible effect. */
#varInspector-wrapper.closed {
    width: auto;
    min-width: 250px;
    transition: width;
}

#varInspector-wrapper:hover {
    opacity: 1;
}

#varInspector-wrapper .header {
    font-size: 16px;
    font-weight: bold;
}

#varInspector-wrapper .hide-btn,
#varInspector-wrapper .reload-btn,
#varInspector-wrapper .kill-btn {
    font-family: monospace;
    font-size: 14px;
}

/* don't waste so much screen space... */
#varInspector-wrapper .toc-item {
    padding-left: 20px;
}

#varInspector-wrapper .toc-item .toc-item {
    padding-left: 10px;
}

/* ------------------------------------------------------------------ */
/* Tables                                                              */
/* ------------------------------------------------------------------ */

table.table,
table.table tr,
table.table td,
table.table th {
    border: 0;
}

table.table-nonfluid {
    width: auto !important;
}

table.table {
    margin-right: 0;
    margin-left: 0;
}

/* ------------------------------------------------------------------ */
/* tablesorter plugin: sort indicators in the column headers           */
/* ------------------------------------------------------------------ */

/* Unsorted column: double arrow. */
.tablesorter-default .header,
.tablesorter-default .tablesorter-header {
    padding-right: 20px;
    background-image: url(data:image/gif;base64,R0lGODlhFQAJAIAAACMtMP///yH5BAEAAAEALAAAAAAVAAkAAAIXjI+AywnaYnhUMoqt3gZXPmVg94yJVQAAOw==);
    background-repeat: no-repeat;
    background-position: right center;
    cursor: pointer;
}

/* Ascending sort. */
.tablesorter-default thead .headerSortUp,
.tablesorter-default thead .tablesorter-headerSortUp,
.tablesorter-default thead .tablesorter-headerAsc {
    background-image: url(data:image/gif;base64,R0lGODlhFQAEAIAAACMtMP///yH5BAEAAAEALAAAAAAVAAQAAAINjI8Bya2wnINUMopZAQA7);
}

/* Descending sort. */
.tablesorter-default thead .headerSortDown,
.tablesorter-default thead .tablesorter-headerSortDown,
.tablesorter-default thead .tablesorter-headerDesc {
    background-image: url(data:image/gif;base64,R0lGODlhFQAEAIAAACMtMP///yH5BAEAAAEALAAAAAAVAAQAAAINjB+gC+jP2ptn0WskLQA7);
}

/* Columns that cannot be sorted show no indicator. */
.tablesorter-default thead .sorter-false {
    padding-right: 5px;
    background-image: none;
    cursor: default;
}
|
.local/share/jupyter/nbextensions/varInspector/main.js
ADDED
@@ -0,0 +1,462 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
define([
|
2 |
+
'require',
|
3 |
+
'jquery',
|
4 |
+
'base/js/namespace',
|
5 |
+
'base/js/events',
|
6 |
+
'notebook/js/codecell'
|
7 |
+
], function(
|
8 |
+
requirejs,
|
9 |
+
$,
|
10 |
+
Jupyter,
|
11 |
+
events,
|
12 |
+
codecell
|
13 |
+
) {
|
14 |
+
"use strict";
|
15 |
+
|
16 |
+
// Extension name; also used to tag every console message.
var mod_name = "varInspector";
var log_prefix = "[" + mod_name + "] ";

// ----- Default configuration -------------------------------------------
// Fallback values, used for any parameter that is absent from the general
// settings (notebook.json).
var cfg = {
    window_display: false,
    cols: {
        lenName: 16,
        lenType: 16,
        lenVar: 40
    },
    // One entry per supported kernel language (lower-case language name).
    kernels_config: {
        python: {
            library: "var_list.py",
            delete_cmd_prefix: "del ",
            delete_cmd_postfix: "",
            varRefreshCmd: "print(var_dic_list())"
        },
        r: {
            library: "var_list.r",
            delete_cmd_prefix: "rm(",
            delete_cmd_postfix: ") ",
            varRefreshCmd: "cat(var_dic_list()) "
        }
    },
    types_to_exclude: ["module", "function", "builtin_function_or_method", "instance", "_Feature"]
};

// ----- Module-wide state -----------------------------------------------
var st = {
    config_loaded: false,
    extension_initialized: false,
    code_init: ""
};
|
55 |
+
|
56 |
+
/**
 * Merge the server-side and per-notebook settings into `cfg`.
 *
 * The merge happens asynchronously, once the notebook config has loaded;
 * `callback` (optional) is invoked afterwards and then
 * `st.config_loaded` is set. The same `cfg` object is returned
 * immediately and is updated in place when the config arrives.
 */
function read_config(cfg, callback) {
    var config = Jupyter.notebook.config;

    config.loaded.then(function() {
        var nb_meta = Jupyter.notebook.metadata;

        // Start from the defaults and overlay the server-level settings.
        $.extend(true, cfg, config.data.varInspector);

        // window_display comes from the notebook metadata when it is set there.
        var nb_cfg = nb_meta.varInspector;
        if (nb_cfg && nb_cfg.window_display) {
            cfg.window_display = nb_cfg.window_display;
        }

        // Overlay the notebook-level settings and store the merged object
        // back into the metadata, so it can be modified per document.
        $.extend(true, cfg, nb_meta.varInspector);
        nb_meta.varInspector = cfg;

        // cols and kernels_config are taken from the system level (if defined).
        var server_cfg = config.data.varInspector;
        if (server_cfg) {
            if (server_cfg.cols) {
                cfg.cols = $.extend(true, cfg.cols, server_cfg.cols);
            }
            if (server_cfg.kernels_config) {
                cfg.kernels_config = $.extend(true, cfg.kernels_config, server_cfg.kernels_config);
            }
        }

        if (callback) {
            callback();
        }
        st.config_loaded = true;
    });

    return cfg;
}
|
90 |
+
|
91 |
+
var sortable;
|
92 |
+
|
93 |
+
// Argument-free entry point (toolbar action, close button): toggles the
// inspector window using the module-level configuration and state.
function toggleVarInspector() {
    toggle_varInspector(cfg, st);
}
|
96 |
+
|
97 |
+
/**
 * Add the "Variable Inspector" button to the notebook toolbar.
 * When the toolbar does not exist yet, the call is retried on
 * app initialisation. Does nothing if the button is already there.
 */
var varInspector_button = function() {
    if (!Jupyter.toolbar) {
        events.on("app_initialized.NotebookApp", varInspector_button);
        return;
    }

    if ($("#varInspector_button").length !== 0) {
        return; // already present
    }

    var toggle_action = Jupyter.keyboard_manager.actions.register({
        'help': 'Variable Inspector',
        'icon': 'fa-crosshairs',
        'handler': toggleVarInspector
    }, 'toggle-variable-inspector', 'varInspector');

    var button_group = Jupyter.toolbar.add_buttons_group([toggle_action]);
    $(button_group).find('.btn').attr('id', 'varInspector_button');
};
|
112 |
+
|
113 |
+
// Attach the extension's stylesheet (main.css, resolved relative to this
// module) to the document head.
var load_css = function() {
    var stylesheet = document.createElement("link");
    stylesheet.rel = "stylesheet";
    stylesheet.type = "text/css";
    stylesheet.href = requirejs.toUrl("./main.css");

    var head = document.getElementsByTagName("head")[0];
    head.appendChild(stylesheet);
};
|
120 |
+
|
121 |
+
|
122 |
+
/**
 * Build the HTML of the variable table.
 *
 * @param {string} jsonVars JSON text holding an array of objects with the
 *     fields varName, varType, varSize, varContent and (optionally) varShape.
 * @returns {string} markup for a <div class="inspector"> wrapping the table.
 * @throws {SyntaxError} when jsonVars is not valid JSON.
 *
 * Every value coming from the kernel is HTML-escaped before it is inserted,
 * so a variable whose content contains markup (e.g. "<b>") is shown
 * literally instead of breaking the table.
 */
function html_table(jsonVars) {
    // Shorten x to at most L characters, ending in '...' when it was cut.
    function _trunc(x, L) {
        x = String(x);
        if (x.length <= L) return x; // a string of exactly L characters already fits
        return x.substring(0, L - 3) + '...';
    }

    // Escape text for use in HTML content or in a double-quoted attribute.
    function _escape_html(x) {
        return String(x)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;');
    }

    // Escape text for use inside a single-quoted JavaScript string literal.
    function _escape_js_string(x) {
        return String(x)
            .replace(/\\/g, '\\\\')
            .replace(/'/g, "\\'")
            .replace(/\r/g, '\\r')
            .replace(/\n/g, '\\n');
    }

    var kernelLanguage = Jupyter.notebook.metadata.kernelspec.language.toLowerCase();
    var kernel_config = cfg.kernels_config[kernelLanguage];
    var varList = JSON.parse(String(jsonVars));

    // The Shape column is only shown when at least one variable has a shape.
    var has_shape = varList.some(listVar => "varShape" in listVar && listVar.varShape !== '');
    var shape_str = has_shape ? '<th >Shape</th>' : '';

    var table = '<div class="inspector"><table class="table fixed table-condensed table-nonfluid "><col /> ' +
        '<col /><col /><thead><tr><th >X</th><th >Name</th><th >Type</th><th >Size</th>' + shape_str +
        '<th >Value</th></tr></thead><tr><td> ' +
        '</td></tr>';

    varList.forEach(listVar => {
        // Kernel command that deletes this variable, run by the "x" link.
        var delete_cmd = kernel_config.delete_cmd_prefix + listVar.varName + kernel_config.delete_cmd_postfix;
        var on_click = "Jupyter.notebook.kernel.execute('" + _escape_js_string(delete_cmd) + "'); " +
            "Jupyter.notebook.events.trigger('varRefresh'); ";

        var shape_col_str = '</td><td>';
        if (has_shape) {
            // Entries without a shape get an empty cell rather than "undefined".
            var shape = (listVar.varShape === undefined || listVar.varShape === null) ? '' : listVar.varShape;
            shape_col_str = '</td><td>' + _escape_html(shape) + '</td><td>';
        }

        table +=
            '<tr><td><a href="#" onClick="' + _escape_html(on_click) + '">x</a></td>' +
            '<td>' + _escape_html(_trunc(listVar.varName, cfg.cols.lenName)) +
            '</td><td>' + _escape_html(_trunc(listVar.varType, cfg.cols.lenType)) +
            '</td><td>' + _escape_html(listVar.varSize) + shape_col_str +
            _escape_html(_trunc(listVar.varContent, cfg.cols.lenVar)) +
            '</td></tr>';
    });

    return table + '</table></div>';
}
|
157 |
+
|
158 |
+
|
159 |
+
|
160 |
+
/**
 * iopub output callback for the kernel commands issued by this extension.
 *
 * - If the message reports that var_dic_list is undefined (Python or R),
 *   the helper library was cleared from the kernel, so the extension is
 *   re-initialised.
 * - Otherwise, if the message carries text, it is parsed as the JSON variable
 *   list and rendered into #varInspector. Messages without text, or with
 *   text that is not valid JSON, are ignored instead of throwing from
 *   inside the callback.
 * In every case the tablesorter plugin is (re)applied to the table shortly
 * afterwards.
 */
function code_exec_callback(msg) {
    var jsonVars = msg.content['text'];
    var evalue = msg.content.evalue;
    var notWellDefined = false;

    if (evalue) {
        notWellDefined = evalue == "name 'var_dic_list' is not defined" ||
            evalue.indexOf("Error in cat(var_dic_list())") === 0;
    }

    if (notWellDefined) {
        // var_dic_list was cleared ==> need to restart the extension
        varInspector_init();
    } else if (typeof jsonVars === 'string') {
        try {
            $('#varInspector').html(html_table(jsonVars));
        } catch (err) {
            // Output that is not the expected JSON list; keep the previous table.
            console.warn(log_prefix + 'could not render variable list', err);
        }
    }

    requirejs(['nbextensions/varInspector/jquery.tablesorter.min'],
        function() {
            // Short delay before applying the plugin to the freshly written table.
            setTimeout(function() {
                if ($('#varInspector').length > 0) {
                    $('#varInspector table').tablesorter();
                }
            }, 50);
        });
}
|
176 |
+
|
177 |
+
// Make the inspector table sortable. The tablesorter plugin is loaded
// asynchronously, so it is applied from the load callback; calling it
// right after requesting the module could run before the plugin is defined.
function tableSort() {
    requirejs(['nbextensions/varInspector/jquery.tablesorter.min'], function() {
        $('#varInspector table').tablesorter();
    });
}
|
181 |
+
|
182 |
+
/**
 * Ask the kernel for the current list of variables; the reply is handled by
 * code_exec_callback. Used as an event handler ('execute.CodeCell',
 * 'varRefresh') and by the reload button.
 *
 * Does nothing when no kernel is attached or when the kernel language has no
 * entry in cfg.kernels_config, so that the handler never throws into the
 * code that triggered the event.
 */
var varRefresh = function() {
    var kernelspec = Jupyter.notebook.metadata.kernelspec;
    if (!kernelspec || !kernelspec.language) {
        return;
    }

    var kernelLanguage = kernelspec.language.toLowerCase();
    var kernel_config = cfg.kernels_config[kernelLanguage];
    if (kernel_config === undefined) {
        return; // unsupported kernel language
    }

    requirejs(['nbextensions/varInspector/jquery.tablesorter.min'],
        function() {
            // The kernel is looked up when the command is sent, not before.
            var kernel = Jupyter.notebook.kernel;
            if (typeof kernel === "undefined" || kernel === null) {
                return;
            }
            kernel.execute(
                kernel_config.varRefreshCmd, { iopub: { output: code_exec_callback } }, { silent: false }
            );
        });
};
|
192 |
+
|
193 |
+
|
194 |
+
/**
 * (Re)initialise the extension: read the configuration, load the helper
 * library matching the kernel language, execute it in the kernel, create the
 * inspector window if needed, and refresh the variable list on every cell
 * execution.
 *
 * This function can run more than once (code_exec_callback calls it again
 * when the helper library has been cleared from the kernel), so the event
 * handlers are removed before being registered to keep a single binding.
 */
var varInspector_init = function() {
    // Fetch the helper library `lib`, substitute the column-width
    // placeholders, and execute it in the kernel.
    function read_code_init(lib) {
        var libName = Jupyter.notebook.base_url + "nbextensions/varInspector/" + lib;
        $.get(libName).done(function(data) {
            st.code_init = data;
            st.code_init = st.code_init.replace('lenName', cfg.cols.lenName).replace('lenType', cfg.cols.lenType)
                .replace('lenVar', cfg.cols.lenVar);
            //.replace('types_to_exclude', JSON.stringify(cfg.types_to_exclude).replace(/\"/g, "'"))
            requirejs(
                [
                    'nbextensions/varInspector/jquery.tablesorter.min'
                    //'nbextensions/varInspector/colResizable-1.6.min'
                ],
                function() {
                    Jupyter.notebook.kernel.execute(st.code_init, { iopub: { output: code_exec_callback } }, { silent: false });
                });
            variable_inspector(cfg, st); // create window if not already present
            console.log(log_prefix + 'loaded library');
        }).fail(function() {
            console.log(log_prefix + 'failed to load ' + lib + ' library');
        });
    }

    // read configuration
    cfg = read_config(cfg, function() {
        // Called when config is available
        if (typeof Jupyter.notebook.kernel !== "undefined" && Jupyter.notebook.kernel !== null) {
            var kernelLanguage = Jupyter.notebook.metadata.kernelspec.language.toLowerCase();
            var kernel_config = cfg.kernels_config[kernelLanguage];
            if (kernel_config === undefined) { // Kernel is not supported
                console.warn(log_prefix + " Sorry, can't use kernel language " + kernelLanguage + ".\n" +
                    "Configurations are currently only defined for the following languages:\n" +
                    Object.keys(cfg.kernels_config).join(', ') + "\n" +
                    "See readme for more details.");
                if ($("#varInspector_button").length > 0) { // extension was present
                    $("#varInspector_button").remove();
                    $('#varInspector-wrapper').remove();
                    // turn off events
                    events.off('execute.CodeCell', varRefresh);
                    events.off('varRefresh', varRefresh);
                }
                return;
            }
            varInspector_button(); // In case button was removed
            // read and execute code_init (if kernel is supported)
            read_code_init(kernel_config.library);
            // console.log("code_init-->", st.code_init)
        } else {
            console.warn(log_prefix + "Kernel not available?");
        }
    }); // called after config is stable

    // event: on cell execution, update the list of variables.
    // Remove any binding left by a previous initialisation first; otherwise
    // each re-init would add one more refresh per executed cell.
    events.off('execute.CodeCell', varRefresh);
    events.off('varRefresh', varRefresh);
    events.on('execute.CodeCell', varRefresh);
    events.on('varRefresh', varRefresh);
};
|
253 |
+
|
254 |
+
|
255 |
+
var create_varInspector_div = function(cfg, st) {
|
256 |
+
function save_position(){
|
257 |
+
Jupyter.notebook.metadata.varInspector.position = {
|
258 |
+
'left': $('#varInspector-wrapper').css('left'),
|
259 |
+
'top': $('#varInspector-wrapper').css('top'),
|
260 |
+
'width': $('#varInspector-wrapper').css('width'),
|
261 |
+
'height': $('#varInspector-wrapper').css('height'),
|
262 |
+
'right': $('#varInspector-wrapper').css('right')
|
263 |
+
};
|
264 |
+
}
|
265 |
+
var varInspector_wrapper = $('<div id="varInspector-wrapper"/>')
|
266 |
+
.append(
|
267 |
+
$('<div id="varInspector-header"/>')
|
268 |
+
.addClass("header")
|
269 |
+
.text("Variable Inspector ")
|
270 |
+
.append(
|
271 |
+
$("<a/>")
|
272 |
+
.attr("href", "#")
|
273 |
+
.text("[x]")
|
274 |
+
.addClass("kill-btn")
|
275 |
+
.attr('title', 'Close window')
|
276 |
+
.click(function() {
|
277 |
+
toggleVarInspector();
|
278 |
+
return false;
|
279 |
+
})
|
280 |
+
)
|
281 |
+
.append(
|
282 |
+
$("<a/>")
|
283 |
+
.attr("href", "#")
|
284 |
+
.addClass("hide-btn")
|
285 |
+
.attr('title', 'Hide Variable Inspector')
|
286 |
+
.text("[-]")
|
287 |
+
.click(function() {
|
288 |
+
$('#varInspector-wrapper').css('position', 'fixed');
|
289 |
+
$('#varInspector').slideToggle({
|
290 |
+
start: function(event, ui) {
|
291 |
+
// $(this).width($(this).width());
|
292 |
+
},
|
293 |
+
'complete': function() {
|
294 |
+
Jupyter.notebook.metadata.varInspector['varInspector_section_display'] = $('#varInspector').css('display');
|
295 |
+
save_position();
|
296 |
+
Jupyter.notebook.set_dirty();
|
297 |
+
}
|
298 |
+
});
|
299 |
+
$('#varInspector-wrapper').toggleClass('closed');
|
300 |
+
if ($('#varInspector-wrapper').hasClass('closed')) {
|
301 |
+
cfg.oldHeight = $('#varInspector-wrapper').height(); //.css('height');
|
302 |
+
$('#varInspector-wrapper').css({ height: 40 });
|
303 |
+
$('#varInspector-wrapper .hide-btn')
|
304 |
+
.text('[+]')
|
305 |
+
.attr('title', 'Show Variable Inspector');
|
306 |
+
} else {
|
307 |
+
$('#varInspector-wrapper').height(cfg.oldHeight); //css({ height: cfg.oldHeight });
|
308 |
+
$('#varInspector').height(cfg.oldHeight - $('#varInspector-header').height() - 30 )
|
309 |
+
$('#varInspector-wrapper .hide-btn')
|
310 |
+
.text('[-]')
|
311 |
+
.attr('title', 'Hide Variable Inspector');
|
312 |
+
}
|
313 |
+
return false;
|
314 |
+
})
|
315 |
+
).append(
|
316 |
+
$("<a/>")
|
317 |
+
.attr("href", "#")
|
318 |
+
.text(" \u21BB")
|
319 |
+
.addClass("reload-btn")
|
320 |
+
.attr('title', 'Reload Variable Inspector')
|
321 |
+
.click(function() {
|
322 |
+
//variable_inspector(cfg,st);
|
323 |
+
varRefresh();
|
324 |
+
return false;
|
325 |
+
})
|
326 |
+
).append(
|
327 |
+
$("<span/>")
|
328 |
+
.html("  ")
|
329 |
+
).append(
|
330 |
+
$("<span/>")
|
331 |
+
.html(" ")
|
332 |
+
)
|
333 |
+
).append(
|
334 |
+
$("<div/>").attr("id", "varInspector").addClass('varInspector')
|
335 |
+
)
|
336 |
+
|
337 |
+
$("body").append(varInspector_wrapper);
|
338 |
+
// Ensure position is fixed
|
339 |
+
$('#varInspector-wrapper').css('position', 'fixed');
|
340 |
+
|
341 |
+
// enable dragging and save position on stop moving
|
342 |
+
$('#varInspector-wrapper').draggable({
|
343 |
+
drag: function(event, ui) {}, //end of drag function
|
344 |
+
start: function(event, ui) {
|
345 |
+
$(this).width($(this).width());
|
346 |
+
},
|
347 |
+
stop: function(event, ui) { // on save, store window position
|
348 |
+
save_position();
|
349 |
+
Jupyter.notebook.set_dirty();
|
350 |
+
// Ensure position is fixed (again)
|
351 |
+
$('#varInspector-wrapper').css('position', 'fixed');
|
352 |
+
},
|
353 |
+
});
|
354 |
+
|
355 |
+
$('#varInspector-wrapper').resizable({
|
356 |
+
resize: function(event, ui) {
|
357 |
+
$('#varInspector').height($('#varInspector-wrapper').height() - $('#varInspector-header').height());
|
358 |
+
},
|
359 |
+
start: function(event, ui) {
|
360 |
+
//$(this).width($(this).width());
|
361 |
+
$(this).css('position', 'fixed');
|
362 |
+
},
|
363 |
+
stop: function(event, ui) { // on save, store window position
|
364 |
+
save_position();
|
365 |
+
$('#varInspector').height($('#varInspector-wrapper').height() - $('#varInspector-header').height())
|
366 |
+
Jupyter.notebook.set_dirty();
|
367 |
+
// Ensure position is fixed (again)
|
368 |
+
//$(this).css('position', 'fixed');
|
369 |
+
}
|
370 |
+
})
|
371 |
+
|
372 |
+
// restore window position at startup
|
373 |
+
if (Jupyter.notebook.metadata.varInspector.position !== undefined) {
|
374 |
+
$('#varInspector-wrapper').css(Jupyter.notebook.metadata.varInspector.position);
|
375 |
+
}
|
376 |
+
// Ensure position is fixed
|
377 |
+
$('#varInspector-wrapper').css('position', 'fixed');
|
378 |
+
|
379 |
+
// Restore window display
|
380 |
+
if (Jupyter.notebook.metadata.varInspector !== undefined) {
|
381 |
+
if (Jupyter.notebook.metadata.varInspector['varInspector_section_display'] !== undefined) {
|
382 |
+
$('#varInspector').css('display', Jupyter.notebook.metadata.varInspector['varInspector_section_display'])
|
383 |
+
//$('#varInspector').css('height', $('#varInspector-wrapper').height() - $('#varInspector-header').height())
|
384 |
+
if (Jupyter.notebook.metadata.varInspector['varInspector_section_display'] == 'none') {
|
385 |
+
$('#varInspector-wrapper').addClass('closed');
|
386 |
+
$('#varInspector-wrapper').css({ height: 40 });
|
387 |
+
$('#varInspector-wrapper .hide-btn')
|
388 |
+
.text('[+]')
|
389 |
+
.attr('title', 'Show Variable Inspector');
|
390 |
+
}
|
391 |
+
}
|
392 |
+
if (Jupyter.notebook.metadata.varInspector['window_display'] !== undefined) {
|
393 |
+
console.log(log_prefix + "Restoring Variable Inspector window");
|
394 |
+
$('#varInspector-wrapper').css('display', Jupyter.notebook.metadata.varInspector['window_display'] ? 'block' : 'none');
|
395 |
+
if ($('#varInspector-wrapper').hasClass('closed')){
|
396 |
+
$('#varInspector').height(cfg.oldHeight - $('#varInspector-header').height())
|
397 |
+
}else{
|
398 |
+
$('#varInspector').height($('#varInspector-wrapper').height() - $('#varInspector-header').height()-30)
|
399 |
+
}
|
400 |
+
|
401 |
+
}
|
402 |
+
}
|
403 |
+
// if the wrapper's display property is undefined (first run?), hide it
|
404 |
+
if ($('#varInspector-wrapper').css('display') == undefined) $('#varInspector-wrapper').css('display', "none") //block
|
405 |
+
|
406 |
+
varInspector_wrapper.addClass('varInspector-float-wrapper');
|
407 |
+
}
|
408 |
+
|
409 |
+
var variable_inspector = function(cfg, st) {
|
410 |
+
|
411 |
+
var varInspector_wrapper = $("#varInspector-wrapper");
|
412 |
+
if (varInspector_wrapper.length === 0) {
|
413 |
+
create_varInspector_div(cfg, st);
|
414 |
+
}
|
415 |
+
|
416 |
+
$(window).resize(function() {
|
417 |
+
$('#varInspector').css({ maxHeight: $(window).height() - 30 });
|
418 |
+
$('#varInspector-wrapper').css({ maxHeight: $(window).height() - 10 });
|
419 |
+
});
|
420 |
+
|
421 |
+
$(window).trigger('resize');
|
422 |
+
varRefresh();
|
423 |
+
};
|
424 |
+
|
425 |
+
var toggle_varInspector = function(cfg, st) {
|
426 |
+
// toggle draw (first because of first-click behavior)
|
427 |
+
$("#varInspector-wrapper").toggle({
|
428 |
+
'progress': function() {},
|
429 |
+
'complete': function() {
|
430 |
+
Jupyter.notebook.metadata.varInspector['window_display'] = $('#varInspector-wrapper').css('display') == 'block';
|
431 |
+
Jupyter.notebook.set_dirty();
|
432 |
+
// recompute:
|
433 |
+
variable_inspector(cfg, st);
|
434 |
+
}
|
435 |
+
});
|
436 |
+
};
|
437 |
+
|
438 |
+
|
439 |
+
var load_jupyter_extension = function() {
|
440 |
+
load_css(); //console.log("Loading css")
|
441 |
+
varInspector_button(); //console.log("Adding varInspector_button")
|
442 |
+
|
443 |
+
// If a kernel is available,
|
444 |
+
if (typeof Jupyter.notebook.kernel !== "undefined" && Jupyter.notebook.kernel !== null) {
|
445 |
+
console.log(log_prefix + "Kernel is available -- varInspector initializing ")
|
446 |
+
varInspector_init();
|
447 |
+
}
|
448 |
+
// if a kernel wasn't available, we still wait for one. Anyway, we will run this for new kernel
|
449 |
+
// (test if it is a Python kernel and initialize)
|
450 |
+
// on kernel_ready.Kernel, a new kernel has been started and we shall initialize the extension
|
451 |
+
events.on("kernel_ready.Kernel", function(evt, data) {
|
452 |
+
console.log(log_prefix + "Kernel is available -- reading configuration");
|
453 |
+
varInspector_init();
|
454 |
+
});
|
455 |
+
};
|
456 |
+
|
457 |
+
return {
|
458 |
+
load_ipython_extension: load_jupyter_extension,
|
459 |
+
varRefresh: varRefresh
|
460 |
+
};
|
461 |
+
|
462 |
+
});
|
.local/share/jupyter/nbextensions/varInspector/varInspector.yaml
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Type: Jupyter Notebook Extension
|
2 |
+
Name: Variable Inspector
|
3 |
+
Description: The Variable Inspector extension collects all defined variables and displays them in a floating window. The extension is also draggable, resizable, and collapsible.
|
4 |
+
Link: README.md
|
5 |
+
Icon: icon.png
|
6 |
+
Main: main.js
|
7 |
+
Compatibility: 4.x, 5.x
|
8 |
+
Parameters:
|
9 |
+
- name: varInspector.window_display
|
10 |
+
description: Display window at startup
|
11 |
+
input_type: checkbox
|
12 |
+
default: false
|
13 |
+
- name: varInspector.cols.lenName
|
14 |
+
description: Variable name - Maximum number of characters to display
|
15 |
+
input_type: number
|
16 |
+
default: 16
|
17 |
+
- name: varInspector.cols.lenType
|
18 |
+
description: Variable type - Maximum number of characters to display
|
19 |
+
input_type: number
|
20 |
+
default: 16
|
21 |
+
- name: varInspector.cols.lenVar
|
22 |
+
description: Variable value/content - Maximum number of characters to display
|
23 |
+
input_type: number
|
24 |
+
default: 40
|
25 |
+
- name: varInspector.kernels_config
|
26 |
+
description: |
|
27 |
+
json object defining the libraries to load, the delete/remove
|
28 |
+
kernel commands to delete a variable, and
|
29 |
+
finally the commands to refresh the list of variables.
|
30 |
+
input_type: json_object
|
31 |
+
default: |
|
32 |
+
{
|
33 |
+
'python': {
|
34 |
+
library: 'var_list.py',
|
35 |
+
delete_cmd_prefix: 'del ',
|
36 |
+
delete_cmd_postfix: '',
|
37 |
+
varRefreshCmd: 'print(var_dic_list())'
|
38 |
+
},
|
39 |
+
'r': {
|
40 |
+
library: 'var_list.r',
|
41 |
+
delete_cmd_prefix: 'rm(',
|
42 |
+
delete_cmd_postfix: ') ',
|
43 |
+
varRefreshCmd: 'cat(var_dic_list()) '
|
44 |
+
}
|
45 |
+
}
|
.local/share/jupyter/nbextensions/varInspector/var_list.r
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
library(jsonlite)
|
2 |
+
var_dic_list = function(){
|
3 |
+
ll = ls(.GlobalEnv, all.names = FALSE)
|
4 |
+
varList=list()
|
5 |
+
iter = 1
|
6 |
+
for (k in ll){
|
7 |
+
# class() may return several names, which would hand if() a multi-element condition; is.function() always yields a single TRUE/FALSE.
if (!is.function(get(k))){
|
8 |
+
class = class(get(k)); rk = capture.output(str(get(k))); size = object.size(get(k)); sk = substr(get(k),0, 200);
|
9 |
+
# [{'varName':v, 'varType': type(eval(v)).__name__, 'varSize': _getsizeof(eval(v)), 'varContent': str(eval(v))[:200]}
|
10 |
+
l = list(varName = k, varType = class, varSize = size, varContent = sk)
|
11 |
+
varList[[iter]] = l
|
12 |
+
# print(l)
|
13 |
+
iter = iter + 1}
|
14 |
+
}
|
15 |
+
return(toJSON(varList, simplifyVector = FALSE, force=TRUE))
|
16 |
+
}
|
17 |
+
cat(var_dic_list())
|
.local/share/jupyter/nbextensions/zenmode/README.md
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Zenmode
|
2 |
+
=======
|
3 |
+
|
4 |
+
A little extension to give Zenmode functionality to the IPython notebook
|
.local/share/jupyter/nbextensions/zenmode/images/back3.jpg
ADDED
![]() |
.local/share/jupyter/nbextensions/zenmode/main.css
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.navbar-inner {
|
2 |
+
opacity: 0.5;
|
3 |
+
-webkit-transition: opacity 0.3s ease-in-out;
|
4 |
+
-moz-transition: opacity 0.3s ease-in-out;
|
5 |
+
-o-transition: opacity 0.3s ease-in-out;
|
6 |
+
transition: opacity 0.3s ease-in-out;
|
7 |
+
}
|
8 |
+
|
9 |
+
.navbar-inner:hover {
|
10 |
+
opacity: 1.0;
|
11 |
+
}
|
12 |
+
|
13 |
+
#maintoolbar .navbar-text {
|
14 |
+
display: none !important;
|
15 |
+
}
|
16 |
+
|
17 |
+
#notebook-container {
|
18 |
+
background-color: rgba(255, 255, 255, 0);
|
19 |
+
}
|
20 |
+
|
21 |
+
/*
|
22 |
+
.cell {
|
23 |
+
background-color: rgb(255, 255, 255);
|
24 |
+
}
|
25 |
+
|
26 |
+
.CodeMirror {
|
27 |
+
background: #F8FCCF;
|
28 |
+
}
|
29 |
+
|
30 |
+
div.input_area {
|
31 |
+
margin: 2px;
|
32 |
+
border: none;
|
33 |
+
}
|
34 |
+
*/
|
.local/share/jupyter/nbextensions/zenmode/main.js
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
|
2 |
+
* ----------------------------------------------------------------------------
|
3 |
+
* Copyright (c) 2013 - Damián Avila
|
4 |
+
* Copyright (c) 2015 - Joshua Cooke Barnes (jcb91)
|
5 |
+
*
|
6 |
+
* Distributed under the terms of the Modified BSD License.
|
7 |
+
*
|
8 |
+
* A little extension to give Zenmode functionality to the IPython notebook.
|
9 |
+
* ----------------------------------------------------------------------------
|
10 |
+
*/
|
11 |
+
|
12 |
+
define([
|
13 |
+
"require",
|
14 |
+
"jquery",
|
15 |
+
"base/js/namespace",
|
16 |
+
"base/js/events"
|
17 |
+
], function(
|
18 |
+
requirejs,
|
19 |
+
$,
|
20 |
+
IPython,
|
21 |
+
events
|
22 |
+
) {
|
23 |
+
// The directive must be spelled exactly "use strict"; any other string literal here is a no-op expression statement.
"use strict";
|
24 |
+
|
25 |
+
var backgrounds = [
|
26 |
+
'back11.jpg', 'back12.jpg', 'back2.jpg', 'back21.jpg', 'back22.jpg',
|
27 |
+
'back3.jpg', 'ipynblogo0.png', 'ipynblogo1.png'
|
28 |
+
];
|
29 |
+
|
30 |
+
var hide_header = true;
|
31 |
+
var hide_menubar = true
|
32 |
+
|
33 |
+
var getZenModeActive = function() {
|
34 |
+
return ($('link#zenmodecss')[0] !== undefined);
|
35 |
+
};
|
36 |
+
|
37 |
+
// not sure when this changed, so maybe this is the wrong comparison to make
|
38 |
+
var use_layout_manager = (Number(IPython.version.split(".")[0]) < 3);
|
39 |
+
var header_pattern = use_layout_manager ? '#header' : '#header-container';
|
40 |
+
if (use_layout_manager) {
|
41 |
+
// We need to redefine this function because in the IPython codebase
|
42 |
+
// the app_height function does not take into account the possibility
|
43 |
+
// to hide the header and 'menubar' bar.
|
44 |
+
IPython.layout_manager.app_height = function() {
|
45 |
+
var get_height = function(pattern) {
|
46 |
+
var el = $(pattern);
|
47 |
+
return getZenModeActive() ? 0 : el.outerHeight(true);
|
48 |
+
};
|
49 |
+
var h = $(window).height();
|
50 |
+
// content height
|
51 |
+
return h - get_height(header_pattern) - get_height('#menubar') - get_height('#maintoolbar');
|
52 |
+
};
|
53 |
+
}
|
54 |
+
|
55 |
+
var menu_pattern = '#menubar';
|
56 |
+
var oldBgAttrName = "zenmode-old-bg";
|
57 |
+
var toggleZenMode = function (background) {
|
58 |
+
if (getZenModeActive()) {
|
59 |
+
console.log('toggling zenmode off');
|
60 |
+
$('#zenmode-toggle-btn .fa').removeClass("fa-rebel").addClass("fa-empire");
|
61 |
+
$('#zenmodecss').remove();
|
62 |
+
|
63 |
+
|
64 |
+
// Remove zenmode css settings only when changes were made.
|
65 |
+
if (backgrounds.length != 0) {
|
66 |
+
$('body').css({
|
67 |
+
'background-image': 'none'
|
68 |
+
})
|
69 |
+
}
|
70 |
+
|
71 |
+
// This should be changed at some point in the future to preserve non-zenmode visibility settings
|
72 |
+
$(menu_pattern).toggle(true);
|
73 |
+
$(header_pattern).toggle(true);
|
74 |
+
}
|
75 |
+
else {
|
76 |
+
console.log('toggling zenmode on');
|
77 |
+
$('#zenmode-toggle-btn .fa').removeClass("fa-empire").addClass("fa-rebel");
|
78 |
+
$('head').append(
|
79 |
+
$('<link id="zenmodecss" rel="stylesheet" type="text/css"/>').attr(
|
80 |
+
'href', requirejs.toUrl("./main.css"))
|
81 |
+
);
|
82 |
+
|
83 |
+
if (background === undefined){
|
84 |
+
background = backgrounds[Math.floor(Math.random() * backgrounds.length)];
|
85 |
+
}
|
86 |
+
var absolute_url_pat = /^https?:\/\/|^\/\//i;
|
87 |
+
if (!absolute_url_pat.test(background)) {
|
88 |
+
background = requirejs.toUrl("./images/" + background);
|
89 |
+
}
|
90 |
+
|
91 |
+
// Apply zenmode css when there are images to be used.
|
92 |
+
if (backgrounds.length != 0) {
|
93 |
+
$('body').css({
|
94 |
+
'background-image': 'url(' + background + ')',
|
95 |
+
'background-repeat': 'no-repeat',
|
96 |
+
'background-position': 'center center',
|
97 |
+
'background-attachment': 'fixed',
|
98 |
+
'-webkit-background-size': 'cover',
|
99 |
+
'-moz-background-size': 'cover',
|
100 |
+
'-o-background-size': 'cover',
|
101 |
+
'background-size': 'cover'
|
102 |
+
});
|
103 |
+
}
|
104 |
+
|
105 |
+
if (hide_menubar)
|
106 |
+
{$(menu_pattern).toggle(false);}
|
107 |
+
if (hide_header)
|
108 |
+
{$(header_pattern).toggle(false);}
|
109 |
+
}
|
110 |
+
|
111 |
+
// Lastly get notebook to do a resize
|
112 |
+
if (use_layout_manager) {
|
113 |
+
IPython.layout_manager.app_height();
|
114 |
+
IPython.layout_manager.do_resize();
|
115 |
+
}
|
116 |
+
else {
|
117 |
+
events.trigger("resize-header.Page");
|
118 |
+
}
|
119 |
+
};
|
120 |
+
|
121 |
+
var setZenModeActive = function(active, background) {
|
122 |
+
if (active === undefined) { active = true; }
|
123 |
+
console.log("zenmode ->", active);
|
124 |
+
if (getZenModeActive() != active) { toggleZenMode(background); }
|
125 |
+
};
|
126 |
+
|
127 |
+
var initialize = function () {
|
128 |
+
var config = IPython.notebook.config;
|
129 |
+
if (config.data.hasOwnProperty('zenmode_hide_header')) {
|
130 |
+
if (!config.data.zenmode_hide_header) {
|
131 |
+
console.log("not hiding notebook header");
|
132 |
+
hide_header = false;
|
133 |
+
}
|
134 |
+
}
|
135 |
+
|
136 |
+
if (config.data.hasOwnProperty('zenmode_hide_menubar')) {
|
137 |
+
if (!config.data.zenmode_hide_menubar) {
|
138 |
+
console.log("not hiding notebook menubar");
|
139 |
+
hide_menubar = false;
|
140 |
+
}
|
141 |
+
}
|
142 |
+
|
143 |
+
if (config.data.hasOwnProperty('zenmode_use_builtin_backgrounds')) {
|
144 |
+
if (!config.data.zenmode_use_builtin_backgrounds) {
|
145 |
+
console.log("not using builtin zenmode_backgrounds");
|
146 |
+
backgrounds.length = 0;
|
147 |
+
}
|
148 |
+
}
|
149 |
+
|
150 |
+
if (config.data.hasOwnProperty('zenmode_backgrounds')) {
|
151 |
+
if (config.data.zenmode_backgrounds.length > 0) {
|
152 |
+
var new_bg_urls = config.data.zenmode_backgrounds;
|
153 |
+
for (var ii=0; ii < new_bg_urls.length; ii++) {
|
154 |
+
var bg_url = new_bg_urls[ii].replace(/^\s+|\s+$/g, '');
|
155 |
+
if (bg_url.length > 0 && bg_url[0] != '#') {
|
156 |
+
backgrounds.push(bg_url);
|
157 |
+
}
|
158 |
+
}
|
159 |
+
console.log("additional zenmode backgrounds added");
|
160 |
+
}
|
161 |
+
}
|
162 |
+
console.log("zenmode_backgrounds = ", backgrounds);
|
163 |
+
|
164 |
+
if (config.data.hasOwnProperty('zenmode_set_zenmode_on_load')) {
|
165 |
+
setZenModeActive(
|
166 |
+
config.data.zenmode_set_zenmode_on_load ? true : false
|
167 |
+
);
|
168 |
+
}
|
169 |
+
};
|
170 |
+
|
171 |
+
var load_ipython_extension = function(background) {
|
172 |
+
$(IPython.toolbar.add_buttons_group([
|
173 |
+
IPython.keyboard_manager.actions.register({
|
174 |
+
'help' : 'Enter/Exit Zenmode',
|
175 |
+
'icon' : 'fa-empire',
|
176 |
+
'handler': function() {
|
177 |
+
toggleZenMode(background);
|
178 |
+
setTimeout(function () {
|
179 |
+
$('#zenmode-toggle-btn').blur();
|
180 |
+
}, 500);
|
181 |
+
},
|
182 |
+
}, 'toggle-zenmode', 'zenmode'),
|
183 |
+
], 'zenmode-btn-grp')).find('.btn').attr('id', 'zenmode-toggle-btn');
|
184 |
+
$("#maintoolbar-container").prepend($('#zenmode-btn-grp'));
|
185 |
+
return IPython.notebook.config.loaded.then(initialize);
|
186 |
+
};
|
187 |
+
|
188 |
+
var extension = {
|
189 |
+
load_ipython_extension : load_ipython_extension,
|
190 |
+
backgrounds : backgrounds,
|
191 |
+
toggleZenMode : toggleZenMode,
|
192 |
+
getZenModeActive : getZenModeActive,
|
193 |
+
setZenModeActive : setZenModeActive
|
194 |
+
};
|
195 |
+
return extension;
|
196 |
+
});
|
.local/share/jupyter/nbextensions/zenmode/zenmode.yaml
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Type: IPython Notebook Extension
|
2 |
+
Name: zenmode
|
3 |
+
Link: README.md
|
4 |
+
Description: A little extension to give Zenmode functionality to the IPython notebook
|
5 |
+
Main: main.js
|
6 |
+
Compatibility: 4.x, 5.x
|
7 |
+
Parameters:
|
8 |
+
- name: zenmode_set_zenmode_on_load
|
9 |
+
description: Set zenmode on when a notebook opens
|
10 |
+
input_type: checkbox
|
11 |
+
default: true
|
12 |
+
- name: zenmode_use_builtin_backgrounds
|
13 |
+
description: Use builtin backgrounds in addition to any specified by URL
|
14 |
+
input_type: checkbox
|
15 |
+
default: true
|
16 |
+
- name: zenmode_hide_header
|
17 |
+
description: Hide the header in zenmode
|
18 |
+
input_type: checkbox
|
19 |
+
default: true
|
20 |
+
- name: zenmode_hide_menubar
|
21 |
+
description: Hide the menubar in zenmode
|
22 |
+
input_type: checkbox
|
23 |
+
default: true
|
24 |
+
- name: zenmode_backgrounds
|
25 |
+
description: "Urls to use as backgrounds. Any beginning with # are ignored."
|
26 |
+
input_type: list
|
27 |
+
list_element:
|
28 |
+
input_type: url
|
.triton/dump/0db70b0f0846c3c6c38c4ccb3ef979e3/triton_.cubin
ADDED
Binary file (60 kB). View file
|
|
.triton/dump/0db70b0f0846c3c6c38c4ccb3ef979e3/triton_.ttir
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
module {
|
2 |
+
tt.func public @triton__0d1d2d3d4d5d6de7de(%arg0: !tt.ptr<i64, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg4: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg5: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg6: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}, %arg7: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
|
3 |
+
%cst = arith.constant dense<0.000000e+00> : tensor<16x256xbf16>
|
4 |
+
%cst_0 = arith.constant dense<1.000000e+00> : tensor<1x256xf32>
|
5 |
+
%cst_1 = arith.constant dense<0.000000e+00> : tensor<1x256xf32>
|
6 |
+
%cst_2 = arith.constant 0.000000e+00 : f32
|
7 |
+
%cst_3 = arith.constant dense<256> : tensor<16x1xi64>
|
8 |
+
%cst_4 = arith.constant dense<50257> : tensor<16x1xi64>
|
9 |
+
%cst_5 = arith.constant dense<0> : tensor<16x1xi64>
|
10 |
+
%cst_6 = arith.constant dense<9.99999974E-6> : tensor<16x1xf32>
|
11 |
+
%cst_7 = arith.constant dense<2.560000e+02> : tensor<16x1xf32>
|
12 |
+
%cst_8 = arith.constant dense<0.000000e+00> : tensor<16x256xf32>
|
13 |
+
%cst_9 = arith.constant dense<256> : tensor<16x1xi32>
|
14 |
+
%cst_10 = arith.constant dense<256> : tensor<1x256xi32>
|
15 |
+
%cst_11 = arith.constant dense<512> : tensor<16x1xi32>
|
16 |
+
%c16_i32 = arith.constant 16 : i32
|
17 |
+
%0 = tt.get_program_id x : i32
|
18 |
+
%1 = arith.muli %0, %c16_i32 : i32
|
19 |
+
%2 = tt.make_range {end = 16 : i32, start = 0 : i32} : tensor<16xi32>
|
20 |
+
%3 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<16xi32>) -> tensor<16x1xi32>
|
21 |
+
%4 = tt.splat %1 : (i32) -> tensor<16x1xi32>
|
22 |
+
%5 = arith.addi %4, %3 : tensor<16x1xi32>
|
23 |
+
%6 = tt.make_range {end = 256 : i32, start = 0 : i32} : tensor<256xi32>
|
24 |
+
%7 = tt.expand_dims %6 {axis = 0 : i32} : (tensor<256xi32>) -> tensor<1x256xi32>
|
25 |
+
%8 = tt.splat %arg0 : (!tt.ptr<i64, 1>) -> tensor<16x1x!tt.ptr<i64, 1>>
|
26 |
+
%9 = tt.addptr %8, %5 : tensor<16x1x!tt.ptr<i64, 1>>, tensor<16x1xi32>
|
27 |
+
%10 = tt.load %9 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x1xi64>
|
28 |
+
%11 = arith.remsi %5, %cst_11 : tensor<16x1xi32>
|
29 |
+
%12 = arith.cmpi slt, %7, %cst_10 : tensor<1x256xi32>
|
30 |
+
%13 = arith.muli %11, %cst_9 : tensor<16x1xi32>
|
31 |
+
%14 = tt.broadcast %7 : (tensor<1x256xi32>) -> tensor<16x256xi32>
|
32 |
+
%15 = tt.broadcast %13 : (tensor<16x1xi32>) -> tensor<16x256xi32>
|
33 |
+
%16 = arith.addi %14, %15 : tensor<16x256xi32>
|
34 |
+
%17 = tt.splat %arg2 : (!tt.ptr<f32, 1>) -> tensor<16x256x!tt.ptr<f32, 1>>
|
35 |
+
%18 = tt.addptr %17, %16 : tensor<16x256x!tt.ptr<f32, 1>>, tensor<16x256xi32>
|
36 |
+
%19 = tt.broadcast %12 : (tensor<1x256xi1>) -> tensor<16x256xi1>
|
37 |
+
%20 = tt.load %18, %19, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x256xf32>
|
38 |
+
%21 = arith.muli %5, %cst_9 : tensor<16x1xi32>
|
39 |
+
%22 = tt.broadcast %21 : (tensor<16x1xi32>) -> tensor<16x256xi32>
|
40 |
+
%23 = arith.addi %14, %22 : tensor<16x256xi32>
|
41 |
+
%24 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<16x256x!tt.ptr<bf16, 1>>
|
42 |
+
%25 = tt.addptr %24, %23 : tensor<16x256x!tt.ptr<bf16, 1>>, tensor<16x256xi32>
|
43 |
+
%26 = tt.load %25, %19, %cst {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x256xbf16>
|
44 |
+
%27 = arith.extf %26 : tensor<16x256xbf16> to tensor<16x256xf32>
|
45 |
+
%28 = arith.addi %10, %cst_4 : tensor<16x1xi64>
|
46 |
+
%29 = arith.cmpi slt, %10, %cst_5 : tensor<16x1xi64>
|
47 |
+
%30 = arith.select %29, %28, %10 : tensor<16x1xi1>, tensor<16x1xi64>
|
48 |
+
%31 = arith.cmpi sge, %30, %cst_5 : tensor<16x1xi64>
|
49 |
+
%32 = arith.cmpi slt, %30, %cst_4 : tensor<16x1xi64>
|
50 |
+
%33 = arith.andi %31, %32 : tensor<16x1xi1>
|
51 |
+
tt.assert %33, "index out of bounds: 0 <= tmp3 < 50257", "<frozen importlib._bootstrap_external>", "_call_with_frames_removed", 883 : tensor<16x1xi1>
|
52 |
+
%34 = arith.muli %30, %cst_3 : tensor<16x1xi64>
|
53 |
+
%35 = tt.broadcast %34 : (tensor<16x1xi64>) -> tensor<16x256xi64>
|
54 |
+
%36 = arith.extsi %7 : tensor<1x256xi32> to tensor<1x256xi64>
|
55 |
+
%37 = tt.broadcast %36 : (tensor<1x256xi64>) -> tensor<16x256xi64>
|
56 |
+
%38 = arith.addi %37, %35 : tensor<16x256xi64>
|
57 |
+
%39 = tt.splat %arg1 : (!tt.ptr<f32, 1>) -> tensor<16x256x!tt.ptr<f32, 1>>
|
58 |
+
%40 = tt.addptr %39, %38 : tensor<16x256x!tt.ptr<f32, 1>>, tensor<16x256xi64>
|
59 |
+
%41 = tt.load %40, %19, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x256xf32>
|
60 |
+
%42 = arith.addf %41, %20 : tensor<16x256xf32>
|
61 |
+
%43 = arith.addf %42, %27 : tensor<16x256xf32>
|
62 |
+
%44 = arith.addf %43, %cst_8 : tensor<16x256xf32>
|
63 |
+
%45 = arith.subf %43, %44 : tensor<16x256xf32>
|
64 |
+
%46 = arith.mulf %43, %45 : tensor<16x256xf32>
|
65 |
+
%47 = arith.addf %46, %cst_8 : tensor<16x256xf32>
|
66 |
+
%48 = arith.select %19, %44, %cst_8 : tensor<16x256xi1>, tensor<16x256xf32>
|
67 |
+
%49 = arith.select %19, %47, %cst_8 : tensor<16x256xi1>, tensor<16x256xf32>
|
68 |
+
%50 = arith.select %12, %cst_0, %cst_1 : tensor<1x256xi1>, tensor<1x256xf32>
|
69 |
+
%51 = tt.broadcast %50 : (tensor<1x256xf32>) -> tensor<16x256xf32>
|
70 |
+
%52:3 = "tt.reduce"(%48, %49, %51) <{axis = 1 : i32}> ({
|
71 |
+
^bb0(%arg8: f32, %arg9: f32, %arg10: f32, %arg11: f32, %arg12: f32, %arg13: f32):
|
72 |
+
%76 = arith.subf %arg11, %arg8 : f32
|
73 |
+
%77 = arith.addf %arg10, %arg13 : f32
|
74 |
+
%78 = arith.cmpf oeq, %77, %cst_2 : f32
|
75 |
+
%79 = arith.divf %arg13, %77 : f32
|
76 |
+
%80 = arith.select %78, %cst_2, %79 : f32
|
77 |
+
%81 = arith.mulf %76, %80 : f32
|
78 |
+
%82 = arith.addf %arg8, %81 : f32
|
79 |
+
%83 = arith.addf %arg9, %arg12 : f32
|
80 |
+
%84 = arith.mulf %76, %76 : f32
|
81 |
+
%85 = arith.mulf %84, %arg10 : f32
|
82 |
+
%86 = arith.mulf %85, %80 : f32
|
83 |
+
%87 = arith.addf %83, %86 : f32
|
84 |
+
tt.reduce.return %82, %87, %77 : f32, f32, f32
|
85 |
+
}) : (tensor<16x256xf32>, tensor<16x256xf32>, tensor<16x256xf32>) -> (tensor<16xf32>, tensor<16xf32>, tensor<16xf32>)
|
86 |
+
%53 = tt.expand_dims %52#0 {axis = 1 : i32} : (tensor<16xf32>) -> tensor<16x1xf32>
|
87 |
+
%54 = tt.expand_dims %52#1 {axis = 1 : i32} : (tensor<16xf32>) -> tensor<16x1xf32>
|
88 |
+
%55 = tt.load %18, %19, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<16x256xf32>
|
89 |
+
%56 = tt.load %25, %19, %cst {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<16x256xbf16>
|
90 |
+
%57 = arith.extf %56 : tensor<16x256xbf16> to tensor<16x256xf32>
|
91 |
+
%58 = tt.splat %arg4 : (!tt.ptr<f32, 1>) -> tensor<1x256x!tt.ptr<f32, 1>>
|
92 |
+
%59 = tt.addptr %58, %7 : tensor<1x256x!tt.ptr<f32, 1>>, tensor<1x256xi32>
|
93 |
+
%60 = tt.load %59, %12, %cst_1 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x256xf32>
|
94 |
+
tt.assert %33, "index out of bounds: 0 <= tmp16 < 50257", "<frozen importlib._bootstrap_external>", "_call_with_frames_removed", 883 : tensor<16x1xi1>
|
95 |
+
%61 = tt.load %40, %19, %cst_8 {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<16x256xf32>
|
96 |
+
%62 = arith.addf %61, %55 : tensor<16x256xf32>
|
97 |
+
%63 = arith.addf %62, %57 : tensor<16x256xf32>
|
98 |
+
%64 = tt.broadcast %53 : (tensor<16x1xf32>) -> tensor<16x256xf32>
|
99 |
+
%65 = arith.subf %63, %64 : tensor<16x256xf32>
|
100 |
+
%66 = arith.divf %54, %cst_7 : tensor<16x1xf32>
|
101 |
+
%67 = arith.addf %66, %cst_6 : tensor<16x1xf32>
|
102 |
+
%68 = tt.extern_elementwise %67 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_rsqrtf"} : (tensor<16x1xf32>) -> tensor<16x1xf32>
|
103 |
+
%69 = tt.broadcast %68 : (tensor<16x1xf32>) -> tensor<16x256xf32>
|
104 |
+
%70 = arith.mulf %65, %69 : tensor<16x256xf32>
|
105 |
+
%71 = tt.broadcast %60 : (tensor<1x256xf32>) -> tensor<16x256xf32>
|
106 |
+
%72 = arith.mulf %70, %71 : tensor<16x256xf32>
|
107 |
+
%73 = tt.splat %arg5 : (!tt.ptr<bf16, 1>) -> tensor<16x256x!tt.ptr<bf16, 1>>
|
108 |
+
%74 = tt.addptr %73, %23 : tensor<16x256x!tt.ptr<bf16, 1>>, tensor<16x256xi32>
|
109 |
+
%75 = arith.truncf %72 : tensor<16x256xf32> to tensor<16x256xbf16>
|
110 |
+
tt.store %74, %75, %19 {cache = 1 : i32, evict = 1 : i32} : tensor<16x256xbf16>
|
111 |
+
tt.return
|
112 |
+
}
|
113 |
+
}
|
.triton/dump/174400122b6dbc99e086544aa1856b9f/triton_.cubin
ADDED
Binary file (32 kB). View file
|
|
.triton/dump/199215289adb100508718a5a762ba4d7/triton_.cubin
ADDED
Binary file (13 kB). View file
|
|
.triton/dump/1c14bdb6903aa6825e214bbdf57fd077/triton_.cubin
ADDED
Binary file (5.54 kB). View file
|
|
.triton/dump/1c14bdb6903aa6825e214bbdf57fd077/triton_.ptx
ADDED
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Generated by LLVM NVPTX Back-End
|
3 |
+
//
|
4 |
+
|
5 |
+
.version 8.2
|
6 |
+
.target sm_89
|
7 |
+
.address_size 64
|
8 |
+
|
9 |
+
// .globl triton__0d1d2de
|
10 |
+
|
11 |
+
.visible .entry triton__0d1d2de(
|
12 |
+
.param .u64 triton__0d1d2de_param_0,
|
13 |
+
.param .u64 triton__0d1d2de_param_1,
|
14 |
+
.param .u32 triton__0d1d2de_param_2
|
15 |
+
)
|
16 |
+
.maxntid 128, 1, 1
|
17 |
+
{
|
18 |
+
.reg .pred %p<4>;
|
19 |
+
.reg .b16 %rs<9>;
|
20 |
+
.reg .b32 %r<31>;
|
21 |
+
.reg .b64 %rd<8>;
|
22 |
+
.loc 1 18 0
|
23 |
+
$L__func_begin0:
|
24 |
+
.loc 1 18 0
|
25 |
+
|
26 |
+
ld.param.u64 %rd4, [triton__0d1d2de_param_0];
|
27 |
+
ld.param.u64 %rd5, [triton__0d1d2de_param_1];
|
28 |
+
$L__tmp0:
|
29 |
+
.loc 1 21 36
|
30 |
+
mov.u32 %r22, %tid.x;
|
31 |
+
shl.b32 %r23, %r22, 3;
|
32 |
+
and.b32 %r24, %r23, 1016;
|
33 |
+
.loc 1 20 28
|
34 |
+
mov.u32 %r1, %ctaid.x;
|
35 |
+
.loc 1 20 33
|
36 |
+
shl.b32 %r25, %r1, 10;
|
37 |
+
.loc 1 21 23
|
38 |
+
or.b32 %r26, %r25, %r24;
|
39 |
+
.loc 1 24 30
|
40 |
+
mul.wide.s32 %rd6, %r26, 4;
|
41 |
+
add.s64 %rd1, %rd4, %rd6;
|
42 |
+
add.s64 %rd2, %rd1, 16;
|
43 |
+
mov.pred %p1, -1;
|
44 |
+
.loc 1 24 35
|
45 |
+
mov.u32 %r10, 0x0;
|
46 |
+
mov.u32 %r11, 0x0;
|
47 |
+
mov.u32 %r12, 0x0;
|
48 |
+
mov.u32 %r13, 0x0;
|
49 |
+
@%p1 ld.global.v4.b32 { %r10, %r11, %r12, %r13 }, [ %rd1 + 0 ];
|
50 |
+
mov.u32 %r14, 0x0;
|
51 |
+
mov.u32 %r15, 0x0;
|
52 |
+
mov.u32 %r16, 0x0;
|
53 |
+
mov.u32 %r17, 0x0;
|
54 |
+
@%p1 ld.global.v4.b32 { %r14, %r15, %r16, %r17 }, [ %rd2 + 0 ];
|
55 |
+
.loc 1 26 25
|
56 |
+
mul.wide.s32 %rd7, %r26, 2;
|
57 |
+
add.s64 %rd3, %rd5, %rd7;
|
58 |
+
.loc 1 26 36
|
59 |
+
cvt.rn.bf16.f32 %rs1, %r10;
|
60 |
+
cvt.rn.bf16.f32 %rs2, %r11;
|
61 |
+
cvt.rn.bf16.f32 %rs3, %r12;
|
62 |
+
cvt.rn.bf16.f32 %rs4, %r13;
|
63 |
+
cvt.rn.bf16.f32 %rs5, %r14;
|
64 |
+
cvt.rn.bf16.f32 %rs6, %r15;
|
65 |
+
cvt.rn.bf16.f32 %rs7, %r16;
|
66 |
+
cvt.rn.bf16.f32 %rs8, %r17;
|
67 |
+
mov.b32 %r27, {%rs1, %rs2};
|
68 |
+
mov.b32 %r28, {%rs3, %rs4};
|
69 |
+
mov.b32 %r29, {%rs5, %rs6};
|
70 |
+
mov.b32 %r30, {%rs7, %rs8};
|
71 |
+
@%p1 st.global.v4.b32 [ %rd3 + 0 ], { %r27, %r28, %r29, %r30 };
|
72 |
+
.loc 1 26 4
|
73 |
+
ret;
|
74 |
+
$L__tmp1:
|
75 |
+
$L__func_end0:
|
76 |
+
|
77 |
+
}
|
78 |
+
.file 1 "/tmp/torchinductor_root/5t/c5tryp5qwkhreijk7s5x327wofz54lwj4kvctuqdzv2vrf2xyons.py"
|
79 |
+
.section .debug_abbrev
|
80 |
+
{
|
81 |
+
.b8 1
|
82 |
+
.b8 17
|
83 |
+
.b8 1
|
84 |
+
.b8 37
|
85 |
+
.b8 8
|
86 |
+
.b8 19
|
87 |
+
.b8 5
|
88 |
+
.b8 3
|
89 |
+
.b8 8
|
90 |
+
.b8 16
|
91 |
+
.b8 6
|
92 |
+
.b8 27
|
93 |
+
.b8 8
|
94 |
+
.b8 180
|
95 |
+
.b8 66
|
96 |
+
.b8 12
|
97 |
+
.b8 17
|
98 |
+
.b8 1
|
99 |
+
.b8 18
|
100 |
+
.b8 1
|
101 |
+
.b8 0
|
102 |
+
.b8 0
|
103 |
+
.b8 2
|
104 |
+
.b8 46
|
105 |
+
.b8 0
|
106 |
+
.b8 17
|
107 |
+
.b8 1
|
108 |
+
.b8 18
|
109 |
+
.b8 1
|
110 |
+
.b8 64
|
111 |
+
.b8 10
|
112 |
+
.b8 135
|
113 |
+
.b8 64
|
114 |
+
.b8 8
|
115 |
+
.b8 3
|
116 |
+
.b8 8
|
117 |
+
.b8 58
|
118 |
+
.b8 11
|
119 |
+
.b8 59
|
120 |
+
.b8 11
|
121 |
+
.b8 63
|
122 |
+
.b8 12
|
123 |
+
.b8 0
|
124 |
+
.b8 0
|
125 |
+
.b8 0
|
126 |
+
}
|
127 |
+
.section .debug_info
|
128 |
+
{
|
129 |
+
.b32 176
|
130 |
+
.b8 2
|
131 |
+
.b8 0
|
132 |
+
.b32 .debug_abbrev
|
133 |
+
.b8 8
|
134 |
+
.b8 1
|
135 |
+
.b8 116
|
136 |
+
.b8 114
|
137 |
+
.b8 105
|
138 |
+
.b8 116
|
139 |
+
.b8 111
|
140 |
+
.b8 110
|
141 |
+
.b8 0
|
142 |
+
.b8 2
|
143 |
+
.b8 0
|
144 |
+
.b8 99
|
145 |
+
.b8 53
|
146 |
+
.b8 116
|
147 |
+
.b8 114
|
148 |
+
.b8 121
|
149 |
+
.b8 112
|
150 |
+
.b8 53
|
151 |
+
.b8 113
|
152 |
+
.b8 119
|
153 |
+
.b8 107
|
154 |
+
.b8 104
|
155 |
+
.b8 114
|
156 |
+
.b8 101
|
157 |
+
.b8 105
|
158 |
+
.b8 106
|
159 |
+
.b8 107
|
160 |
+
.b8 55
|
161 |
+
.b8 115
|
162 |
+
.b8 53
|
163 |
+
.b8 120
|
164 |
+
.b8 51
|
165 |
+
.b8 50
|
166 |
+
.b8 55
|
167 |
+
.b8 119
|
168 |
+
.b8 111
|
169 |
+
.b8 102
|
170 |
+
.b8 122
|
171 |
+
.b8 53
|
172 |
+
.b8 52
|
173 |
+
.b8 108
|
174 |
+
.b8 119
|
175 |
+
.b8 106
|
176 |
+
.b8 52
|
177 |
+
.b8 107
|
178 |
+
.b8 118
|
179 |
+
.b8 99
|
180 |
+
.b8 116
|
181 |
+
.b8 117
|
182 |
+
.b8 113
|
183 |
+
.b8 100
|
184 |
+
.b8 122
|
185 |
+
.b8 118
|
186 |
+
.b8 50
|
187 |
+
.b8 118
|
188 |
+
.b8 114
|
189 |
+
.b8 102
|
190 |
+
.b8 50
|
191 |
+
.b8 120
|
192 |
+
.b8 121
|
193 |
+
.b8 111
|
194 |
+
.b8 110
|
195 |
+
.b8 115
|
196 |
+
.b8 46
|
197 |
+
.b8 112
|
198 |
+
.b8 121
|
199 |
+
.b8 0
|
200 |
+
.b32 .debug_line
|
201 |
+
.b8 47
|
202 |
+
.b8 116
|
203 |
+
.b8 109
|
204 |
+
.b8 112
|
205 |
+
.b8 47
|
206 |
+
.b8 116
|
207 |
+
.b8 111
|
208 |
+
.b8 114
|
209 |
+
.b8 99
|
210 |
+
.b8 104
|
211 |
+
.b8 105
|
212 |
+
.b8 110
|
213 |
+
.b8 100
|
214 |
+
.b8 117
|
215 |
+
.b8 99
|
216 |
+
.b8 116
|
217 |
+
.b8 111
|
218 |
+
.b8 114
|
219 |
+
.b8 95
|
220 |
+
.b8 114
|
221 |
+
.b8 111
|
222 |
+
.b8 111
|
223 |
+
.b8 116
|
224 |
+
.b8 47
|
225 |
+
.b8 53
|
226 |
+
.b8 116
|
227 |
+
.b8 0
|
228 |
+
.b8 1
|
229 |
+
.b64 $L__func_begin0
|
230 |
+
.b64 $L__func_end0
|
231 |
+
.b8 2
|
232 |
+
.b64 $L__func_begin0
|
233 |
+
.b64 $L__func_end0
|
234 |
+
.b8 1
|
235 |
+
.b8 156
|
236 |
+
.b8 116
|
237 |
+
.b8 114
|
238 |
+
.b8 105
|
239 |
+
.b8 116
|
240 |
+
.b8 111
|
241 |
+
.b8 110
|
242 |
+
.b8 95
|
243 |
+
.b8 95
|
244 |
+
.b8 48
|
245 |
+
.b8 100
|
246 |
+
.b8 49
|
247 |
+
.b8 100
|
248 |
+
.b8 50
|
249 |
+
.b8 100
|
250 |
+
.b8 101
|
251 |
+
.b8 0
|
252 |
+
.b8 116
|
253 |
+
.b8 114
|
254 |
+
.b8 105
|
255 |
+
.b8 116
|
256 |
+
.b8 111
|
257 |
+
.b8 110
|
258 |
+
.b8 95
|
259 |
+
.b8 95
|
260 |
+
.b8 48
|
261 |
+
.b8 100
|
262 |
+
.b8 49
|
263 |
+
.b8 100
|
264 |
+
.b8 50
|
265 |
+
.b8 100
|
266 |
+
.b8 101
|
267 |
+
.b8 0
|
268 |
+
.b8 1
|
269 |
+
.b8 18
|
270 |
+
.b8 1
|
271 |
+
.b8 0
|
272 |
+
}
|
273 |
+
.section .debug_pubnames
|
274 |
+
{
|
275 |
+
.b32 $L__pubNames_end0-$L__pubNames_start0
|
276 |
+
$L__pubNames_start0:
|
277 |
+
.b8 2
|
278 |
+
.b8 0
|
279 |
+
.b32 .debug_info
|
280 |
+
.b32 180
|
281 |
+
.b32 125
|
282 |
+
.b8 116
|
283 |
+
.b8 114
|
284 |
+
.b8 105
|
285 |
+
.b8 116
|
286 |
+
.b8 111
|
287 |
+
.b8 110
|
288 |
+
.b8 95
|
289 |
+
.b8 95
|
290 |
+
.b8 48
|
291 |
+
.b8 100
|
292 |
+
.b8 49
|
293 |
+
.b8 100
|
294 |
+
.b8 50
|
295 |
+
.b8 100
|
296 |
+
.b8 101
|
297 |
+
.b8 0
|
298 |
+
.b32 0
|
299 |
+
$L__pubNames_end0:
|
300 |
+
}
|
301 |
+
.section .debug_pubtypes
|
302 |
+
{
|
303 |
+
.b32 $L__pubTypes_end0-$L__pubTypes_start0
|
304 |
+
$L__pubTypes_start0:
|
305 |
+
.b8 2
|
306 |
+
.b8 0
|
307 |
+
.b32 .debug_info
|
308 |
+
.b32 180
|
309 |
+
.b32 0
|
310 |
+
$L__pubTypes_end0:
|
311 |
+
}
|
312 |
+
.section .debug_loc { }
|
.triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.cubin
ADDED
Binary file (10.5 kB). View file
|
|
.triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.llir
ADDED
@@ -0,0 +1,332 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
; ModuleID = 'LLVMDialectModule'
|
2 |
+
source_filename = "LLVMDialectModule"
|
3 |
+
|
4 |
+
@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
|
5 |
+
|
6 |
+
define void @triton__0d1de(ptr addrspace(1) %0, i32 %1) local_unnamed_addr !dbg !7 {
|
7 |
+
%3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
|
8 |
+
%4 = shl i32 %3, 1, !dbg !10
|
9 |
+
%5 = and i32 %4, 510, !dbg !10
|
10 |
+
%6 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #4, !dbg !11
|
11 |
+
%7 = shl i32 %6, 9, !dbg !12
|
12 |
+
%8 = or i32 %7, %5, !dbg !13
|
13 |
+
%9 = sext i32 %8 to i64, !dbg !14
|
14 |
+
%10 = getelementptr i16, ptr addrspace(1) %0, i64 %9, !dbg !14
|
15 |
+
%11 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %10, i1 true) #4, !dbg !15
|
16 |
+
%12 = trunc i32 %11 to i16, !dbg !15
|
17 |
+
%extelt.offset = lshr i32 %11, 16, !dbg !15
|
18 |
+
%13 = trunc i32 %extelt.offset to i16, !dbg !15
|
19 |
+
%14 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %12) #4, !dbg !16
|
20 |
+
%15 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %13) #4, !dbg !16
|
21 |
+
%16 = fmul float %14, 0x3FE6A09E60000000, !dbg !17
|
22 |
+
%17 = fmul float %15, 0x3FE6A09E60000000, !dbg !17
|
23 |
+
%18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
24 |
+
%.not.i = icmp eq i32 %18, 0, !dbg !18
|
25 |
+
%19 = tail call float @llvm.nvvm.fabs.ftz.f(float %16) #4, !dbg !18
|
26 |
+
%20 = tail call float @llvm.nvvm.fabs.f(float %16) #4, !dbg !18
|
27 |
+
%.0.i = select i1 %.not.i, float %20, float %19, !dbg !18
|
28 |
+
%21 = fcmp oge float %.0.i, 0x3FF00C1FC0000000, !dbg !18
|
29 |
+
br i1 %21, label %__nv_fabsf.exit1.i, label %23, !dbg !18
|
30 |
+
|
31 |
+
__nv_fabsf.exit1.i: ; preds = %2
|
32 |
+
%22 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
33 |
+
%.not1.i = icmp eq i32 %22, 0, !dbg !18
|
34 |
+
%.01.i = select i1 %.not1.i, float %20, float %19, !dbg !18
|
35 |
+
br label %__internal_fmad.exit.i, !dbg !18
|
36 |
+
|
37 |
+
23: ; preds = %2
|
38 |
+
%24 = fmul float %16, %16, !dbg !18
|
39 |
+
br label %__internal_fmad.exit.i, !dbg !18
|
40 |
+
|
41 |
+
__internal_fmad.exit.i: ; preds = %23, %__nv_fabsf.exit1.i
|
42 |
+
%25 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1.i ], [ 0x3FC06EBA60000000, %23 ], !dbg !18
|
43 |
+
%26 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1.i ], [ 0xBFD8127580000000, %23 ], !dbg !18
|
44 |
+
%27 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1.i ], [ 0x3FBCE315E0000000, %23 ], !dbg !18
|
45 |
+
%28 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1.i ], [ 0xBF9B837CE0000000, %23 ], !dbg !18
|
46 |
+
%29 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1.i ], [ 0x3F755ABD40000000, %23 ], !dbg !18
|
47 |
+
%30 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1.i ], [ 0xBF4AE9A400000000, %23 ], !dbg !18
|
48 |
+
%31 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1.i ], [ 0x3F163D2D40000000, %23 ], !dbg !18
|
49 |
+
%32 = phi float [ %.01.i, %__nv_fabsf.exit1.i ], [ %24, %23 ], !dbg !18
|
50 |
+
%33 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
51 |
+
%.not2.i = icmp eq i32 %33, 0, !dbg !18
|
52 |
+
%34 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %31, float %32, float %30) #4, !dbg !18
|
53 |
+
%35 = tail call float @llvm.nvvm.fma.rn.f(float %31, float %32, float %30) #4, !dbg !18
|
54 |
+
%.02.i = select i1 %.not2.i, float %35, float %34, !dbg !18
|
55 |
+
%36 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
56 |
+
%.not3.i = icmp eq i32 %36, 0, !dbg !18
|
57 |
+
%37 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i, float %32, float %29) #4, !dbg !18
|
58 |
+
%38 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i, float %32, float %29) #4, !dbg !18
|
59 |
+
%.03.i = select i1 %.not3.i, float %38, float %37, !dbg !18
|
60 |
+
%39 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
61 |
+
%.not4.i = icmp eq i32 %39, 0, !dbg !18
|
62 |
+
%40 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03.i, float %32, float %28) #4, !dbg !18
|
63 |
+
%41 = tail call float @llvm.nvvm.fma.rn.f(float %.03.i, float %32, float %28) #4, !dbg !18
|
64 |
+
%.04.i = select i1 %.not4.i, float %41, float %40, !dbg !18
|
65 |
+
%42 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
66 |
+
%.not5.i = icmp eq i32 %42, 0, !dbg !18
|
67 |
+
%43 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04.i, float %32, float %27) #4, !dbg !18
|
68 |
+
%44 = tail call float @llvm.nvvm.fma.rn.f(float %.04.i, float %32, float %27) #4, !dbg !18
|
69 |
+
%.05.i = select i1 %.not5.i, float %44, float %43, !dbg !18
|
70 |
+
%45 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
71 |
+
%.not6.i = icmp eq i32 %45, 0, !dbg !18
|
72 |
+
%46 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i, float %32, float %26) #4, !dbg !18
|
73 |
+
%47 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i, float %32, float %26) #4, !dbg !18
|
74 |
+
%.06.i = select i1 %.not6.i, float %47, float %46, !dbg !18
|
75 |
+
%48 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
76 |
+
%.not7.i = icmp eq i32 %48, 0, !dbg !18
|
77 |
+
%49 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06.i, float %32, float %25) #4, !dbg !18
|
78 |
+
%50 = tail call float @llvm.nvvm.fma.rn.f(float %.06.i, float %32, float %25) #4, !dbg !18
|
79 |
+
%.07.i = select i1 %.not7.i, float %50, float %49, !dbg !18
|
80 |
+
%51 = fneg float %32, !dbg !18
|
81 |
+
%52 = select i1 %21, float %51, float %16, !dbg !18
|
82 |
+
%53 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
83 |
+
%.not8.i = icmp eq i32 %53, 0, !dbg !18
|
84 |
+
%54 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07.i, float %52, float %52) #4, !dbg !18
|
85 |
+
%55 = tail call float @llvm.nvvm.fma.rn.f(float %.07.i, float %52, float %52) #4, !dbg !18
|
86 |
+
%.08.i = select i1 %.not8.i, float %55, float %54, !dbg !18
|
87 |
+
br i1 %21, label %56, label %__nv_erff.exit, !dbg !18
|
88 |
+
|
89 |
+
56: ; preds = %__internal_fmad.exit.i
|
90 |
+
%57 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08.i) #4, !dbg !18
|
91 |
+
%58 = fsub float 1.000000e+00, %57, !dbg !18
|
92 |
+
%59 = bitcast float %58 to i32, !dbg !18
|
93 |
+
%60 = bitcast float %16 to i32, !dbg !18
|
94 |
+
%61 = and i32 %60, -2147483648, !dbg !18
|
95 |
+
%62 = or i32 %61, %59, !dbg !18
|
96 |
+
%63 = bitcast i32 %62 to float, !dbg !18
|
97 |
+
br label %__nv_erff.exit, !dbg !18
|
98 |
+
|
99 |
+
__nv_erff.exit: ; preds = %__internal_fmad.exit.i, %56
|
100 |
+
%r.0.i = phi float [ %63, %56 ], [ %.08.i, %__internal_fmad.exit.i ], !dbg !18
|
101 |
+
%64 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
102 |
+
%.not.i1 = icmp eq i32 %64, 0, !dbg !18
|
103 |
+
%65 = tail call float @llvm.nvvm.fabs.ftz.f(float %17) #4, !dbg !18
|
104 |
+
%66 = tail call float @llvm.nvvm.fabs.f(float %17) #4, !dbg !18
|
105 |
+
%.0.i2 = select i1 %.not.i1, float %66, float %65, !dbg !18
|
106 |
+
%67 = fcmp oge float %.0.i2, 0x3FF00C1FC0000000, !dbg !18
|
107 |
+
br i1 %67, label %__nv_fabsf.exit1.i19, label %69, !dbg !18
|
108 |
+
|
109 |
+
__nv_fabsf.exit1.i19: ; preds = %__nv_erff.exit
|
110 |
+
%68 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
111 |
+
%.not1.i20 = icmp eq i32 %68, 0, !dbg !18
|
112 |
+
%.01.i21 = select i1 %.not1.i20, float %66, float %65, !dbg !18
|
113 |
+
br label %__internal_fmad.exit.i3, !dbg !18
|
114 |
+
|
115 |
+
69: ; preds = %__nv_erff.exit
|
116 |
+
%70 = fmul float %17, %17, !dbg !18
|
117 |
+
br label %__internal_fmad.exit.i3, !dbg !18
|
118 |
+
|
119 |
+
__internal_fmad.exit.i3: ; preds = %69, %__nv_fabsf.exit1.i19
|
120 |
+
%71 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1.i19 ], [ 0x3FC06EBA60000000, %69 ], !dbg !18
|
121 |
+
%72 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1.i19 ], [ 0xBFD8127580000000, %69 ], !dbg !18
|
122 |
+
%73 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1.i19 ], [ 0x3FBCE315E0000000, %69 ], !dbg !18
|
123 |
+
%74 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1.i19 ], [ 0xBF9B837CE0000000, %69 ], !dbg !18
|
124 |
+
%75 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1.i19 ], [ 0x3F755ABD40000000, %69 ], !dbg !18
|
125 |
+
%76 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1.i19 ], [ 0xBF4AE9A400000000, %69 ], !dbg !18
|
126 |
+
%77 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1.i19 ], [ 0x3F163D2D40000000, %69 ], !dbg !18
|
127 |
+
%78 = phi float [ %.01.i21, %__nv_fabsf.exit1.i19 ], [ %70, %69 ], !dbg !18
|
128 |
+
%79 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
129 |
+
%.not2.i4 = icmp eq i32 %79, 0, !dbg !18
|
130 |
+
%80 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %77, float %78, float %76) #4, !dbg !18
|
131 |
+
%81 = tail call float @llvm.nvvm.fma.rn.f(float %77, float %78, float %76) #4, !dbg !18
|
132 |
+
%.02.i5 = select i1 %.not2.i4, float %81, float %80, !dbg !18
|
133 |
+
%82 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
134 |
+
%.not3.i6 = icmp eq i32 %82, 0, !dbg !18
|
135 |
+
%83 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i5, float %78, float %75) #4, !dbg !18
|
136 |
+
%84 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i5, float %78, float %75) #4, !dbg !18
|
137 |
+
%.03.i7 = select i1 %.not3.i6, float %84, float %83, !dbg !18
|
138 |
+
%85 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
139 |
+
%.not4.i8 = icmp eq i32 %85, 0, !dbg !18
|
140 |
+
%86 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03.i7, float %78, float %74) #4, !dbg !18
|
141 |
+
%87 = tail call float @llvm.nvvm.fma.rn.f(float %.03.i7, float %78, float %74) #4, !dbg !18
|
142 |
+
%.04.i9 = select i1 %.not4.i8, float %87, float %86, !dbg !18
|
143 |
+
%88 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
144 |
+
%.not5.i10 = icmp eq i32 %88, 0, !dbg !18
|
145 |
+
%89 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04.i9, float %78, float %73) #4, !dbg !18
|
146 |
+
%90 = tail call float @llvm.nvvm.fma.rn.f(float %.04.i9, float %78, float %73) #4, !dbg !18
|
147 |
+
%.05.i11 = select i1 %.not5.i10, float %90, float %89, !dbg !18
|
148 |
+
%91 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
149 |
+
%.not6.i12 = icmp eq i32 %91, 0, !dbg !18
|
150 |
+
%92 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i11, float %78, float %72) #4, !dbg !18
|
151 |
+
%93 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i11, float %78, float %72) #4, !dbg !18
|
152 |
+
%.06.i13 = select i1 %.not6.i12, float %93, float %92, !dbg !18
|
153 |
+
%94 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
154 |
+
%.not7.i14 = icmp eq i32 %94, 0, !dbg !18
|
155 |
+
%95 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06.i13, float %78, float %71) #4, !dbg !18
|
156 |
+
%96 = tail call float @llvm.nvvm.fma.rn.f(float %.06.i13, float %78, float %71) #4, !dbg !18
|
157 |
+
%.07.i15 = select i1 %.not7.i14, float %96, float %95, !dbg !18
|
158 |
+
%97 = fneg float %78, !dbg !18
|
159 |
+
%98 = select i1 %67, float %97, float %17, !dbg !18
|
160 |
+
%99 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !18
|
161 |
+
%.not8.i16 = icmp eq i32 %99, 0, !dbg !18
|
162 |
+
%100 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07.i15, float %98, float %98) #4, !dbg !18
|
163 |
+
%101 = tail call float @llvm.nvvm.fma.rn.f(float %.07.i15, float %98, float %98) #4, !dbg !18
|
164 |
+
%.08.i17 = select i1 %.not8.i16, float %101, float %100, !dbg !18
|
165 |
+
br i1 %67, label %102, label %__nv_erff.exit22, !dbg !18
|
166 |
+
|
167 |
+
102: ; preds = %__internal_fmad.exit.i3
|
168 |
+
%103 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08.i17) #4, !dbg !18
|
169 |
+
%104 = fsub float 1.000000e+00, %103, !dbg !18
|
170 |
+
%105 = bitcast float %104 to i32, !dbg !18
|
171 |
+
%106 = bitcast float %17 to i32, !dbg !18
|
172 |
+
%107 = and i32 %106, -2147483648, !dbg !18
|
173 |
+
%108 = or i32 %107, %105, !dbg !18
|
174 |
+
%109 = bitcast i32 %108 to float, !dbg !18
|
175 |
+
br label %__nv_erff.exit22, !dbg !18
|
176 |
+
|
177 |
+
__nv_erff.exit22: ; preds = %__internal_fmad.exit.i3, %102
|
178 |
+
%r.0.i18 = phi float [ %109, %102 ], [ %.08.i17, %__internal_fmad.exit.i3 ], !dbg !18
|
179 |
+
%110 = fmul float %15, 5.000000e-01, !dbg !19
|
180 |
+
%111 = fmul float %14, 5.000000e-01, !dbg !19
|
181 |
+
%112 = fadd float %r.0.i, 1.000000e+00, !dbg !20
|
182 |
+
%113 = fadd float %r.0.i18, 1.000000e+00, !dbg !20
|
183 |
+
%114 = fmul float %111, %112, !dbg !21
|
184 |
+
%115 = fmul float %110, %113, !dbg !21
|
185 |
+
%116 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %114) #4, !dbg !22
|
186 |
+
%117 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %115) #4, !dbg !22
|
187 |
+
%118 = insertelement <2 x i16> undef, i16 %116, i64 0, !dbg !22
|
188 |
+
%119 = insertelement <2 x i16> %118, i16 %117, i64 1, !dbg !22
|
189 |
+
%120 = bitcast <2 x i16> %119 to i32, !dbg !22
|
190 |
+
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %120, ptr addrspace(1) %10, i1 true) #4, !dbg !22
|
191 |
+
ret void, !dbg !23
|
192 |
+
}
|
193 |
+
|
194 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
195 |
+
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
|
196 |
+
|
197 |
+
; Function Attrs: alwaysinline nounwind
|
198 |
+
define float @__nv_erff(float %a) local_unnamed_addr #1 {
|
199 |
+
__nv_fabsf.exit:
|
200 |
+
%0 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
201 |
+
%.not = icmp eq i32 %0, 0
|
202 |
+
%1 = tail call float @llvm.nvvm.fabs.ftz.f(float %a) #4
|
203 |
+
%2 = tail call float @llvm.nvvm.fabs.f(float %a) #4
|
204 |
+
%.0 = select i1 %.not, float %2, float %1
|
205 |
+
%3 = fcmp oge float %.0, 0x3FF00C1FC0000000
|
206 |
+
br i1 %3, label %__nv_fabsf.exit1, label %5
|
207 |
+
|
208 |
+
__nv_fabsf.exit1: ; preds = %__nv_fabsf.exit
|
209 |
+
%4 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
210 |
+
%.not1 = icmp eq i32 %4, 0
|
211 |
+
%.01 = select i1 %.not1, float %2, float %1
|
212 |
+
br label %__internal_fmad.exit
|
213 |
+
|
214 |
+
5: ; preds = %__nv_fabsf.exit
|
215 |
+
%6 = fmul float %a, %a
|
216 |
+
br label %__internal_fmad.exit
|
217 |
+
|
218 |
+
__internal_fmad.exit: ; preds = %5, %__nv_fabsf.exit1
|
219 |
+
%7 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1 ], [ 0x3FC06EBA60000000, %5 ]
|
220 |
+
%8 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1 ], [ 0xBFD8127580000000, %5 ]
|
221 |
+
%9 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1 ], [ 0x3FBCE315E0000000, %5 ]
|
222 |
+
%10 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1 ], [ 0xBF9B837CE0000000, %5 ]
|
223 |
+
%11 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1 ], [ 0x3F755ABD40000000, %5 ]
|
224 |
+
%12 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1 ], [ 0xBF4AE9A400000000, %5 ]
|
225 |
+
%13 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1 ], [ 0x3F163D2D40000000, %5 ]
|
226 |
+
%14 = phi float [ %.01, %__nv_fabsf.exit1 ], [ %6, %5 ]
|
227 |
+
%15 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
228 |
+
%.not2 = icmp eq i32 %15, 0
|
229 |
+
%16 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %13, float %14, float %12) #4
|
230 |
+
%17 = tail call float @llvm.nvvm.fma.rn.f(float %13, float %14, float %12) #4
|
231 |
+
%.02 = select i1 %.not2, float %17, float %16
|
232 |
+
%18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
233 |
+
%.not3 = icmp eq i32 %18, 0
|
234 |
+
%19 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02, float %14, float %11) #4
|
235 |
+
%20 = tail call float @llvm.nvvm.fma.rn.f(float %.02, float %14, float %11) #4
|
236 |
+
%.03 = select i1 %.not3, float %20, float %19
|
237 |
+
%21 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
238 |
+
%.not4 = icmp eq i32 %21, 0
|
239 |
+
%22 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03, float %14, float %10) #4
|
240 |
+
%23 = tail call float @llvm.nvvm.fma.rn.f(float %.03, float %14, float %10) #4
|
241 |
+
%.04 = select i1 %.not4, float %23, float %22
|
242 |
+
%24 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
243 |
+
%.not5 = icmp eq i32 %24, 0
|
244 |
+
%25 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04, float %14, float %9) #4
|
245 |
+
%26 = tail call float @llvm.nvvm.fma.rn.f(float %.04, float %14, float %9) #4
|
246 |
+
%.05 = select i1 %.not5, float %26, float %25
|
247 |
+
%27 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
248 |
+
%.not6 = icmp eq i32 %27, 0
|
249 |
+
%28 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05, float %14, float %8) #4
|
250 |
+
%29 = tail call float @llvm.nvvm.fma.rn.f(float %.05, float %14, float %8) #4
|
251 |
+
%.06 = select i1 %.not6, float %29, float %28
|
252 |
+
%30 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
253 |
+
%.not7 = icmp eq i32 %30, 0
|
254 |
+
%31 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06, float %14, float %7) #4
|
255 |
+
%32 = tail call float @llvm.nvvm.fma.rn.f(float %.06, float %14, float %7) #4
|
256 |
+
%.07 = select i1 %.not7, float %32, float %31
|
257 |
+
%33 = fneg float %14
|
258 |
+
%34 = select i1 %3, float %33, float %a
|
259 |
+
%35 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
|
260 |
+
%.not8 = icmp eq i32 %35, 0
|
261 |
+
%36 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07, float %34, float %34) #4
|
262 |
+
%37 = tail call float @llvm.nvvm.fma.rn.f(float %.07, float %34, float %34) #4
|
263 |
+
%.08 = select i1 %.not8, float %37, float %36
|
264 |
+
br i1 %3, label %38, label %46
|
265 |
+
|
266 |
+
38: ; preds = %__internal_fmad.exit
|
267 |
+
%39 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08) #4
|
268 |
+
%40 = fsub float 1.000000e+00, %39
|
269 |
+
%41 = bitcast float %40 to i32
|
270 |
+
%42 = bitcast float %a to i32
|
271 |
+
%43 = and i32 %42, -2147483648
|
272 |
+
%44 = or i32 %43, %41
|
273 |
+
%45 = bitcast i32 %44 to float
|
274 |
+
br label %46
|
275 |
+
|
276 |
+
46: ; preds = %38, %__internal_fmad.exit
|
277 |
+
%r.0 = phi float [ %45, %38 ], [ %.08, %__internal_fmad.exit ]
|
278 |
+
ret float %r.0
|
279 |
+
}
|
280 |
+
|
281 |
+
declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #2
|
282 |
+
|
283 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
284 |
+
declare float @llvm.nvvm.fabs.ftz.f(float) #0
|
285 |
+
|
286 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
287 |
+
declare float @llvm.nvvm.fabs.f(float) #0
|
288 |
+
|
289 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
290 |
+
declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float) #0
|
291 |
+
|
292 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
293 |
+
declare float @llvm.nvvm.fma.rn.f(float, float, float) #0
|
294 |
+
|
295 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
|
296 |
+
declare float @llvm.nvvm.ex2.approx.ftz.f(float) #3
|
297 |
+
|
298 |
+
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
299 |
+
attributes #1 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
300 |
+
attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
301 |
+
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
|
302 |
+
attributes #4 = { nounwind }
|
303 |
+
|
304 |
+
!llvm.module.flags = !{!0, !1}
|
305 |
+
!llvm.dbg.cu = !{!2}
|
306 |
+
!nvvm.annotations = !{!4, !5, !5, !4}
|
307 |
+
!llvm.ident = !{!6}
|
308 |
+
|
309 |
+
!0 = !{i32 2, !"Debug Info Version", i32 3}
|
310 |
+
!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
|
311 |
+
!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
|
312 |
+
!3 = !DIFile(filename: "cafucwnmq4o436kwzkmrinerrnocxll7q6wsadcl726g6cradipo.py", directory: "/tmp/torchinductor_root/af")
|
313 |
+
!4 = !{ptr @triton__0d1de, !"kernel", i32 1}
|
314 |
+
!5 = !{ptr @triton__0d1de, !"maxntidx", i32 256}
|
315 |
+
!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
|
316 |
+
!7 = distinct !DISubprogram(name: "triton__0d1de", linkageName: "triton__0d1de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
|
317 |
+
!8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
|
318 |
+
!9 = !{}
|
319 |
+
!10 = !DILocation(line: 21, column: 36, scope: !7)
|
320 |
+
!11 = !DILocation(line: 20, column: 28, scope: !7)
|
321 |
+
!12 = !DILocation(line: 20, column: 33, scope: !7)
|
322 |
+
!13 = !DILocation(line: 21, column: 23, scope: !7)
|
323 |
+
!14 = !DILocation(line: 24, column: 34, scope: !7)
|
324 |
+
!15 = !DILocation(line: 24, column: 39, scope: !7)
|
325 |
+
!16 = !DILocation(line: 24, column: 48, scope: !7)
|
326 |
+
!17 = !DILocation(line: 29, column: 18, scope: !7)
|
327 |
+
!18 = !DILocation(line: 30, column: 23, scope: !7)
|
328 |
+
!19 = !DILocation(line: 27, column: 18, scope: !7)
|
329 |
+
!20 = !DILocation(line: 32, column: 18, scope: !7)
|
330 |
+
!21 = !DILocation(line: 33, column: 18, scope: !7)
|
331 |
+
!22 = !DILocation(line: 35, column: 40, scope: !7)
|
332 |
+
!23 = !DILocation(line: 35, column: 4, scope: !7)
|
.triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.ptx
ADDED
@@ -0,0 +1,446 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Generated by LLVM NVPTX Back-End
|
3 |
+
//
|
4 |
+
|
5 |
+
.version 8.2
|
6 |
+
.target sm_89
|
7 |
+
.address_size 64
|
8 |
+
|
9 |
+
// .globl triton__0d1de
|
10 |
+
.global .align 1 .b8 _$_str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};
|
11 |
+
|
12 |
+
.visible .entry triton__0d1de(
|
13 |
+
.param .u64 triton__0d1de_param_0,
|
14 |
+
.param .u32 triton__0d1de_param_1
|
15 |
+
)
|
16 |
+
.maxntid 256, 1, 1
|
17 |
+
{
|
18 |
+
.reg .pred %p<9>;
|
19 |
+
.reg .b16 %rs<5>;
|
20 |
+
.reg .b32 %r<22>;
|
21 |
+
.reg .f32 %f<113>;
|
22 |
+
.reg .b64 %rd<6>;
|
23 |
+
.loc 1 18 0
|
24 |
+
$L__func_begin0:
|
25 |
+
.loc 1 18 0
|
26 |
+
|
27 |
+
ld.param.u64 %rd3, [triton__0d1de_param_0];
|
28 |
+
$L__tmp0:
|
29 |
+
.loc 1 21 36
|
30 |
+
mov.u32 %r5, %tid.x;
|
31 |
+
shl.b32 %r6, %r5, 1;
|
32 |
+
and.b32 %r7, %r6, 510;
|
33 |
+
.loc 1 20 28
|
34 |
+
mov.u32 %r1, %ctaid.x;
|
35 |
+
.loc 1 20 33
|
36 |
+
shl.b32 %r8, %r1, 9;
|
37 |
+
.loc 1 21 23
|
38 |
+
or.b32 %r9, %r8, %r7;
|
39 |
+
.loc 1 24 34
|
40 |
+
mul.wide.s32 %rd4, %r9, 2;
|
41 |
+
add.s64 %rd5, %rd3, %rd4;
|
42 |
+
mov.pred %p1, -1;
|
43 |
+
.loc 1 24 39
|
44 |
+
mov.u32 %r2, 0x0;
|
45 |
+
@%p1 ld.global.b32 { %r2 }, [ %rd5 + 0 ];
|
46 |
+
cvt.u16.u32 %rs1, %r2;
|
47 |
+
{ .reg .b16 tmp; mov.b32 {tmp, %rs2}, %r2; }
|
48 |
+
.loc 1 24 48
|
49 |
+
cvt.f32.bf16 %r3, %rs1;
|
50 |
+
mov.b32 %f1, %r3;
|
51 |
+
cvt.f32.bf16 %r4, %rs2;
|
52 |
+
mov.b32 %f2, %r4;
|
53 |
+
.loc 1 29 18
|
54 |
+
mul.f32 %f3, %f1, 0f3F3504F3;
|
55 |
+
.loc 1 30 23
|
56 |
+
abs.ftz.f32 %f5, %f3;
|
57 |
+
setp.ge.f32 %p2, %f5, 0f3F8060FE;
|
58 |
+
mov.f32 %f101, 0f3789CA3C;
|
59 |
+
mov.f32 %f100, 0fB9F560B9;
|
60 |
+
mov.f32 %f99, 0f3BAC840B;
|
61 |
+
mov.f32 %f98, 0fBD0C8162;
|
62 |
+
mov.f32 %f97, 0f3E1CF906;
|
63 |
+
mov.f32 %f96, 0f3F6A937E;
|
64 |
+
mov.f32 %f95, 0f3F20D842;
|
65 |
+
mov.f32 %f102, %f5;
|
66 |
+
@%p2 bra $L__BB0_2;
|
67 |
+
.loc 1 0 23
|
68 |
+
mov.f32 %f101, 0f38B1E96A;
|
69 |
+
mov.f32 %f100, 0fBA574D20;
|
70 |
+
mov.f32 %f99, 0f3BAAD5EA;
|
71 |
+
mov.f32 %f98, 0fBCDC1BE7;
|
72 |
+
mov.f32 %f97, 0f3DE718AF;
|
73 |
+
mov.f32 %f96, 0fBEC093AC;
|
74 |
+
mov.f32 %f95, 0f3E0375D3;
|
75 |
+
.loc 1 30 23
|
76 |
+
mul.f32 %f102, %f3, %f3;
|
77 |
+
$L__BB0_2:
|
78 |
+
.loc 1 0 0
|
79 |
+
mul.f32 %f4, %f2, 0f3F3504F3;
|
80 |
+
.loc 1 30 23
|
81 |
+
setp.ltu.f32 %p3, %f5, 0f3F8060FE;
|
82 |
+
fma.rn.ftz.f32 %f45, %f101, %f102, %f100;
|
83 |
+
fma.rn.ftz.f32 %f46, %f45, %f102, %f99;
|
84 |
+
fma.rn.ftz.f32 %f47, %f46, %f102, %f98;
|
85 |
+
fma.rn.ftz.f32 %f48, %f47, %f102, %f97;
|
86 |
+
fma.rn.ftz.f32 %f49, %f48, %f102, %f96;
|
87 |
+
fma.rn.ftz.f32 %f50, %f49, %f102, %f95;
|
88 |
+
neg.f32 %f51, %f102;
|
89 |
+
selp.f32 %f52, %f51, %f3, %p2;
|
90 |
+
fma.rn.ftz.f32 %f103, %f50, %f52, %f52;
|
91 |
+
mov.f32 %f94, 0f3F800000;
|
92 |
+
@%p3 bra $L__BB0_4;
|
93 |
+
ex2.approx.ftz.f32 %f53, %f103;
|
94 |
+
sub.f32 %f55, %f94, %f53;
|
95 |
+
mov.b32 %r10, %f55;
|
96 |
+
mov.b32 %r11, %f3;
|
97 |
+
and.b32 %r12, %r11, -2147483648;
|
98 |
+
or.b32 %r13, %r12, %r10;
|
99 |
+
mov.b32 %f103, %r13;
|
100 |
+
$L__BB0_4:
|
101 |
+
abs.ftz.f32 %f18, %f4;
|
102 |
+
setp.ge.f32 %p5, %f18, 0f3F8060FE;
|
103 |
+
mov.f32 %f110, 0f3789CA3C;
|
104 |
+
mov.f32 %f109, 0fB9F560B9;
|
105 |
+
mov.f32 %f108, 0f3BAC840B;
|
106 |
+
mov.f32 %f107, 0fBD0C8162;
|
107 |
+
mov.f32 %f106, 0f3E1CF906;
|
108 |
+
mov.f32 %f105, 0f3F6A937E;
|
109 |
+
mov.f32 %f104, 0f3F20D842;
|
110 |
+
mov.f32 %f111, %f18;
|
111 |
+
@%p5 bra $L__BB0_6;
|
112 |
+
mul.f32 %f111, %f4, %f4;
|
113 |
+
mov.f32 %f110, 0f38B1E96A;
|
114 |
+
mov.f32 %f109, 0fBA574D20;
|
115 |
+
mov.f32 %f108, 0f3BAAD5EA;
|
116 |
+
mov.f32 %f107, 0fBCDC1BE7;
|
117 |
+
mov.f32 %f106, 0f3DE718AF;
|
118 |
+
mov.f32 %f105, 0fBEC093AC;
|
119 |
+
mov.f32 %f104, 0f3E0375D3;
|
120 |
+
$L__BB0_6:
|
121 |
+
setp.ltu.f32 %p6, %f18, 0f3F8060FE;
|
122 |
+
fma.rn.ftz.f32 %f70, %f110, %f111, %f109;
|
123 |
+
fma.rn.ftz.f32 %f71, %f70, %f111, %f108;
|
124 |
+
fma.rn.ftz.f32 %f72, %f71, %f111, %f107;
|
125 |
+
fma.rn.ftz.f32 %f73, %f72, %f111, %f106;
|
126 |
+
fma.rn.ftz.f32 %f74, %f73, %f111, %f105;
|
127 |
+
fma.rn.ftz.f32 %f75, %f74, %f111, %f104;
|
128 |
+
neg.f32 %f76, %f111;
|
129 |
+
selp.f32 %f77, %f76, %f4, %p5;
|
130 |
+
fma.rn.ftz.f32 %f112, %f75, %f77, %f77;
|
131 |
+
@%p6 bra $L__BB0_8;
|
132 |
+
ex2.approx.ftz.f32 %f78, %f112;
|
133 |
+
sub.f32 %f80, %f94, %f78;
|
134 |
+
mov.b32 %r14, %f80;
|
135 |
+
mov.b32 %r15, %f4;
|
136 |
+
and.b32 %r16, %r15, -2147483648;
|
137 |
+
or.b32 %r17, %r16, %r14;
|
138 |
+
mov.b32 %f112, %r17;
|
139 |
+
$L__BB0_8:
|
140 |
+
.loc 1 27 18
|
141 |
+
mul.f32 %f81, %f2, 0f3F000000;
|
142 |
+
mul.f32 %f82, %f1, 0f3F000000;
|
143 |
+
.loc 1 32 18
|
144 |
+
add.f32 %f83, %f103, 0f3F800000;
|
145 |
+
add.f32 %f84, %f112, 0f3F800000;
|
146 |
+
.loc 1 33 18
|
147 |
+
mul.f32 %f85, %f82, %f83;
|
148 |
+
mul.f32 %f86, %f81, %f84;
|
149 |
+
.loc 1 35 40
|
150 |
+
mov.b32 %r18, %f85;
|
151 |
+
cvt.rn.bf16.f32 %rs3, %r18;
|
152 |
+
mov.b32 %r19, %f86;
|
153 |
+
cvt.rn.bf16.f32 %rs4, %r19;
|
154 |
+
mov.b32 %r21, {%rs3, %rs4};
|
155 |
+
@%p1 st.global.b32 [ %rd5 + 0 ], { %r21 };
|
156 |
+
.loc 1 35 4
|
157 |
+
ret;
|
158 |
+
$L__tmp1:
|
159 |
+
$L__func_end0:
|
160 |
+
|
161 |
+
}
|
162 |
+
// .globl __nv_erff
|
163 |
+
.visible .func (.param .b32 func_retval0) __nv_erff(
|
164 |
+
.param .b32 __nv_erff_param_0
|
165 |
+
)
|
166 |
+
{
|
167 |
+
.reg .pred %p<4>;
|
168 |
+
.reg .b32 %r<5>;
|
169 |
+
.reg .f32 %f<49>;
|
170 |
+
$L__func_begin1:
|
171 |
+
|
172 |
+
ld.param.f32 %f14, [__nv_erff_param_0];
|
173 |
+
abs.ftz.f32 %f1, %f14;
|
174 |
+
setp.ge.f32 %p1, %f1, 0f3F8060FE;
|
175 |
+
mov.f32 %f46, 0f3789CA3C;
|
176 |
+
mov.f32 %f45, 0fB9F560B9;
|
177 |
+
mov.f32 %f44, 0f3BAC840B;
|
178 |
+
mov.f32 %f43, 0fBD0C8162;
|
179 |
+
mov.f32 %f42, 0f3E1CF906;
|
180 |
+
mov.f32 %f41, 0f3F6A937E;
|
181 |
+
mov.f32 %f40, 0f3F20D842;
|
182 |
+
mov.f32 %f47, %f1;
|
183 |
+
@%p1 bra $L__BB1_2;
|
184 |
+
mul.f32 %f47, %f14, %f14;
|
185 |
+
mov.f32 %f46, 0f38B1E96A;
|
186 |
+
mov.f32 %f45, 0fBA574D20;
|
187 |
+
mov.f32 %f44, 0f3BAAD5EA;
|
188 |
+
mov.f32 %f43, 0fBCDC1BE7;
|
189 |
+
mov.f32 %f42, 0f3DE718AF;
|
190 |
+
mov.f32 %f41, 0fBEC093AC;
|
191 |
+
mov.f32 %f40, 0f3E0375D3;
|
192 |
+
$L__BB1_2:
|
193 |
+
setp.ltu.f32 %p2, %f1, 0f3F8060FE;
|
194 |
+
fma.rn.ftz.f32 %f29, %f46, %f47, %f45;
|
195 |
+
fma.rn.ftz.f32 %f30, %f29, %f47, %f44;
|
196 |
+
fma.rn.ftz.f32 %f31, %f30, %f47, %f43;
|
197 |
+
fma.rn.ftz.f32 %f32, %f31, %f47, %f42;
|
198 |
+
fma.rn.ftz.f32 %f33, %f32, %f47, %f41;
|
199 |
+
fma.rn.ftz.f32 %f34, %f33, %f47, %f40;
|
200 |
+
neg.f32 %f35, %f47;
|
201 |
+
selp.f32 %f36, %f35, %f14, %p1;
|
202 |
+
fma.rn.ftz.f32 %f48, %f34, %f36, %f36;
|
203 |
+
@%p2 bra $L__BB1_4;
|
204 |
+
ex2.approx.ftz.f32 %f37, %f48;
|
205 |
+
mov.f32 %f38, 0f3F800000;
|
206 |
+
sub.f32 %f39, %f38, %f37;
|
207 |
+
mov.b32 %r1, %f39;
|
208 |
+
mov.b32 %r2, %f14;
|
209 |
+
and.b32 %r3, %r2, -2147483648;
|
210 |
+
or.b32 %r4, %r3, %r1;
|
211 |
+
mov.b32 %f48, %r4;
|
212 |
+
$L__BB1_4:
|
213 |
+
st.param.f32 [func_retval0+0], %f48;
|
214 |
+
ret;
|
215 |
+
$L__func_end1:
|
216 |
+
|
217 |
+
}
|
218 |
+
.file 1 "/tmp/torchinductor_root/af/cafucwnmq4o436kwzkmrinerrnocxll7q6wsadcl726g6cradipo.py"
|
219 |
+
.section .debug_abbrev
|
220 |
+
{
|
221 |
+
.b8 1
|
222 |
+
.b8 17
|
223 |
+
.b8 1
|
224 |
+
.b8 37
|
225 |
+
.b8 8
|
226 |
+
.b8 19
|
227 |
+
.b8 5
|
228 |
+
.b8 3
|
229 |
+
.b8 8
|
230 |
+
.b8 16
|
231 |
+
.b8 6
|
232 |
+
.b8 27
|
233 |
+
.b8 8
|
234 |
+
.b8 180
|
235 |
+
.b8 66
|
236 |
+
.b8 12
|
237 |
+
.b8 17
|
238 |
+
.b8 1
|
239 |
+
.b8 18
|
240 |
+
.b8 1
|
241 |
+
.b8 0
|
242 |
+
.b8 0
|
243 |
+
.b8 2
|
244 |
+
.b8 46
|
245 |
+
.b8 0
|
246 |
+
.b8 17
|
247 |
+
.b8 1
|
248 |
+
.b8 18
|
249 |
+
.b8 1
|
250 |
+
.b8 64
|
251 |
+
.b8 10
|
252 |
+
.b8 135
|
253 |
+
.b8 64
|
254 |
+
.b8 8
|
255 |
+
.b8 3
|
256 |
+
.b8 8
|
257 |
+
.b8 58
|
258 |
+
.b8 11
|
259 |
+
.b8 59
|
260 |
+
.b8 11
|
261 |
+
.b8 63
|
262 |
+
.b8 12
|
263 |
+
.b8 0
|
264 |
+
.b8 0
|
265 |
+
.b8 0
|
266 |
+
}
|
267 |
+
.section .debug_info
|
268 |
+
{
|
269 |
+
.b32 172
|
270 |
+
.b8 2
|
271 |
+
.b8 0
|
272 |
+
.b32 .debug_abbrev
|
273 |
+
.b8 8
|
274 |
+
.b8 1
|
275 |
+
.b8 116
|
276 |
+
.b8 114
|
277 |
+
.b8 105
|
278 |
+
.b8 116
|
279 |
+
.b8 111
|
280 |
+
.b8 110
|
281 |
+
.b8 0
|
282 |
+
.b8 2
|
283 |
+
.b8 0
|
284 |
+
.b8 99
|
285 |
+
.b8 97
|
286 |
+
.b8 102
|
287 |
+
.b8 117
|
288 |
+
.b8 99
|
289 |
+
.b8 119
|
290 |
+
.b8 110
|
291 |
+
.b8 109
|
292 |
+
.b8 113
|
293 |
+
.b8 52
|
294 |
+
.b8 111
|
295 |
+
.b8 52
|
296 |
+
.b8 51
|
297 |
+
.b8 54
|
298 |
+
.b8 107
|
299 |
+
.b8 119
|
300 |
+
.b8 122
|
301 |
+
.b8 107
|
302 |
+
.b8 109
|
303 |
+
.b8 114
|
304 |
+
.b8 105
|
305 |
+
.b8 110
|
306 |
+
.b8 101
|
307 |
+
.b8 114
|
308 |
+
.b8 114
|
309 |
+
.b8 110
|
310 |
+
.b8 111
|
311 |
+
.b8 99
|
312 |
+
.b8 120
|
313 |
+
.b8 108
|
314 |
+
.b8 108
|
315 |
+
.b8 55
|
316 |
+
.b8 113
|
317 |
+
.b8 54
|
318 |
+
.b8 119
|
319 |
+
.b8 115
|
320 |
+
.b8 97
|
321 |
+
.b8 100
|
322 |
+
.b8 99
|
323 |
+
.b8 108
|
324 |
+
.b8 55
|
325 |
+
.b8 50
|
326 |
+
.b8 54
|
327 |
+
.b8 103
|
328 |
+
.b8 54
|
329 |
+
.b8 99
|
330 |
+
.b8 114
|
331 |
+
.b8 97
|
332 |
+
.b8 100
|
333 |
+
.b8 105
|
334 |
+
.b8 112
|
335 |
+
.b8 111
|
336 |
+
.b8 46
|
337 |
+
.b8 112
|
338 |
+
.b8 121
|
339 |
+
.b8 0
|
340 |
+
.b32 .debug_line
|
341 |
+
.b8 47
|
342 |
+
.b8 116
|
343 |
+
.b8 109
|
344 |
+
.b8 112
|
345 |
+
.b8 47
|
346 |
+
.b8 116
|
347 |
+
.b8 111
|
348 |
+
.b8 114
|
349 |
+
.b8 99
|
350 |
+
.b8 104
|
351 |
+
.b8 105
|
352 |
+
.b8 110
|
353 |
+
.b8 100
|
354 |
+
.b8 117
|
355 |
+
.b8 99
|
356 |
+
.b8 116
|
357 |
+
.b8 111
|
358 |
+
.b8 114
|
359 |
+
.b8 95
|
360 |
+
.b8 114
|
361 |
+
.b8 111
|
362 |
+
.b8 111
|
363 |
+
.b8 116
|
364 |
+
.b8 47
|
365 |
+
.b8 97
|
366 |
+
.b8 102
|
367 |
+
.b8 0
|
368 |
+
.b8 1
|
369 |
+
.b64 $L__func_begin0
|
370 |
+
.b64 $L__func_end0
|
371 |
+
.b8 2
|
372 |
+
.b64 $L__func_begin0
|
373 |
+
.b64 $L__func_end0
|
374 |
+
.b8 1
|
375 |
+
.b8 156
|
376 |
+
.b8 116
|
377 |
+
.b8 114
|
378 |
+
.b8 105
|
379 |
+
.b8 116
|
380 |
+
.b8 111
|
381 |
+
.b8 110
|
382 |
+
.b8 95
|
383 |
+
.b8 95
|
384 |
+
.b8 48
|
385 |
+
.b8 100
|
386 |
+
.b8 49
|
387 |
+
.b8 100
|
388 |
+
.b8 101
|
389 |
+
.b8 0
|
390 |
+
.b8 116
|
391 |
+
.b8 114
|
392 |
+
.b8 105
|
393 |
+
.b8 116
|
394 |
+
.b8 111
|
395 |
+
.b8 110
|
396 |
+
.b8 95
|
397 |
+
.b8 95
|
398 |
+
.b8 48
|
399 |
+
.b8 100
|
400 |
+
.b8 49
|
401 |
+
.b8 100
|
402 |
+
.b8 101
|
403 |
+
.b8 0
|
404 |
+
.b8 1
|
405 |
+
.b8 18
|
406 |
+
.b8 1
|
407 |
+
.b8 0
|
408 |
+
}
|
409 |
+
.section .debug_pubnames
|
410 |
+
{
|
411 |
+
.b32 $L__pubNames_end0-$L__pubNames_start0
|
412 |
+
$L__pubNames_start0:
|
413 |
+
.b8 2
|
414 |
+
.b8 0
|
415 |
+
.b32 .debug_info
|
416 |
+
.b32 176
|
417 |
+
.b32 125
|
418 |
+
.b8 116
|
419 |
+
.b8 114
|
420 |
+
.b8 105
|
421 |
+
.b8 116
|
422 |
+
.b8 111
|
423 |
+
.b8 110
|
424 |
+
.b8 95
|
425 |
+
.b8 95
|
426 |
+
.b8 48
|
427 |
+
.b8 100
|
428 |
+
.b8 49
|
429 |
+
.b8 100
|
430 |
+
.b8 101
|
431 |
+
.b8 0
|
432 |
+
.b32 0
|
433 |
+
$L__pubNames_end0:
|
434 |
+
}
|
435 |
+
.section .debug_pubtypes
|
436 |
+
{
|
437 |
+
.b32 $L__pubTypes_end0-$L__pubTypes_start0
|
438 |
+
$L__pubTypes_start0:
|
439 |
+
.b8 2
|
440 |
+
.b8 0
|
441 |
+
.b32 .debug_info
|
442 |
+
.b32 176
|
443 |
+
.b32 0
|
444 |
+
$L__pubTypes_end0:
|
445 |
+
}
|
446 |
+
.section .debug_loc { }
|
.triton/dump/1e922bbbab749da355e4bad9c6b245e6/triton_.ttgir
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#blocked = #triton_gpu.blocked<{sizePerThread = [2], threadsPerWarp = [32], warpsPerCTA = [8], order = [0], CTAsPerCGA = [1], CTASplitNum = [1], CTAOrder = [0]}>
|
2 |
+
module attributes {"triton_gpu.compute-capability" = 89 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 8 : i32, "triton_gpu.threads-per-warp" = 32 : i32} {
|
3 |
+
tt.func public @triton__0d1de(%arg0: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
|
4 |
+
%cst = arith.constant dense<1.000000e+00> : tensor<512xf32, #blocked>
|
5 |
+
%cst_0 = arith.constant dense<0.707106769> : tensor<512xf32, #blocked>
|
6 |
+
%cst_1 = arith.constant dense<5.000000e-01> : tensor<512xf32, #blocked>
|
7 |
+
%c512_i32 = arith.constant 512 : i32
|
8 |
+
%0 = tt.get_program_id x : i32
|
9 |
+
%1 = arith.muli %0, %c512_i32 : i32
|
10 |
+
%2 = tt.make_range {end = 512 : i32, start = 0 : i32} : tensor<512xi32, #blocked>
|
11 |
+
%3 = tt.splat %1 : (i32) -> tensor<512xi32, #blocked>
|
12 |
+
%4 = arith.addi %3, %2 : tensor<512xi32, #blocked>
|
13 |
+
%5 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>, #blocked>
|
14 |
+
%6 = tt.addptr %5, %4 : tensor<512x!tt.ptr<bf16, 1>, #blocked>, tensor<512xi32, #blocked>
|
15 |
+
%7 = tt.load %6 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<512xbf16, #blocked>
|
16 |
+
%8 = arith.extf %7 : tensor<512xbf16, #blocked> to tensor<512xf32, #blocked>
|
17 |
+
%9 = arith.mulf %8, %cst_1 : tensor<512xf32, #blocked>
|
18 |
+
%10 = arith.mulf %8, %cst_0 : tensor<512xf32, #blocked>
|
19 |
+
%11 = tt.extern_elementwise %10 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_erff"} : (tensor<512xf32, #blocked>) -> tensor<512xf32, #blocked>
|
20 |
+
%12 = arith.addf %11, %cst : tensor<512xf32, #blocked>
|
21 |
+
%13 = arith.mulf %9, %12 : tensor<512xf32, #blocked>
|
22 |
+
%14 = arith.truncf %13 : tensor<512xf32, #blocked> to tensor<512xbf16, #blocked>
|
23 |
+
tt.store %6, %14 {cache = 1 : i32, evict = 1 : i32} : tensor<512xbf16, #blocked>
|
24 |
+
tt.return
|
25 |
+
}
|
26 |
+
}
|
.triton/dump/305a9479aab997a3a16bfe46bb303a50/triton_.cubin
ADDED
Binary file (30.4 kB). View file
|
|
.triton/dump/345a87a492fd703c73ab83265a21fcb6/triton_.cubin
ADDED
Binary file (52.2 kB). View file
|
|
.triton/dump/3cd3b6d7993c56f7d0340d40c84f737c/triton_.ptx
ADDED
@@ -0,0 +1,809 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Generated by LLVM NVPTX Back-End
|
3 |
+
//
|
4 |
+
|
5 |
+
.version 8.2
|
6 |
+
.target sm_89
|
7 |
+
.address_size 64
|
8 |
+
|
9 |
+
// .globl triton__0d1d2d3d4d5d6de7de
|
10 |
+
.extern .func __assertfail
|
11 |
+
(
|
12 |
+
.param .b64 __assertfail_param_0,
|
13 |
+
.param .b64 __assertfail_param_1,
|
14 |
+
.param .b32 __assertfail_param_2,
|
15 |
+
.param .b64 __assertfail_param_3,
|
16 |
+
.param .b64 __assertfail_param_4
|
17 |
+
)
|
18 |
+
;
|
19 |
+
.global .align 1 .b8 assertFunc_1[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100};
|
20 |
+
.global .align 1 .b8 assertFile_1[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62};
|
21 |
+
.global .align 1 .b8 assertMessage_1[39] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 49, 54, 32, 60, 32, 53, 48, 50, 53, 55};
|
22 |
+
.global .align 1 .b8 assertFunc_0[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100};
|
23 |
+
.global .align 1 .b8 assertFile_0[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62};
|
24 |
+
.global .align 1 .b8 assertMessage_0[38] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 51, 32, 60, 32, 53, 48, 50, 53, 55};
|
25 |
+
.global .align 1 .b8 _$_str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};
|
26 |
+
|
27 |
+
.visible .entry triton__0d1d2d3d4d5d6de7de(
|
28 |
+
.param .u64 triton__0d1d2d3d4d5d6de7de_param_0,
|
29 |
+
.param .u64 triton__0d1d2d3d4d5d6de7de_param_1,
|
30 |
+
.param .u64 triton__0d1d2d3d4d5d6de7de_param_2,
|
31 |
+
.param .u64 triton__0d1d2d3d4d5d6de7de_param_3,
|
32 |
+
.param .u64 triton__0d1d2d3d4d5d6de7de_param_4,
|
33 |
+
.param .u64 triton__0d1d2d3d4d5d6de7de_param_5,
|
34 |
+
.param .u32 triton__0d1d2d3d4d5d6de7de_param_6,
|
35 |
+
.param .u32 triton__0d1d2d3d4d5d6de7de_param_7
|
36 |
+
)
|
37 |
+
.maxntid 256, 1, 1
|
38 |
+
{
|
39 |
+
.reg .pred %p<33>;
|
40 |
+
.reg .b16 %rs<13>;
|
41 |
+
.reg .b32 %r<93>;
|
42 |
+
.reg .f32 %f<79>;
|
43 |
+
.reg .b64 %rd<92>;
|
44 |
+
.loc 1 18 0
|
45 |
+
$L__func_begin0:
|
46 |
+
.loc 1 18 0
|
47 |
+
|
48 |
+
ld.param.u64 %rd37, [triton__0d1d2d3d4d5d6de7de_param_4];
|
49 |
+
ld.param.u64 %rd36, [triton__0d1d2d3d4d5d6de7de_param_3];
|
50 |
+
ld.param.u64 %rd35, [triton__0d1d2d3d4d5d6de7de_param_2];
|
51 |
+
ld.param.u64 %rd34, [triton__0d1d2d3d4d5d6de7de_param_1];
|
52 |
+
ld.param.u64 %rd43, [triton__0d1d2d3d4d5d6de7de_param_0];
|
53 |
+
$L__tmp0:
|
54 |
+
.loc 1 22 44
|
55 |
+
mov.u32 %r1, %tid.x;
|
56 |
+
bfe.u32 %r2, %r1, 2, 6;
|
57 |
+
and.b32 %r16, %r1, 63;
|
58 |
+
.loc 1 24 33
|
59 |
+
and.b32 %r3, %r1, 3;
|
60 |
+
.loc 1 21 28
|
61 |
+
mov.u32 %r15, %ctaid.x;
|
62 |
+
.loc 1 21 33
|
63 |
+
shl.b32 %r17, %r15, 6;
|
64 |
+
.loc 1 22 23
|
65 |
+
or.b32 %r18, %r17, %r2;
|
66 |
+
or.b32 %r19, %r17, %r16;
|
67 |
+
.loc 1 26 30
|
68 |
+
mul.wide.s32 %rd44, %r18, 8;
|
69 |
+
add.s64 %rd40, %rd43, %rd44;
|
70 |
+
mul.wide.s32 %rd45, %r19, 8;
|
71 |
+
add.s64 %rd42, %rd43, %rd45;
|
72 |
+
mov.pred %p13, -1;
|
73 |
+
.loc 1 26 35
|
74 |
+
mov.u64 %rd39, 0x0;
|
75 |
+
@%p13 ld.global.L1::evict_last.b64 { %rd39 }, [ %rd40 + 0 ];
|
76 |
+
mov.u64 %rd41, 0x0;
|
77 |
+
@%p13 ld.global.L1::evict_last.b64 { %rd41 }, [ %rd42 + 0 ];
|
78 |
+
.loc 1 27 18
|
79 |
+
bfe.s32 %r20, %r15, 25, 1;
|
80 |
+
shr.u32 %r21, %r20, 23;
|
81 |
+
add.s32 %r22, %r18, %r21;
|
82 |
+
and.b32 %r23, %r22, 16776704;
|
83 |
+
sub.s32 %r24, %r18, %r23;
|
84 |
+
.loc 1 35 44
|
85 |
+
shl.b32 %r5, %r24, 8;
|
86 |
+
.loc 1 37 22
|
87 |
+
add.s64 %rd46, %rd41, 50257;
|
88 |
+
.loc 1 38 22
|
89 |
+
setp.lt.s64 %p3, %rd39, 0;
|
90 |
+
setp.lt.s64 %p4, %rd41, 0;
|
91 |
+
.loc 1 39 36
|
92 |
+
selp.b64 %rd47, %rd46, %rd41, %p4;
|
93 |
+
.loc 1 40 40
|
94 |
+
setp.gt.u64 %p5, %rd47, 50256;
|
95 |
+
.loc 1 41 44
|
96 |
+
shl.b64 %rd48, %rd39, 8;
|
97 |
+
add.s64 %rd49, %rd48, 12865792;
|
98 |
+
selp.b64 %rd2, %rd49, %rd48, %p3;
|
99 |
+
mov.u16 %rs12, 0;
|
100 |
+
mov.b32 %r76, 0;
|
101 |
+
mov.b32 %r88, 883;
|
102 |
+
mov.u64 %rd81, 1;
|
103 |
+
.loc 1 40 55
|
104 |
+
@%p5 bra $L__BB0_3;
|
105 |
+
bra.uni $L__BB0_1;
|
106 |
+
$L__BB0_3:
|
107 |
+
.loc 1 31 36
|
108 |
+
shl.b64 %rd55, %rd2, 2;
|
109 |
+
mul.wide.u32 %rd88, %r3, 4;
|
110 |
+
add.s64 %rd87, %rd55, %rd88;
|
111 |
+
add.s64 %rd83, %rd34, %rd87;
|
112 |
+
shl.b32 %r42, %r15, 14;
|
113 |
+
shl.b32 %r43, %r2, 8;
|
114 |
+
or.b32 %r44, %r42, %r43;
|
115 |
+
or.b32 %r91, %r44, %r3;
|
116 |
+
add.s32 %r45, %r5, %r3;
|
117 |
+
mul.wide.s32 %rd86, %r45, 4;
|
118 |
+
add.s64 %rd82, %rd35, %rd86;
|
119 |
+
mov.f32 %f78, 0f00000000;
|
120 |
+
mov.b32 %r89, -4;
|
121 |
+
mov.f32 %f77, %f78;
|
122 |
+
mov.f32 %f76, %f78;
|
123 |
+
$L__BB0_4:
|
124 |
+
.loc 1 35 50
|
125 |
+
mov.u32 %r46, 0x0;
|
126 |
+
@%p13 ld.global.L1::evict_last.b32 { %r46 }, [ %rd82 + 0 ];
|
127 |
+
@!%p13 mov.u32 %r46, %r76;
|
128 |
+
mov.b32 %f31, %r46;
|
129 |
+
.loc 1 31 36
|
130 |
+
add.s32 %r89, %r89, 4;
|
131 |
+
.loc 1 36 34
|
132 |
+
add.s32 %r54, %r89, %r91;
|
133 |
+
mul.wide.s32 %rd59, %r54, 2;
|
134 |
+
add.s64 %rd57, %rd36, %rd59;
|
135 |
+
.loc 1 36 50
|
136 |
+
mov.u16 %rs4, 0x0;
|
137 |
+
@%p13 ld.global.L1::evict_last.b16 { %rs4 }, [ %rd57 + 0 ];
|
138 |
+
@!%p13 mov.u16 %rs4, %rs12;
|
139 |
+
.loc 1 36 101
|
140 |
+
cvt.f32.bf16 %r48, %rs4;
|
141 |
+
mov.b32 %f32, %r48;
|
142 |
+
.loc 1 40 55
|
143 |
+
mov.u64 %rd60, assertMessage_0;
|
144 |
+
cvta.global.u64 %rd61, %rd60;
|
145 |
+
mov.u64 %rd62, assertFile_0;
|
146 |
+
cvta.global.u64 %rd63, %rd62;
|
147 |
+
mov.u64 %rd64, assertFunc_0;
|
148 |
+
cvta.global.u64 %rd65, %rd64;
|
149 |
+
{ // callseq 10, 0
|
150 |
+
.reg .b32 temp_param_reg;
|
151 |
+
.param .b64 param0;
|
152 |
+
st.param.b64 [param0+0], %rd61;
|
153 |
+
.param .b64 param1;
|
154 |
+
st.param.b64 [param1+0], %rd63;
|
155 |
+
.param .b32 param2;
|
156 |
+
st.param.b32 [param2+0], %r88;
|
157 |
+
.param .b64 param3;
|
158 |
+
st.param.b64 [param3+0], %rd65;
|
159 |
+
.param .b64 param4;
|
160 |
+
st.param.b64 [param4+0], %rd81;
|
161 |
+
call.uni
|
162 |
+
__assertfail,
|
163 |
+
(
|
164 |
+
param0,
|
165 |
+
param1,
|
166 |
+
param2,
|
167 |
+
param3,
|
168 |
+
param4
|
169 |
+
);
|
170 |
+
} // callseq 10
|
171 |
+
.loc 1 41 52
|
172 |
+
mov.u32 %r49, 0x0;
|
173 |
+
@%p13 ld.global.L1::evict_last.b32 { %r49 }, [ %rd83 + 0 ];
|
174 |
+
@!%p13 mov.u32 %r49, %r76;
|
175 |
+
mov.b32 %f33, %r49;
|
176 |
+
.loc 1 42 22
|
177 |
+
add.f32 %f34, %f31, %f33;
|
178 |
+
.loc 1 44 22
|
179 |
+
add.f32 %f35, %f32, %f34;
|
180 |
+
$L__tmp1:
|
181 |
+
.loc 2 96 20
|
182 |
+
sub.f32 %f36, %f35, %f76;
|
183 |
+
.loc 2 97 26
|
184 |
+
add.f32 %f78, %f78, 0f3F800000;
|
185 |
+
.loc 2 98 30
|
186 |
+
mov.b32 %r52, %f36;
|
187 |
+
mov.b32 %r53, %f78;
|
188 |
+
div.full.f32 %r51, %r52, %r53;
|
189 |
+
mov.b32 %f37, %r51;
|
190 |
+
.loc 2 98 22
|
191 |
+
add.f32 %f76, %f76, %f37;
|
192 |
+
.loc 2 101 30
|
193 |
+
sub.f32 %f38, %f35, %f76;
|
194 |
+
$L__tmp2:
|
195 |
+
.loc 1 50 50
|
196 |
+
fma.rn.f32 %f77, %f36, %f38, %f77;
|
197 |
+
.loc 1 31 36
|
198 |
+
add.s64 %rd83, %rd83, 16;
|
199 |
+
add.s64 %rd82, %rd82, 16;
|
200 |
+
setp.lt.u32 %p19, %r89, 252;
|
201 |
+
@%p19 bra $L__BB0_4;
|
202 |
+
bra.uni $L__BB0_5;
|
203 |
+
$L__BB0_1:
|
204 |
+
.loc 1 0 36
|
205 |
+
mov.b32 %r90, -4;
|
206 |
+
.loc 1 31 36
|
207 |
+
shl.b64 %rd50, %rd2, 2;
|
208 |
+
mul.wide.u32 %rd88, %r3, 4;
|
209 |
+
add.s64 %rd87, %rd50, %rd88;
|
210 |
+
add.s64 %rd85, %rd34, %rd87;
|
211 |
+
shl.b32 %r27, %r15, 14;
|
212 |
+
shl.b32 %r28, %r2, 8;
|
213 |
+
or.b32 %r29, %r27, %r28;
|
214 |
+
or.b32 %r91, %r29, %r3;
|
215 |
+
add.s32 %r30, %r5, %r3;
|
216 |
+
mul.wide.s32 %rd86, %r30, 4;
|
217 |
+
add.s64 %rd84, %rd35, %rd86;
|
218 |
+
mov.f32 %f78, 0f00000000;
|
219 |
+
mov.f32 %f77, %f78;
|
220 |
+
mov.f32 %f76, %f78;
|
221 |
+
$L__BB0_2:
|
222 |
+
.loc 1 35 50
|
223 |
+
mov.u32 %r31, 0x0;
|
224 |
+
@%p13 ld.global.L1::evict_last.b32 { %r31 }, [ %rd84 + 0 ];
|
225 |
+
@!%p13 mov.u32 %r31, %r76;
|
226 |
+
mov.b32 %f22, %r31;
|
227 |
+
.loc 1 31 36
|
228 |
+
add.s32 %r90, %r90, 4;
|
229 |
+
.loc 1 36 34
|
230 |
+
add.s32 %r39, %r90, %r91;
|
231 |
+
mul.wide.s32 %rd54, %r39, 2;
|
232 |
+
add.s64 %rd52, %rd36, %rd54;
|
233 |
+
.loc 1 36 50
|
234 |
+
mov.u16 %rs1, 0x0;
|
235 |
+
@%p13 ld.global.L1::evict_last.b16 { %rs1 }, [ %rd52 + 0 ];
|
236 |
+
@!%p13 mov.u16 %rs1, %rs12;
|
237 |
+
.loc 1 36 101
|
238 |
+
cvt.f32.bf16 %r33, %rs1;
|
239 |
+
mov.b32 %f23, %r33;
|
240 |
+
.loc 1 41 52
|
241 |
+
mov.u32 %r34, 0x0;
|
242 |
+
@%p13 ld.global.L1::evict_last.b32 { %r34 }, [ %rd85 + 0 ];
|
243 |
+
@!%p13 mov.u32 %r34, %r76;
|
244 |
+
mov.b32 %f24, %r34;
|
245 |
+
.loc 1 42 22
|
246 |
+
add.f32 %f25, %f22, %f24;
|
247 |
+
.loc 1 44 22
|
248 |
+
add.f32 %f26, %f23, %f25;
|
249 |
+
$L__tmp3:
|
250 |
+
.loc 2 96 20
|
251 |
+
sub.f32 %f27, %f26, %f76;
|
252 |
+
.loc 2 97 26
|
253 |
+
add.f32 %f78, %f78, 0f3F800000;
|
254 |
+
.loc 2 98 30
|
255 |
+
mov.b32 %r37, %f27;
|
256 |
+
mov.b32 %r38, %f78;
|
257 |
+
div.full.f32 %r36, %r37, %r38;
|
258 |
+
mov.b32 %f28, %r36;
|
259 |
+
.loc 2 98 22
|
260 |
+
add.f32 %f76, %f76, %f28;
|
261 |
+
.loc 2 101 30
|
262 |
+
sub.f32 %f29, %f26, %f76;
|
263 |
+
$L__tmp4:
|
264 |
+
.loc 1 50 50
|
265 |
+
fma.rn.f32 %f77, %f27, %f29, %f77;
|
266 |
+
.loc 1 31 36
|
267 |
+
add.s64 %rd85, %rd85, 16;
|
268 |
+
add.s64 %rd84, %rd84, 16;
|
269 |
+
setp.lt.u32 %p12, %r90, 252;
|
270 |
+
@%p12 bra $L__BB0_2;
|
271 |
+
$L__BB0_5:
|
272 |
+
.loc 1 0 36
|
273 |
+
ld.param.u64 %rd38, [triton__0d1d2d3d4d5d6de7de_param_5];
|
274 |
+
$L__tmp5:
|
275 |
+
.loc 2 120 46
|
276 |
+
mov.b32 %r66, %f76;
|
277 |
+
shfl.sync.bfly.b32 %r67, %r66, 2, 31, -1;
|
278 |
+
mov.b32 %f39, %r67;
|
279 |
+
mov.b32 %r68, %f77;
|
280 |
+
shfl.sync.bfly.b32 %r69, %r68, 2, 31, -1;
|
281 |
+
mov.b32 %f40, %r69;
|
282 |
+
mov.b32 %r70, %f78;
|
283 |
+
shfl.sync.bfly.b32 %r57, %r70, 2, 31, -1;
|
284 |
+
mov.b32 %f41, %r57;
|
285 |
+
$L__tmp6:
|
286 |
+
.loc 2 108 21
|
287 |
+
sub.f32 %f42, %f39, %f76;
|
288 |
+
.loc 2 109 28
|
289 |
+
add.f32 %f43, %f78, %f41;
|
290 |
+
.loc 2 110 39
|
291 |
+
setp.eq.f32 %p20, %f43, 0f00000000;
|
292 |
+
.loc 2 110 60
|
293 |
+
mov.b32 %r58, %f43;
|
294 |
+
div.full.f32 %r56, %r57, %r58;
|
295 |
+
mov.b32 %f44, %r56;
|
296 |
+
.loc 2 110 49
|
297 |
+
selp.f32 %f45, 0f00000000, %f44, %p20;
|
298 |
+
.loc 2 112 17
|
299 |
+
fma.rn.f32 %f46, %f42, %f45, %f76;
|
300 |
+
.loc 2 113 15
|
301 |
+
add.f32 %f47, %f77, %f40;
|
302 |
+
.loc 2 113 30
|
303 |
+
mul.f32 %f48, %f42, %f42;
|
304 |
+
.loc 2 113 38
|
305 |
+
mul.f32 %f49, %f78, %f48;
|
306 |
+
.loc 2 113 22
|
307 |
+
fma.rn.f32 %f50, %f49, %f45, %f47;
|
308 |
+
$L__tmp7:
|
309 |
+
.loc 2 120 46
|
310 |
+
mov.b32 %r71, %f46;
|
311 |
+
shfl.sync.bfly.b32 %r72, %r71, 1, 31, -1;
|
312 |
+
mov.b32 %f51, %r72;
|
313 |
+
mov.b32 %r73, %f50;
|
314 |
+
shfl.sync.bfly.b32 %r74, %r73, 1, 31, -1;
|
315 |
+
mov.b32 %f52, %r74;
|
316 |
+
shfl.sync.bfly.b32 %r60, %r58, 1, 31, -1;
|
317 |
+
mov.b32 %f53, %r60;
|
318 |
+
$L__tmp8:
|
319 |
+
.loc 2 108 21
|
320 |
+
sub.f32 %f54, %f51, %f46;
|
321 |
+
.loc 2 109 28
|
322 |
+
add.f32 %f55, %f43, %f53;
|
323 |
+
.loc 2 110 39
|
324 |
+
setp.eq.f32 %p21, %f55, 0f00000000;
|
325 |
+
.loc 2 110 60
|
326 |
+
mov.b32 %r61, %f55;
|
327 |
+
div.full.f32 %r59, %r60, %r61;
|
328 |
+
mov.b32 %f56, %r59;
|
329 |
+
.loc 2 110 49
|
330 |
+
selp.f32 %f57, 0f00000000, %f56, %p21;
|
331 |
+
.loc 2 112 17
|
332 |
+
fma.rn.f32 %f16, %f54, %f57, %f46;
|
333 |
+
.loc 2 113 15
|
334 |
+
add.f32 %f58, %f50, %f52;
|
335 |
+
.loc 2 113 30
|
336 |
+
mul.f32 %f59, %f54, %f54;
|
337 |
+
.loc 2 113 38
|
338 |
+
mul.f32 %f60, %f43, %f59;
|
339 |
+
.loc 2 113 22
|
340 |
+
fma.rn.f32 %f61, %f57, %f60, %f58;
|
341 |
+
$L__tmp9:
|
342 |
+
.loc 1 75 24
|
343 |
+
mov.b32 %r63, %f61;
|
344 |
+
mov.b32 %r64, 1132462080;
|
345 |
+
div.full.f32 %r62, %r63, %r64;
|
346 |
+
mov.b32 %f62, %r62;
|
347 |
+
.loc 1 77 24
|
348 |
+
add.f32 %f17, %f62, 0f3727C5AC;
|
349 |
+
.loc 1 58 36
|
350 |
+
add.s64 %rd91, %rd34, %rd87;
|
351 |
+
add.s64 %rd90, %rd37, %rd88;
|
352 |
+
add.s64 %rd89, %rd35, %rd86;
|
353 |
+
mov.b32 %r92, -4;
|
354 |
+
setp.lt.u64 %p28, %rd47, 50257;
|
355 |
+
rsqrt.approx.ftz.f32 %f67, %f17;
|
356 |
+
bra.uni $L__BB0_6;
|
357 |
+
$L__BB0_8:
|
358 |
+
.loc 1 0 0
|
359 |
+
mov.b32 %f18, %r75;
|
360 |
+
cvt.s64.s32 %rd30, %r81;
|
361 |
+
cvt.f32.bf16 %r77, %rs7;
|
362 |
+
mov.b32 %f19, %r77;
|
363 |
+
mov.b32 %f20, %r78;
|
364 |
+
.loc 1 69 54
|
365 |
+
mov.u32 %r83, 0x0;
|
366 |
+
@%p13 ld.global.L1::evict_first.b32 { %r83 }, [ %rd91 + 0 ];
|
367 |
+
@!%p13 mov.u32 %r83, %r76;
|
368 |
+
mov.b32 %f63, %r83;
|
369 |
+
.loc 1 70 24
|
370 |
+
add.f32 %f64, %f18, %f63;
|
371 |
+
.loc 1 72 24
|
372 |
+
add.f32 %f65, %f19, %f64;
|
373 |
+
.loc 1 73 24
|
374 |
+
sub.f32 %f66, %f65, %f16;
|
375 |
+
.loc 1 79 24
|
376 |
+
mul.f32 %f68, %f66, %f67;
|
377 |
+
.loc 1 80 24
|
378 |
+
mul.f32 %f69, %f68, %f20;
|
379 |
+
.loc 1 82 29
|
380 |
+
shl.b64 %rd80, %rd30, 1;
|
381 |
+
add.s64 %rd79, %rd38, %rd80;
|
382 |
+
.loc 1 82 52
|
383 |
+
mov.b32 %r85, %f69;
|
384 |
+
cvt.rn.bf16.f32 %rs10, %r85;
|
385 |
+
@%p13 st.global.b16 [ %rd79 + 0 ], { %rs10 };
|
386 |
+
.loc 1 58 36
|
387 |
+
add.s32 %r92, %r92, 4;
|
388 |
+
add.s64 %rd91, %rd91, 16;
|
389 |
+
add.s64 %rd90, %rd90, 16;
|
390 |
+
add.s64 %rd89, %rd89, 16;
|
391 |
+
setp.lt.u32 %p32, %r92, 252;
|
392 |
+
@%p32 bra $L__BB0_6;
|
393 |
+
bra.uni $L__BB0_9;
|
394 |
+
$L__BB0_6:
|
395 |
+
.loc 1 62 51
|
396 |
+
mov.u32 %r75, 0x0;
|
397 |
+
@%p13 ld.global.L1::evict_last.b32 { %r75 }, [ %rd89 + 0 ];
|
398 |
+
@!%p13 mov.u32 %r75, %r76;
|
399 |
+
.loc 1 63 35
|
400 |
+
add.s32 %r80, %r91, %r92;
|
401 |
+
add.s32 %r81, %r80, 4;
|
402 |
+
mul.wide.s32 %rd70, %r81, 2;
|
403 |
+
add.s64 %rd68, %rd36, %rd70;
|
404 |
+
.loc 1 63 51
|
405 |
+
mov.u16 %rs7, 0x0;
|
406 |
+
@%p13 ld.global.L1::evict_first.b16 { %rs7 }, [ %rd68 + 0 ];
|
407 |
+
@!%p13 mov.u16 %rs7, %rs12;
|
408 |
+
.loc 1 64 40
|
409 |
+
mov.u32 %r78, 0x0;
|
410 |
+
@%p13 ld.global.L1::evict_last.b32 { %r78 }, [ %rd90 + 0 ];
|
411 |
+
@!%p13 mov.u32 %r78, %r76;
|
412 |
+
.loc 1 68 57
|
413 |
+
@%p28 bra $L__BB0_8;
|
414 |
+
mov.u64 %rd71, assertMessage_1;
|
415 |
+
cvta.global.u64 %rd72, %rd71;
|
416 |
+
mov.u64 %rd73, assertFile_1;
|
417 |
+
cvta.global.u64 %rd74, %rd73;
|
418 |
+
mov.u64 %rd75, assertFunc_1;
|
419 |
+
cvta.global.u64 %rd76, %rd75;
|
420 |
+
{ // callseq 11, 0
|
421 |
+
.reg .b32 temp_param_reg;
|
422 |
+
.param .b64 param0;
|
423 |
+
st.param.b64 [param0+0], %rd72;
|
424 |
+
.param .b64 param1;
|
425 |
+
st.param.b64 [param1+0], %rd74;
|
426 |
+
.param .b32 param2;
|
427 |
+
st.param.b32 [param2+0], %r88;
|
428 |
+
.param .b64 param3;
|
429 |
+
st.param.b64 [param3+0], %rd76;
|
430 |
+
.param .b64 param4;
|
431 |
+
st.param.b64 [param4+0], %rd81;
|
432 |
+
call.uni
|
433 |
+
__assertfail,
|
434 |
+
(
|
435 |
+
param0,
|
436 |
+
param1,
|
437 |
+
param2,
|
438 |
+
param3,
|
439 |
+
param4
|
440 |
+
);
|
441 |
+
} // callseq 11
|
442 |
+
bra.uni $L__BB0_8;
|
443 |
+
$L__BB0_9:
|
444 |
+
.loc 1 58 4
|
445 |
+
ret;
|
446 |
+
$L__tmp10:
|
447 |
+
$L__func_end0:
|
448 |
+
|
449 |
+
}
|
450 |
+
// .globl __nv_rsqrtf
|
451 |
+
.visible .func (.param .b32 func_retval0) __nv_rsqrtf(
|
452 |
+
.param .b32 __nv_rsqrtf_param_0
|
453 |
+
)
|
454 |
+
{
|
455 |
+
.reg .f32 %f<3>;
|
456 |
+
$L__func_begin1:
|
457 |
+
|
458 |
+
ld.param.f32 %f1, [__nv_rsqrtf_param_0];
|
459 |
+
rsqrt.approx.ftz.f32 %f2, %f1;
|
460 |
+
st.param.f32 [func_retval0+0], %f2;
|
461 |
+
ret;
|
462 |
+
$L__func_end1:
|
463 |
+
|
464 |
+
}
|
465 |
+
.file 1 "/tmp/torchinductor_root/ci/ccig6fki6p4lxrdmgg6eudahiexcvueeol2p4qp532pvve2y463y.py"
|
466 |
+
.file 2 "/usr/local/lib/python3.10/dist-packages/torch/_inductor/triton_helpers.py"
|
467 |
+
.section .debug_abbrev
|
468 |
+
{
|
469 |
+
.b8 1
|
470 |
+
.b8 17
|
471 |
+
.b8 1
|
472 |
+
.b8 37
|
473 |
+
.b8 8
|
474 |
+
.b8 19
|
475 |
+
.b8 5
|
476 |
+
.b8 3
|
477 |
+
.b8 8
|
478 |
+
.b8 16
|
479 |
+
.b8 6
|
480 |
+
.b8 27
|
481 |
+
.b8 8
|
482 |
+
.b8 180
|
483 |
+
.b8 66
|
484 |
+
.b8 12
|
485 |
+
.b8 17
|
486 |
+
.b8 1
|
487 |
+
.b8 18
|
488 |
+
.b8 1
|
489 |
+
.b8 0
|
490 |
+
.b8 0
|
491 |
+
.b8 2
|
492 |
+
.b8 46
|
493 |
+
.b8 0
|
494 |
+
.b8 135
|
495 |
+
.b8 64
|
496 |
+
.b8 8
|
497 |
+
.b8 3
|
498 |
+
.b8 8
|
499 |
+
.b8 58
|
500 |
+
.b8 11
|
501 |
+
.b8 59
|
502 |
+
.b8 11
|
503 |
+
.b8 63
|
504 |
+
.b8 12
|
505 |
+
.b8 32
|
506 |
+
.b8 11
|
507 |
+
.b8 0
|
508 |
+
.b8 0
|
509 |
+
.b8 3
|
510 |
+
.b8 46
|
511 |
+
.b8 1
|
512 |
+
.b8 17
|
513 |
+
.b8 1
|
514 |
+
.b8 18
|
515 |
+
.b8 1
|
516 |
+
.b8 64
|
517 |
+
.b8 10
|
518 |
+
.b8 49
|
519 |
+
.b8 19
|
520 |
+
.b8 0
|
521 |
+
.b8 0
|
522 |
+
.b8 4
|
523 |
+
.b8 29
|
524 |
+
.b8 0
|
525 |
+
.b8 49
|
526 |
+
.b8 19
|
527 |
+
.b8 17
|
528 |
+
.b8 1
|
529 |
+
.b8 18
|
530 |
+
.b8 1
|
531 |
+
.b8 88
|
532 |
+
.b8 11
|
533 |
+
.b8 89
|
534 |
+
.b8 11
|
535 |
+
.b8 87
|
536 |
+
.b8 11
|
537 |
+
.b8 0
|
538 |
+
.b8 0
|
539 |
+
.b8 5
|
540 |
+
.b8 29
|
541 |
+
.b8 1
|
542 |
+
.b8 49
|
543 |
+
.b8 19
|
544 |
+
.b8 17
|
545 |
+
.b8 1
|
546 |
+
.b8 18
|
547 |
+
.b8 1
|
548 |
+
.b8 88
|
549 |
+
.b8 11
|
550 |
+
.b8 89
|
551 |
+
.b8 11
|
552 |
+
.b8 87
|
553 |
+
.b8 11
|
554 |
+
.b8 0
|
555 |
+
.b8 0
|
556 |
+
.b8 0
|
557 |
+
}
|
558 |
+
.section .debug_info
|
559 |
+
{
|
560 |
+
.b32 302
|
561 |
+
.b8 2
|
562 |
+
.b8 0
|
563 |
+
.b32 .debug_abbrev
|
564 |
+
.b8 8
|
565 |
+
.b8 1
|
566 |
+
.b8 116
|
567 |
+
.b8 114
|
568 |
+
.b8 105
|
569 |
+
.b8 116
|
570 |
+
.b8 111
|
571 |
+
.b8 110
|
572 |
+
.b8 0
|
573 |
+
.b8 2
|
574 |
+
.b8 0
|
575 |
+
.b8 99
|
576 |
+
.b8 99
|
577 |
+
.b8 105
|
578 |
+
.b8 103
|
579 |
+
.b8 54
|
580 |
+
.b8 102
|
581 |
+
.b8 107
|
582 |
+
.b8 105
|
583 |
+
.b8 54
|
584 |
+
.b8 112
|
585 |
+
.b8 52
|
586 |
+
.b8 108
|
587 |
+
.b8 120
|
588 |
+
.b8 114
|
589 |
+
.b8 100
|
590 |
+
.b8 109
|
591 |
+
.b8 103
|
592 |
+
.b8 103
|
593 |
+
.b8 54
|
594 |
+
.b8 101
|
595 |
+
.b8 117
|
596 |
+
.b8 100
|
597 |
+
.b8 97
|
598 |
+
.b8 104
|
599 |
+
.b8 105
|
600 |
+
.b8 101
|
601 |
+
.b8 120
|
602 |
+
.b8 99
|
603 |
+
.b8 118
|
604 |
+
.b8 117
|
605 |
+
.b8 101
|
606 |
+
.b8 101
|
607 |
+
.b8 111
|
608 |
+
.b8 108
|
609 |
+
.b8 50
|
610 |
+
.b8 112
|
611 |
+
.b8 52
|
612 |
+
.b8 113
|
613 |
+
.b8 112
|
614 |
+
.b8 53
|
615 |
+
.b8 51
|
616 |
+
.b8 50
|
617 |
+
.b8 112
|
618 |
+
.b8 118
|
619 |
+
.b8 118
|
620 |
+
.b8 101
|
621 |
+
.b8 50
|
622 |
+
.b8 121
|
623 |
+
.b8 52
|
624 |
+
.b8 54
|
625 |
+
.b8 51
|
626 |
+
.b8 121
|
627 |
+
.b8 46
|
628 |
+
.b8 112
|
629 |
+
.b8 121
|
630 |
+
.b8 0
|
631 |
+
.b32 .debug_line
|
632 |
+
.b8 47
|
633 |
+
.b8 116
|
634 |
+
.b8 109
|
635 |
+
.b8 112
|
636 |
+
.b8 47
|
637 |
+
.b8 116
|
638 |
+
.b8 111
|
639 |
+
.b8 114
|
640 |
+
.b8 99
|
641 |
+
.b8 104
|
642 |
+
.b8 105
|
643 |
+
.b8 110
|
644 |
+
.b8 100
|
645 |
+
.b8 117
|
646 |
+
.b8 99
|
647 |
+
.b8 116
|
648 |
+
.b8 111
|
649 |
+
.b8 114
|
650 |
+
.b8 95
|
651 |
+
.b8 114
|
652 |
+
.b8 111
|
653 |
+
.b8 111
|
654 |
+
.b8 116
|
655 |
+
.b8 47
|
656 |
+
.b8 99
|
657 |
+
.b8 105
|
658 |
+
.b8 0
|
659 |
+
.b8 1
|
660 |
+
.b64 $L__func_begin0
|
661 |
+
.b64 $L__func_end0
|
662 |
+
.b8 2
|
663 |
+
.b8 116
|
664 |
+
.b8 114
|
665 |
+
.b8 105
|
666 |
+
.b8 116
|
667 |
+
.b8 111
|
668 |
+
.b8 110
|
669 |
+
.b8 95
|
670 |
+
.b8 95
|
671 |
+
.b8 48
|
672 |
+
.b8 100
|
673 |
+
.b8 49
|
674 |
+
.b8 100
|
675 |
+
.b8 50
|
676 |
+
.b8 100
|
677 |
+
.b8 51
|
678 |
+
.b8 100
|
679 |
+
.b8 52
|
680 |
+
.b8 100
|
681 |
+
.b8 53
|
682 |
+
.b8 100
|
683 |
+
.b8 54
|
684 |
+
.b8 100
|
685 |
+
.b8 101
|
686 |
+
.b8 55
|
687 |
+
.b8 100
|
688 |
+
.b8 101
|
689 |
+
.b8 0
|
690 |
+
.b8 116
|
691 |
+
.b8 114
|
692 |
+
.b8 105
|
693 |
+
.b8 116
|
694 |
+
.b8 111
|
695 |
+
.b8 110
|
696 |
+
.b8 95
|
697 |
+
.b8 95
|
698 |
+
.b8 48
|
699 |
+
.b8 100
|
700 |
+
.b8 49
|
701 |
+
.b8 100
|
702 |
+
.b8 50
|
703 |
+
.b8 100
|
704 |
+
.b8 51
|
705 |
+
.b8 100
|
706 |
+
.b8 52
|
707 |
+
.b8 100
|
708 |
+
.b8 53
|
709 |
+
.b8 100
|
710 |
+
.b8 54
|
711 |
+
.b8 100
|
712 |
+
.b8 101
|
713 |
+
.b8 55
|
714 |
+
.b8 100
|
715 |
+
.b8 101
|
716 |
+
.b8 0
|
717 |
+
.b8 1
|
718 |
+
.b8 18
|
719 |
+
.b8 1
|
720 |
+
.b8 1
|
721 |
+
.b8 3
|
722 |
+
.b64 $L__func_begin0
|
723 |
+
.b64 $L__func_end0
|
724 |
+
.b8 1
|
725 |
+
.b8 156
|
726 |
+
.b32 125
|
727 |
+
.b8 4
|
728 |
+
.b32 125
|
729 |
+
.b64 $L__tmp1
|
730 |
+
.b64 $L__tmp4
|
731 |
+
.b8 2
|
732 |
+
.b8 47
|
733 |
+
.b8 41
|
734 |
+
.b8 4
|
735 |
+
.b32 125
|
736 |
+
.b64 $L__tmp5
|
737 |
+
.b64 $L__tmp8
|
738 |
+
.b8 2
|
739 |
+
.b8 53
|
740 |
+
.b8 44
|
741 |
+
.b8 5
|
742 |
+
.b32 125
|
743 |
+
.b64 $L__tmp6
|
744 |
+
.b64 $L__tmp9
|
745 |
+
.b8 2
|
746 |
+
.b8 53
|
747 |
+
.b8 44
|
748 |
+
.b8 4
|
749 |
+
.b32 125
|
750 |
+
.b64 $L__tmp6
|
751 |
+
.b64 $L__tmp9
|
752 |
+
.b8 2
|
753 |
+
.b8 120
|
754 |
+
.b8 46
|
755 |
+
.b8 0
|
756 |
+
.b8 0
|
757 |
+
.b8 0
|
758 |
+
}
|
759 |
+
.section .debug_pubnames
|
760 |
+
{
|
761 |
+
.b32 $L__pubNames_end0-$L__pubNames_start0
|
762 |
+
$L__pubNames_start0:
|
763 |
+
.b8 2
|
764 |
+
.b8 0
|
765 |
+
.b32 .debug_info
|
766 |
+
.b32 306
|
767 |
+
.b32 125
|
768 |
+
.b8 116
|
769 |
+
.b8 114
|
770 |
+
.b8 105
|
771 |
+
.b8 116
|
772 |
+
.b8 111
|
773 |
+
.b8 110
|
774 |
+
.b8 95
|
775 |
+
.b8 95
|
776 |
+
.b8 48
|
777 |
+
.b8 100
|
778 |
+
.b8 49
|
779 |
+
.b8 100
|
780 |
+
.b8 50
|
781 |
+
.b8 100
|
782 |
+
.b8 51
|
783 |
+
.b8 100
|
784 |
+
.b8 52
|
785 |
+
.b8 100
|
786 |
+
.b8 53
|
787 |
+
.b8 100
|
788 |
+
.b8 54
|
789 |
+
.b8 100
|
790 |
+
.b8 101
|
791 |
+
.b8 55
|
792 |
+
.b8 100
|
793 |
+
.b8 101
|
794 |
+
.b8 0
|
795 |
+
.b32 0
|
796 |
+
$L__pubNames_end0:
|
797 |
+
}
|
798 |
+
.section .debug_pubtypes
|
799 |
+
{
|
800 |
+
.b32 $L__pubTypes_end0-$L__pubTypes_start0
|
801 |
+
$L__pubTypes_start0:
|
802 |
+
.b8 2
|
803 |
+
.b8 0
|
804 |
+
.b32 .debug_info
|
805 |
+
.b32 306
|
806 |
+
.b32 0
|
807 |
+
$L__pubTypes_end0:
|
808 |
+
}
|
809 |
+
.section .debug_loc { }
|
.triton/dump/3cd3b6d7993c56f7d0340d40c84f737c/triton_.ttgir
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#blocked = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [8, 4], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [1, 0]}>
|
2 |
+
#blocked1 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [1, 0]}>
|
3 |
+
module attributes {"triton_gpu.compute-capability" = 89 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 8 : i32, "triton_gpu.threads-per-warp" = 32 : i32} {
|
4 |
+
tt.func public @triton__0d1d2d3d4d5d6de7de(%arg0: !tt.ptr<i64, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg4: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg5: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg6: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}, %arg7: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
|
5 |
+
%cst = arith.constant dense<512> : tensor<64x1xi32, #blocked>
|
6 |
+
%cst_0 = arith.constant dense<256> : tensor<64x1xi32, #blocked>
|
7 |
+
%cst_1 = arith.constant dense<256> : tensor<64x1xi64, #blocked>
|
8 |
+
%cst_2 = arith.constant dense<0> : tensor<64x1xi64, #blocked>
|
9 |
+
%cst_3 = arith.constant dense<50257> : tensor<64x1xi64, #blocked>
|
10 |
+
%cst_4 = arith.constant dense<50257> : tensor<64x1xi64, #blocked1>
|
11 |
+
%cst_5 = arith.constant dense<0> : tensor<64x1xi64, #blocked1>
|
12 |
+
%c0_i32 = arith.constant 0 : i32
|
13 |
+
%c4_i32 = arith.constant 4 : i32
|
14 |
+
%c256_i32 = arith.constant 256 : i32
|
15 |
+
%cst_6 = arith.constant dense<1.000000e+00> : tensor<64x4xf32, #blocked>
|
16 |
+
%cst_7 = arith.constant 0.000000e+00 : f32
|
17 |
+
%cst_8 = arith.constant dense<0.000000e+00> : tensor<64x4xbf16, #blocked>
|
18 |
+
%cst_9 = arith.constant dense<0.000000e+00> : tensor<1x4xf32, #blocked>
|
19 |
+
%cst_10 = arith.constant dense<0.000000e+00> : tensor<64x4xf32, #blocked>
|
20 |
+
%cst_11 = arith.constant dense<256> : tensor<1x4xi32, #blocked>
|
21 |
+
%cst_12 = arith.constant dense<9.99999974E-6> : tensor<64x1xf32, #blocked>
|
22 |
+
%cst_13 = arith.constant dense<2.560000e+02> : tensor<64x1xf32, #blocked>
|
23 |
+
%c64_i32 = arith.constant 64 : i32
|
24 |
+
%0 = tt.get_program_id x : i32
|
25 |
+
%1 = arith.muli %0, %c64_i32 : i32
|
26 |
+
%2 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>
|
27 |
+
%3 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32, #triton_gpu.slice<{dim = 1, parent = #blocked1}>>
|
28 |
+
%4 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<64xi32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>) -> tensor<64x1xi32, #blocked>
|
29 |
+
%5 = tt.expand_dims %3 {axis = 1 : i32} : (tensor<64xi32, #triton_gpu.slice<{dim = 1, parent = #blocked1}>>) -> tensor<64x1xi32, #blocked1>
|
30 |
+
%6 = tt.splat %1 : (i32) -> tensor<64x1xi32, #blocked>
|
31 |
+
%7 = tt.splat %1 : (i32) -> tensor<64x1xi32, #blocked1>
|
32 |
+
%8 = arith.addi %6, %4 : tensor<64x1xi32, #blocked>
|
33 |
+
%9 = arith.addi %7, %5 : tensor<64x1xi32, #blocked1>
|
34 |
+
%10 = tt.make_range {end = 4 : i32, start = 0 : i32} : tensor<4xi32, #triton_gpu.slice<{dim = 0, parent = #blocked}>>
|
35 |
+
%11 = tt.expand_dims %10 {axis = 0 : i32} : (tensor<4xi32, #triton_gpu.slice<{dim = 0, parent = #blocked}>>) -> tensor<1x4xi32, #blocked>
|
36 |
+
%12 = tt.splat %arg0 : (!tt.ptr<i64, 1>) -> tensor<64x1x!tt.ptr<i64, 1>, #blocked>
|
37 |
+
%13 = tt.splat %arg0 : (!tt.ptr<i64, 1>) -> tensor<64x1x!tt.ptr<i64, 1>, #blocked1>
|
38 |
+
%14 = tt.addptr %12, %8 : tensor<64x1x!tt.ptr<i64, 1>, #blocked>, tensor<64x1xi32, #blocked>
|
39 |
+
%15 = tt.addptr %13, %9 : tensor<64x1x!tt.ptr<i64, 1>, #blocked1>, tensor<64x1xi32, #blocked1>
|
40 |
+
%16 = tt.load %14 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x1xi64, #blocked>
|
41 |
+
%17 = tt.load %15 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x1xi64, #blocked1>
|
42 |
+
%18 = arith.remsi %8, %cst : tensor<64x1xi32, #blocked>
|
43 |
+
%19 = arith.muli %18, %cst_0 : tensor<64x1xi32, #blocked>
|
44 |
+
%20 = tt.broadcast %19 : (tensor<64x1xi32, #blocked>) -> tensor<64x4xi32, #blocked>
|
45 |
+
%21 = tt.splat %arg2 : (!tt.ptr<f32, 1>) -> tensor<64x4x!tt.ptr<f32, 1>, #blocked>
|
46 |
+
%22 = arith.muli %8, %cst_0 : tensor<64x1xi32, #blocked>
|
47 |
+
%23 = tt.broadcast %22 : (tensor<64x1xi32, #blocked>) -> tensor<64x4xi32, #blocked>
|
48 |
+
%24 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<64x4x!tt.ptr<bf16, 1>, #blocked>
|
49 |
+
%25 = arith.addi %16, %cst_3 : tensor<64x1xi64, #blocked>
|
50 |
+
%26 = arith.addi %17, %cst_4 : tensor<64x1xi64, #blocked1>
|
51 |
+
%27 = arith.cmpi slt, %16, %cst_2 : tensor<64x1xi64, #blocked>
|
52 |
+
%28 = arith.cmpi slt, %17, %cst_5 : tensor<64x1xi64, #blocked1>
|
53 |
+
%29 = arith.select %27, %25, %16 : tensor<64x1xi1, #blocked>, tensor<64x1xi64, #blocked>
|
54 |
+
%30 = arith.select %28, %26, %17 : tensor<64x1xi1, #blocked1>, tensor<64x1xi64, #blocked1>
|
55 |
+
%31 = arith.cmpi sge, %30, %cst_5 : tensor<64x1xi64, #blocked1>
|
56 |
+
%32 = arith.cmpi slt, %30, %cst_4 : tensor<64x1xi64, #blocked1>
|
57 |
+
%33 = arith.andi %31, %32 : tensor<64x1xi1, #blocked1>
|
58 |
+
%34 = arith.muli %29, %cst_1 : tensor<64x1xi64, #blocked>
|
59 |
+
%35 = tt.broadcast %34 : (tensor<64x1xi64, #blocked>) -> tensor<64x4xi64, #blocked>
|
60 |
+
%36 = tt.splat %arg1 : (!tt.ptr<f32, 1>) -> tensor<64x4x!tt.ptr<f32, 1>, #blocked>
|
61 |
+
%37:3 = scf.for %arg8 = %c0_i32 to %c256_i32 step %c4_i32 iter_args(%arg9 = %cst_10, %arg10 = %cst_10, %arg11 = %cst_10) -> (tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>) : i32 {
|
62 |
+
%46 = tt.splat %arg8 : (i32) -> tensor<1x4xi32, #blocked>
|
63 |
+
%47 = arith.addi %46, %11 : tensor<1x4xi32, #blocked>
|
64 |
+
%48 = arith.cmpi slt, %47, %cst_11 : tensor<1x4xi32, #blocked>
|
65 |
+
%49 = tt.broadcast %47 : (tensor<1x4xi32, #blocked>) -> tensor<64x4xi32, #blocked>
|
66 |
+
%50 = arith.addi %49, %20 : tensor<64x4xi32, #blocked>
|
67 |
+
%51 = tt.addptr %21, %50 : tensor<64x4x!tt.ptr<f32, 1>, #blocked>, tensor<64x4xi32, #blocked>
|
68 |
+
%52 = tt.broadcast %48 : (tensor<1x4xi1, #blocked>) -> tensor<64x4xi1, #blocked>
|
69 |
+
%53 = tt.load %51, %52, %cst_10 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x4xf32, #blocked>
|
70 |
+
%54 = arith.addi %49, %23 : tensor<64x4xi32, #blocked>
|
71 |
+
%55 = tt.addptr %24, %54 : tensor<64x4x!tt.ptr<bf16, 1>, #blocked>, tensor<64x4xi32, #blocked>
|
72 |
+
%56 = tt.load %55, %52, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x4xbf16, #blocked>
|
73 |
+
%57 = arith.extf %56 : tensor<64x4xbf16, #blocked> to tensor<64x4xf32, #blocked>
|
74 |
+
tt.assert %33, "index out of bounds: 0 <= tmp3 < 50257", "<frozen importlib._bootstrap_external>", "_call_with_frames_removed", 883 : tensor<64x1xi1, #blocked1>
|
75 |
+
%58 = arith.extsi %47 : tensor<1x4xi32, #blocked> to tensor<1x4xi64, #blocked>
|
76 |
+
%59 = tt.broadcast %58 : (tensor<1x4xi64, #blocked>) -> tensor<64x4xi64, #blocked>
|
77 |
+
%60 = arith.addi %59, %35 : tensor<64x4xi64, #blocked>
|
78 |
+
%61 = tt.addptr %36, %60 : tensor<64x4x!tt.ptr<f32, 1>, #blocked>, tensor<64x4xi64, #blocked>
|
79 |
+
%62 = tt.load %61, %52, %cst_10 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x4xf32, #blocked>
|
80 |
+
%63 = arith.addf %62, %53 : tensor<64x4xf32, #blocked>
|
81 |
+
%64 = arith.addf %63, %57 : tensor<64x4xf32, #blocked>
|
82 |
+
%65 = arith.subf %64, %arg9 : tensor<64x4xf32, #blocked>
|
83 |
+
%66 = arith.addf %arg11, %cst_6 : tensor<64x4xf32, #blocked>
|
84 |
+
%67 = arith.divf %65, %66 : tensor<64x4xf32, #blocked>
|
85 |
+
%68 = arith.addf %arg9, %67 : tensor<64x4xf32, #blocked>
|
86 |
+
%69 = arith.subf %64, %68 : tensor<64x4xf32, #blocked>
|
87 |
+
%70 = arith.mulf %65, %69 : tensor<64x4xf32, #blocked>
|
88 |
+
%71 = arith.addf %arg10, %70 : tensor<64x4xf32, #blocked>
|
89 |
+
%72 = arith.select %52, %68, %arg9 : tensor<64x4xi1, #blocked>, tensor<64x4xf32, #blocked>
|
90 |
+
%73 = arith.select %52, %71, %arg10 : tensor<64x4xi1, #blocked>, tensor<64x4xf32, #blocked>
|
91 |
+
%74 = arith.select %52, %66, %arg11 : tensor<64x4xi1, #blocked>, tensor<64x4xf32, #blocked>
|
92 |
+
scf.yield %72, %73, %74 : tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>
|
93 |
+
}
|
94 |
+
%38:3 = "tt.reduce"(%37#0, %37#1, %37#2) <{axis = 1 : i32}> ({
|
95 |
+
^bb0(%arg8: f32, %arg9: f32, %arg10: f32, %arg11: f32, %arg12: f32, %arg13: f32):
|
96 |
+
%46 = arith.subf %arg11, %arg8 : f32
|
97 |
+
%47 = arith.addf %arg10, %arg13 : f32
|
98 |
+
%48 = arith.cmpf oeq, %47, %cst_7 : f32
|
99 |
+
%49 = arith.divf %arg13, %47 : f32
|
100 |
+
%50 = arith.select %48, %cst_7, %49 : f32
|
101 |
+
%51 = arith.mulf %46, %50 : f32
|
102 |
+
%52 = arith.addf %arg8, %51 : f32
|
103 |
+
%53 = arith.addf %arg9, %arg12 : f32
|
104 |
+
%54 = arith.mulf %46, %46 : f32
|
105 |
+
%55 = arith.mulf %54, %arg10 : f32
|
106 |
+
%56 = arith.mulf %55, %50 : f32
|
107 |
+
%57 = arith.addf %53, %56 : f32
|
108 |
+
tt.reduce.return %52, %57, %47 : f32, f32, f32
|
109 |
+
}) : (tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>, tensor<64x4xf32, #blocked>) -> (tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>, tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>, tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>)
|
110 |
+
%39 = tt.expand_dims %38#0 {axis = 1 : i32} : (tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>) -> tensor<64x1xf32, #blocked>
|
111 |
+
%40 = tt.expand_dims %38#1 {axis = 1 : i32} : (tensor<64xf32, #triton_gpu.slice<{dim = 1, parent = #blocked}>>) -> tensor<64x1xf32, #blocked>
|
112 |
+
%41 = tt.splat %arg4 : (!tt.ptr<f32, 1>) -> tensor<1x4x!tt.ptr<f32, 1>, #blocked>
|
113 |
+
%42 = tt.broadcast %39 : (tensor<64x1xf32, #blocked>) -> tensor<64x4xf32, #blocked>
|
114 |
+
%43 = arith.divf %40, %cst_13 : tensor<64x1xf32, #blocked>
|
115 |
+
%44 = arith.addf %43, %cst_12 : tensor<64x1xf32, #blocked>
|
116 |
+
%45 = tt.splat %arg5 : (!tt.ptr<bf16, 1>) -> tensor<64x4x!tt.ptr<bf16, 1>, #blocked>
|
117 |
+
scf.for %arg8 = %c0_i32 to %c256_i32 step %c4_i32 : i32 {
|
118 |
+
%46 = tt.splat %arg8 : (i32) -> tensor<1x4xi32, #blocked>
|
119 |
+
%47 = arith.addi %46, %11 : tensor<1x4xi32, #blocked>
|
120 |
+
%48 = arith.cmpi slt, %47, %cst_11 : tensor<1x4xi32, #blocked>
|
121 |
+
%49 = tt.broadcast %47 : (tensor<1x4xi32, #blocked>) -> tensor<64x4xi32, #blocked>
|
122 |
+
%50 = arith.addi %49, %20 : tensor<64x4xi32, #blocked>
|
123 |
+
%51 = tt.addptr %21, %50 : tensor<64x4x!tt.ptr<f32, 1>, #blocked>, tensor<64x4xi32, #blocked>
|
124 |
+
%52 = tt.broadcast %48 : (tensor<1x4xi1, #blocked>) -> tensor<64x4xi1, #blocked>
|
125 |
+
%53 = tt.load %51, %52, %cst_10 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<64x4xf32, #blocked>
|
126 |
+
%54 = arith.addi %49, %23 : tensor<64x4xi32, #blocked>
|
127 |
+
%55 = tt.addptr %24, %54 : tensor<64x4x!tt.ptr<bf16, 1>, #blocked>, tensor<64x4xi32, #blocked>
|
128 |
+
%56 = tt.load %55, %52, %cst_8 {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<64x4xbf16, #blocked>
|
129 |
+
%57 = arith.extf %56 : tensor<64x4xbf16, #blocked> to tensor<64x4xf32, #blocked>
|
130 |
+
%58 = tt.addptr %41, %47 : tensor<1x4x!tt.ptr<f32, 1>, #blocked>, tensor<1x4xi32, #blocked>
|
131 |
+
%59 = tt.load %58, %48, %cst_9 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x4xf32, #blocked>
|
132 |
+
tt.assert %33, "index out of bounds: 0 <= tmp16 < 50257", "<frozen importlib._bootstrap_external>", "_call_with_frames_removed", 883 : tensor<64x1xi1, #blocked1>
|
133 |
+
%60 = arith.extsi %47 : tensor<1x4xi32, #blocked> to tensor<1x4xi64, #blocked>
|
134 |
+
%61 = tt.broadcast %60 : (tensor<1x4xi64, #blocked>) -> tensor<64x4xi64, #blocked>
|
135 |
+
%62 = arith.addi %61, %35 : tensor<64x4xi64, #blocked>
|
136 |
+
%63 = tt.addptr %36, %62 : tensor<64x4x!tt.ptr<f32, 1>, #blocked>, tensor<64x4xi64, #blocked>
|
137 |
+
%64 = tt.load %63, %52, %cst_10 {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<64x4xf32, #blocked>
|
138 |
+
%65 = arith.addf %64, %53 : tensor<64x4xf32, #blocked>
|
139 |
+
%66 = arith.addf %65, %57 : tensor<64x4xf32, #blocked>
|
140 |
+
%67 = arith.subf %66, %42 : tensor<64x4xf32, #blocked>
|
141 |
+
%68 = tt.extern_elementwise %44 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_rsqrtf"} : (tensor<64x1xf32, #blocked>) -> tensor<64x1xf32, #blocked>
|
142 |
+
%69 = tt.broadcast %68 : (tensor<64x1xf32, #blocked>) -> tensor<64x4xf32, #blocked>
|
143 |
+
%70 = arith.mulf %67, %69 : tensor<64x4xf32, #blocked>
|
144 |
+
%71 = tt.broadcast %59 : (tensor<1x4xf32, #blocked>) -> tensor<64x4xf32, #blocked>
|
145 |
+
%72 = arith.mulf %70, %71 : tensor<64x4xf32, #blocked>
|
146 |
+
%73 = tt.addptr %45, %54 : tensor<64x4x!tt.ptr<bf16, 1>, #blocked>, tensor<64x4xi32, #blocked>
|
147 |
+
%74 = arith.truncf %72 : tensor<64x4xf32, #blocked> to tensor<64x4xbf16, #blocked>
|
148 |
+
tt.store %73, %74, %52 {cache = 1 : i32, evict = 1 : i32} : tensor<64x4xbf16, #blocked>
|
149 |
+
}
|
150 |
+
tt.return
|
151 |
+
}
|
152 |
+
}
|
.triton/dump/4993935f9a0e5939755cfb42600362cf/triton_.cubin
ADDED
Binary file (4.9 kB). View file
|
|
.triton/dump/4993935f9a0e5939755cfb42600362cf/triton_.ptx
ADDED
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Generated by LLVM NVPTX Back-End
|
3 |
+
//
|
4 |
+
|
5 |
+
.version 8.2
|
6 |
+
.target sm_89
|
7 |
+
.address_size 64
|
8 |
+
|
9 |
+
// .globl triton__0d1d2de
|
10 |
+
|
11 |
+
.visible .entry triton__0d1d2de(
|
12 |
+
.param .u64 triton__0d1d2de_param_0,
|
13 |
+
.param .u64 triton__0d1d2de_param_1,
|
14 |
+
.param .u32 triton__0d1d2de_param_2
|
15 |
+
)
|
16 |
+
.maxntid 256, 1, 1
|
17 |
+
{
|
18 |
+
.reg .pred %p<3>;
|
19 |
+
.reg .b16 %rs<3>;
|
20 |
+
.reg .b32 %r<13>;
|
21 |
+
.reg .b64 %rd<7>;
|
22 |
+
.loc 1 18 0
|
23 |
+
$L__func_begin0:
|
24 |
+
.loc 1 18 0
|
25 |
+
|
26 |
+
ld.param.u64 %rd3, [triton__0d1d2de_param_0];
|
27 |
+
ld.param.u64 %rd4, [triton__0d1d2de_param_1];
|
28 |
+
$L__tmp0:
|
29 |
+
.loc 1 21 36
|
30 |
+
mov.u32 %r7, %tid.x;
|
31 |
+
shl.b32 %r8, %r7, 1;
|
32 |
+
and.b32 %r9, %r8, 510;
|
33 |
+
.loc 1 20 28
|
34 |
+
mov.u32 %r1, %ctaid.x;
|
35 |
+
.loc 1 20 33
|
36 |
+
shl.b32 %r10, %r1, 9;
|
37 |
+
.loc 1 21 23
|
38 |
+
or.b32 %r11, %r10, %r9;
|
39 |
+
.loc 1 24 30
|
40 |
+
mul.wide.s32 %rd5, %r11, 4;
|
41 |
+
add.s64 %rd1, %rd3, %rd5;
|
42 |
+
mov.pred %p1, -1;
|
43 |
+
.loc 1 24 35
|
44 |
+
mov.u32 %r4, 0x0;
|
45 |
+
mov.u32 %r5, 0x0;
|
46 |
+
@%p1 ld.global.v2.b32 { %r4, %r5 }, [ %rd1 + 0 ];
|
47 |
+
.loc 1 26 25
|
48 |
+
mul.wide.s32 %rd6, %r11, 2;
|
49 |
+
add.s64 %rd2, %rd4, %rd6;
|
50 |
+
.loc 1 26 36
|
51 |
+
cvt.rn.bf16.f32 %rs1, %r4;
|
52 |
+
cvt.rn.bf16.f32 %rs2, %r5;
|
53 |
+
mov.b32 %r12, {%rs1, %rs2};
|
54 |
+
@%p1 st.global.b32 [ %rd2 + 0 ], { %r12 };
|
55 |
+
.loc 1 26 4
|
56 |
+
ret;
|
57 |
+
$L__tmp1:
|
58 |
+
$L__func_end0:
|
59 |
+
|
60 |
+
}
|
61 |
+
.file 1 "/tmp/torchinductor_root/zj/czjxjqxojsyyr4zmce6q6twysnucw6p4l5ujgp6ts2ecrm3ue3ex.py"
|
62 |
+
.section .debug_abbrev
|
63 |
+
{
|
64 |
+
.b8 1
|
65 |
+
.b8 17
|
66 |
+
.b8 1
|
67 |
+
.b8 37
|
68 |
+
.b8 8
|
69 |
+
.b8 19
|
70 |
+
.b8 5
|
71 |
+
.b8 3
|
72 |
+
.b8 8
|
73 |
+
.b8 16
|
74 |
+
.b8 6
|
75 |
+
.b8 27
|
76 |
+
.b8 8
|
77 |
+
.b8 180
|
78 |
+
.b8 66
|
79 |
+
.b8 12
|
80 |
+
.b8 17
|
81 |
+
.b8 1
|
82 |
+
.b8 18
|
83 |
+
.b8 1
|
84 |
+
.b8 0
|
85 |
+
.b8 0
|
86 |
+
.b8 2
|
87 |
+
.b8 46
|
88 |
+
.b8 0
|
89 |
+
.b8 17
|
90 |
+
.b8 1
|
91 |
+
.b8 18
|
92 |
+
.b8 1
|
93 |
+
.b8 64
|
94 |
+
.b8 10
|
95 |
+
.b8 135
|
96 |
+
.b8 64
|
97 |
+
.b8 8
|
98 |
+
.b8 3
|
99 |
+
.b8 8
|
100 |
+
.b8 58
|
101 |
+
.b8 11
|
102 |
+
.b8 59
|
103 |
+
.b8 11
|
104 |
+
.b8 63
|
105 |
+
.b8 12
|
106 |
+
.b8 0
|
107 |
+
.b8 0
|
108 |
+
.b8 0
|
109 |
+
}
|
110 |
+
.section .debug_info
|
111 |
+
{
|
112 |
+
.b32 176
|
113 |
+
.b8 2
|
114 |
+
.b8 0
|
115 |
+
.b32 .debug_abbrev
|
116 |
+
.b8 8
|
117 |
+
.b8 1
|
118 |
+
.b8 116
|
119 |
+
.b8 114
|
120 |
+
.b8 105
|
121 |
+
.b8 116
|
122 |
+
.b8 111
|
123 |
+
.b8 110
|
124 |
+
.b8 0
|
125 |
+
.b8 2
|
126 |
+
.b8 0
|
127 |
+
.b8 99
|
128 |
+
.b8 122
|
129 |
+
.b8 106
|
130 |
+
.b8 120
|
131 |
+
.b8 106
|
132 |
+
.b8 113
|
133 |
+
.b8 120
|
134 |
+
.b8 111
|
135 |
+
.b8 106
|
136 |
+
.b8 115
|
137 |
+
.b8 121
|
138 |
+
.b8 121
|
139 |
+
.b8 114
|
140 |
+
.b8 52
|
141 |
+
.b8 122
|
142 |
+
.b8 109
|
143 |
+
.b8 99
|
144 |
+
.b8 101
|
145 |
+
.b8 54
|
146 |
+
.b8 113
|
147 |
+
.b8 54
|
148 |
+
.b8 116
|
149 |
+
.b8 119
|
150 |
+
.b8 121
|
151 |
+
.b8 115
|
152 |
+
.b8 110
|
153 |
+
.b8 117
|
154 |
+
.b8 99
|
155 |
+
.b8 119
|
156 |
+
.b8 54
|
157 |
+
.b8 112
|
158 |
+
.b8 52
|
159 |
+
.b8 108
|
160 |
+
.b8 53
|
161 |
+
.b8 117
|
162 |
+
.b8 106
|
163 |
+
.b8 103
|
164 |
+
.b8 112
|
165 |
+
.b8 54
|
166 |
+
.b8 116
|
167 |
+
.b8 115
|
168 |
+
.b8 50
|
169 |
+
.b8 101
|
170 |
+
.b8 99
|
171 |
+
.b8 114
|
172 |
+
.b8 109
|
173 |
+
.b8 51
|
174 |
+
.b8 117
|
175 |
+
.b8 101
|
176 |
+
.b8 51
|
177 |
+
.b8 101
|
178 |
+
.b8 120
|
179 |
+
.b8 46
|
180 |
+
.b8 112
|
181 |
+
.b8 121
|
182 |
+
.b8 0
|
183 |
+
.b32 .debug_line
|
184 |
+
.b8 47
|
185 |
+
.b8 116
|
186 |
+
.b8 109
|
187 |
+
.b8 112
|
188 |
+
.b8 47
|
189 |
+
.b8 116
|
190 |
+
.b8 111
|
191 |
+
.b8 114
|
192 |
+
.b8 99
|
193 |
+
.b8 104
|
194 |
+
.b8 105
|
195 |
+
.b8 110
|
196 |
+
.b8 100
|
197 |
+
.b8 117
|
198 |
+
.b8 99
|
199 |
+
.b8 116
|
200 |
+
.b8 111
|
201 |
+
.b8 114
|
202 |
+
.b8 95
|
203 |
+
.b8 114
|
204 |
+
.b8 111
|
205 |
+
.b8 111
|
206 |
+
.b8 116
|
207 |
+
.b8 47
|
208 |
+
.b8 122
|
209 |
+
.b8 106
|
210 |
+
.b8 0
|
211 |
+
.b8 1
|
212 |
+
.b64 $L__func_begin0
|
213 |
+
.b64 $L__func_end0
|
214 |
+
.b8 2
|
215 |
+
.b64 $L__func_begin0
|
216 |
+
.b64 $L__func_end0
|
217 |
+
.b8 1
|
218 |
+
.b8 156
|
219 |
+
.b8 116
|
220 |
+
.b8 114
|
221 |
+
.b8 105
|
222 |
+
.b8 116
|
223 |
+
.b8 111
|
224 |
+
.b8 110
|
225 |
+
.b8 95
|
226 |
+
.b8 95
|
227 |
+
.b8 48
|
228 |
+
.b8 100
|
229 |
+
.b8 49
|
230 |
+
.b8 100
|
231 |
+
.b8 50
|
232 |
+
.b8 100
|
233 |
+
.b8 101
|
234 |
+
.b8 0
|
235 |
+
.b8 116
|
236 |
+
.b8 114
|
237 |
+
.b8 105
|
238 |
+
.b8 116
|
239 |
+
.b8 111
|
240 |
+
.b8 110
|
241 |
+
.b8 95
|
242 |
+
.b8 95
|
243 |
+
.b8 48
|
244 |
+
.b8 100
|
245 |
+
.b8 49
|
246 |
+
.b8 100
|
247 |
+
.b8 50
|
248 |
+
.b8 100
|
249 |
+
.b8 101
|
250 |
+
.b8 0
|
251 |
+
.b8 1
|
252 |
+
.b8 18
|
253 |
+
.b8 1
|
254 |
+
.b8 0
|
255 |
+
}
|
256 |
+
.section .debug_pubnames
|
257 |
+
{
|
258 |
+
.b32 $L__pubNames_end0-$L__pubNames_start0
|
259 |
+
$L__pubNames_start0:
|
260 |
+
.b8 2
|
261 |
+
.b8 0
|
262 |
+
.b32 .debug_info
|
263 |
+
.b32 180
|
264 |
+
.b32 125
|
265 |
+
.b8 116
|
266 |
+
.b8 114
|
267 |
+
.b8 105
|
268 |
+
.b8 116
|
269 |
+
.b8 111
|
270 |
+
.b8 110
|
271 |
+
.b8 95
|
272 |
+
.b8 95
|
273 |
+
.b8 48
|
274 |
+
.b8 100
|
275 |
+
.b8 49
|
276 |
+
.b8 100
|
277 |
+
.b8 50
|
278 |
+
.b8 100
|
279 |
+
.b8 101
|
280 |
+
.b8 0
|
281 |
+
.b32 0
|
282 |
+
$L__pubNames_end0:
|
283 |
+
}
|
284 |
+
.section .debug_pubtypes
|
285 |
+
{
|
286 |
+
.b32 $L__pubTypes_end0-$L__pubTypes_start0
|
287 |
+
$L__pubTypes_start0:
|
288 |
+
.b8 2
|
289 |
+
.b8 0
|
290 |
+
.b32 .debug_info
|
291 |
+
.b32 180
|
292 |
+
.b32 0
|
293 |
+
$L__pubTypes_end0:
|
294 |
+
}
|
295 |
+
.section .debug_loc { }
|
.triton/dump/4c6ad48573c74d55ed79384f6b432d50/triton_.ttir
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
module {
|
2 |
+
tt.func public @triton__0d1d2de(%arg0: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
|
3 |
+
%c1024_i32 = arith.constant 1024 : i32
|
4 |
+
%0 = tt.get_program_id x : i32
|
5 |
+
%1 = arith.muli %0, %c1024_i32 : i32
|
6 |
+
%2 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
|
7 |
+
%3 = tt.splat %1 : (i32) -> tensor<1024xi32>
|
8 |
+
%4 = arith.addi %3, %2 : tensor<1024xi32>
|
9 |
+
%5 = tt.splat %arg0 : (!tt.ptr<f32, 1>) -> tensor<1024x!tt.ptr<f32, 1>>
|
10 |
+
%6 = tt.addptr %5, %4 : tensor<1024x!tt.ptr<f32, 1>>, tensor<1024xi32>
|
11 |
+
%7 = tt.load %6 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<1024xf32>
|
12 |
+
%8 = tt.splat %arg1 : (!tt.ptr<bf16, 1>) -> tensor<1024x!tt.ptr<bf16, 1>>
|
13 |
+
%9 = tt.addptr %8, %4 : tensor<1024x!tt.ptr<bf16, 1>>, tensor<1024xi32>
|
14 |
+
%10 = arith.truncf %7 : tensor<1024xf32> to tensor<1024xbf16>
|
15 |
+
tt.store %9, %10 {cache = 1 : i32, evict = 1 : i32} : tensor<1024xbf16>
|
16 |
+
tt.return
|
17 |
+
}
|
18 |
+
}
|
.triton/dump/4ce9eb7fe63f19e54893f0c74df91471/triton_.ttgir
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#blocked = #triton_gpu.blocked<{sizePerThread = [2], threadsPerWarp = [32], warpsPerCTA = [8], order = [0], CTAsPerCGA = [1], CTASplitNum = [1], CTAOrder = [0]}>
|
2 |
+
module attributes {"triton_gpu.compute-capability" = 89 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 8 : i32, "triton_gpu.threads-per-warp" = 32 : i32} {
|
3 |
+
tt.func public @triton__0d1d2de(%arg0: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
|
4 |
+
%cst = arith.constant dense<1.000000e+00> : tensor<512xf32, #blocked>
|
5 |
+
%cst_0 = arith.constant dense<0.707106769> : tensor<512xf32, #blocked>
|
6 |
+
%cst_1 = arith.constant dense<5.000000e-01> : tensor<512xf32, #blocked>
|
7 |
+
%c512_i32 = arith.constant 512 : i32
|
8 |
+
%0 = tt.get_program_id x : i32
|
9 |
+
%1 = arith.muli %0, %c512_i32 : i32
|
10 |
+
%2 = tt.make_range {end = 512 : i32, start = 0 : i32} : tensor<512xi32, #blocked>
|
11 |
+
%3 = tt.splat %1 : (i32) -> tensor<512xi32, #blocked>
|
12 |
+
%4 = arith.addi %3, %2 : tensor<512xi32, #blocked>
|
13 |
+
%5 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>, #blocked>
|
14 |
+
%6 = tt.addptr %5, %4 : tensor<512x!tt.ptr<bf16, 1>, #blocked>, tensor<512xi32, #blocked>
|
15 |
+
%7 = tt.load %6 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<512xbf16, #blocked>
|
16 |
+
%8 = arith.extf %7 : tensor<512xbf16, #blocked> to tensor<512xf32, #blocked>
|
17 |
+
%9 = arith.mulf %8, %cst_1 : tensor<512xf32, #blocked>
|
18 |
+
%10 = arith.mulf %8, %cst_0 : tensor<512xf32, #blocked>
|
19 |
+
%11 = tt.extern_elementwise %10 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_erff"} : (tensor<512xf32, #blocked>) -> tensor<512xf32, #blocked>
|
20 |
+
%12 = arith.addf %11, %cst : tensor<512xf32, #blocked>
|
21 |
+
%13 = arith.mulf %9, %12 : tensor<512xf32, #blocked>
|
22 |
+
%14 = tt.splat %arg1 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>, #blocked>
|
23 |
+
%15 = tt.addptr %14, %4 : tensor<512x!tt.ptr<bf16, 1>, #blocked>, tensor<512xi32, #blocked>
|
24 |
+
%16 = arith.truncf %13 : tensor<512xf32, #blocked> to tensor<512xbf16, #blocked>
|
25 |
+
tt.store %15, %16 {cache = 1 : i32, evict = 1 : i32} : tensor<512xbf16, #blocked>
|
26 |
+
tt.return
|
27 |
+
}
|
28 |
+
}
|
.triton/dump/4ce9eb7fe63f19e54893f0c74df91471/triton_.ttir
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
module {
|
2 |
+
tt.func public @triton__0d1d2de(%arg0: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
|
3 |
+
%cst = arith.constant dense<1.000000e+00> : tensor<512xf32>
|
4 |
+
%cst_0 = arith.constant dense<0.707106769> : tensor<512xf32>
|
5 |
+
%cst_1 = arith.constant dense<5.000000e-01> : tensor<512xf32>
|
6 |
+
%c512_i32 = arith.constant 512 : i32
|
7 |
+
%0 = tt.get_program_id x : i32
|
8 |
+
%1 = arith.muli %0, %c512_i32 : i32
|
9 |
+
%2 = tt.make_range {end = 512 : i32, start = 0 : i32} : tensor<512xi32>
|
10 |
+
%3 = tt.splat %1 : (i32) -> tensor<512xi32>
|
11 |
+
%4 = arith.addi %3, %2 : tensor<512xi32>
|
12 |
+
%5 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>>
|
13 |
+
%6 = tt.addptr %5, %4 : tensor<512x!tt.ptr<bf16, 1>>, tensor<512xi32>
|
14 |
+
%7 = tt.load %6 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<512xbf16>
|
15 |
+
%8 = arith.extf %7 : tensor<512xbf16> to tensor<512xf32>
|
16 |
+
%9 = arith.mulf %8, %cst_1 : tensor<512xf32>
|
17 |
+
%10 = arith.mulf %8, %cst_0 : tensor<512xf32>
|
18 |
+
%11 = tt.extern_elementwise %10 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_erff"} : (tensor<512xf32>) -> tensor<512xf32>
|
19 |
+
%12 = arith.addf %11, %cst : tensor<512xf32>
|
20 |
+
%13 = arith.mulf %9, %12 : tensor<512xf32>
|
21 |
+
%14 = tt.splat %arg1 : (!tt.ptr<bf16, 1>) -> tensor<512x!tt.ptr<bf16, 1>>
|
22 |
+
%15 = tt.addptr %14, %4 : tensor<512x!tt.ptr<bf16, 1>>, tensor<512xi32>
|
23 |
+
%16 = arith.truncf %13 : tensor<512xf32> to tensor<512xbf16>
|
24 |
+
tt.store %15, %16 {cache = 1 : i32, evict = 1 : i32} : tensor<512xbf16>
|
25 |
+
tt.return
|
26 |
+
}
|
27 |
+
}
|
.triton/dump/51e329eae41e4ee17aa201fff8371d94/triton_.llir
ADDED
The diff for this file is too large to render.
See raw diff
|
|
.triton/dump/76fb48b96c75cb8e388c291a18ef9b02/triton_.llir
ADDED
@@ -0,0 +1,600 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
; ModuleID = 'LLVMDialectModule'
|
2 |
+
source_filename = "LLVMDialectModule"
|
3 |
+
|
4 |
+
@assertFunc_1 = internal constant [8 x i8] c"<module>"
|
5 |
+
@assertFile_1 = internal constant [68 x i8] c"/usr/local/lib/python3.10/dist-packages/torch/_inductor/codecache.py"
|
6 |
+
@assertMessage_1 = internal constant [39 x i8] c"index out of bounds: 0 <= tmp16 < 50257"
|
7 |
+
@assertFunc_0 = internal constant [8 x i8] c"<module>"
|
8 |
+
@assertFile_0 = internal constant [68 x i8] c"/usr/local/lib/python3.10/dist-packages/torch/_inductor/codecache.py"
|
9 |
+
@assertMessage_0 = internal constant [38 x i8] c"index out of bounds: 0 <= tmp3 < 50257"
|
10 |
+
@global_smem = external addrspace(3) global [0 x i8]
|
11 |
+
@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
|
12 |
+
|
13 |
+
declare void @__assertfail(ptr, ptr, i32, ptr, i64) local_unnamed_addr
|
14 |
+
|
15 |
+
define void @triton__0d1d2d3d4d5d6de7de(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, ptr addrspace(1) %5, i32 %6, i32 %7) local_unnamed_addr !dbg !7 {
|
16 |
+
%9 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
|
17 |
+
%10 = and i32 %9, 31, !dbg !10
|
18 |
+
%11 = lshr i32 %9, 6, !dbg !10
|
19 |
+
%12 = and i32 %11, 1, !dbg !10
|
20 |
+
%13 = and i32 %9, 1, !dbg !10
|
21 |
+
%urem = shl i32 %9, 1, !dbg !11
|
22 |
+
%14 = and i32 %urem, 126, !dbg !11
|
23 |
+
%15 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #6, !dbg !12
|
24 |
+
%16 = shl i32 %15, 1, !dbg !13
|
25 |
+
%17 = or i32 %16, %12, !dbg !14
|
26 |
+
%18 = or i32 %16, %13, !dbg !14
|
27 |
+
%19 = sext i32 %17 to i64, !dbg !15
|
28 |
+
%20 = getelementptr i64, ptr addrspace(1) %0, i64 %19, !dbg !15
|
29 |
+
%21 = sext i32 %18 to i64, !dbg !15
|
30 |
+
%22 = getelementptr i64, ptr addrspace(1) %0, i64 %21, !dbg !15
|
31 |
+
%23 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %20, i1 true) #6, !dbg !16
|
32 |
+
%24 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %20, i1 true) #6, !dbg !16
|
33 |
+
%25 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %22, i1 true) #6, !dbg !16
|
34 |
+
%26 = srem i32 %17, 512, !dbg !17
|
35 |
+
%27 = shl nsw i32 %26, 8, !dbg !18
|
36 |
+
%28 = shl i32 %17, 8, !dbg !19
|
37 |
+
%29 = add i64 %25, 50257, !dbg !20
|
38 |
+
%30 = icmp slt i64 %23, 0, !dbg !21
|
39 |
+
%31 = icmp slt i64 %25, 0, !dbg !21
|
40 |
+
%32 = select i1 %31, i64 %29, i64 %25, !dbg !22
|
41 |
+
%33 = icmp ugt i64 %32, 50256, !dbg !23
|
42 |
+
%34 = shl i64 %23, 8, !dbg !24
|
43 |
+
%35 = add i64 %34, 12865792, !dbg !24
|
44 |
+
%36 = select i1 %30, i64 %35, i64 %34, !dbg !24
|
45 |
+
%37 = getelementptr float, ptr addrspace(1) %1, i64 %36
|
46 |
+
%38 = or i32 %14, %27, !dbg !25
|
47 |
+
%39 = sext i32 %38 to i64, !dbg !26
|
48 |
+
%40 = getelementptr float, ptr addrspace(1) %2, i64 %39, !dbg !26
|
49 |
+
%41 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %40, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !27
|
50 |
+
%42 = extractvalue { i32, i32 } %41, 0, !dbg !27
|
51 |
+
%43 = extractvalue { i32, i32 } %41, 1, !dbg !27
|
52 |
+
%44 = insertelement <2 x i32> poison, i32 %42, i64 0, !dbg !27
|
53 |
+
%45 = insertelement <2 x i32> %44, i32 %43, i64 1, !dbg !27
|
54 |
+
%46 = bitcast <2 x i32> %45 to <2 x float>, !dbg !27
|
55 |
+
%47 = or i32 %14, %28, !dbg !28
|
56 |
+
%48 = sext i32 %47 to i64, !dbg !29
|
57 |
+
%49 = getelementptr i16, ptr addrspace(1) %3, i64 %48, !dbg !29
|
58 |
+
%50 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %49, i1 true, i32 0, i1 true) #6, !dbg !30
|
59 |
+
%51 = trunc i32 %50 to i16, !dbg !30
|
60 |
+
%extelt.offset2 = lshr i32 %50, 16, !dbg !30
|
61 |
+
%52 = trunc i32 %extelt.offset2 to i16, !dbg !30
|
62 |
+
%53 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %51) #6, !dbg !31
|
63 |
+
%54 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %52) #6, !dbg !31
|
64 |
+
br i1 %33, label %55, label %56, !dbg !32
|
65 |
+
|
66 |
+
55: ; preds = %8
|
67 |
+
tail call void @__assertfail(ptr nonnull @assertMessage_0, ptr nonnull @assertFile_0, i32 1892, ptr nonnull @assertFunc_0, i64 1), !dbg !32
|
68 |
+
br label %56, !dbg !32
|
69 |
+
|
70 |
+
56: ; preds = %55, %8
|
71 |
+
%57 = zext nneg i32 %14 to i64, !dbg !33
|
72 |
+
%58 = getelementptr float, ptr addrspace(1) %37, i64 %57, !dbg !34
|
73 |
+
%59 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %58, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !35
|
74 |
+
%60 = extractvalue { i32, i32 } %59, 0, !dbg !35
|
75 |
+
%61 = extractvalue { i32, i32 } %59, 1, !dbg !35
|
76 |
+
%62 = insertelement <2 x i32> poison, i32 %60, i64 0, !dbg !35
|
77 |
+
%63 = insertelement <2 x i32> %62, i32 %61, i64 1, !dbg !35
|
78 |
+
%64 = bitcast <2 x i32> %63 to <2 x float>, !dbg !35
|
79 |
+
%65 = fadd <2 x float> %46, %64, !dbg !36
|
80 |
+
%66 = insertelement <2 x float> poison, float %53, i64 0, !dbg !37
|
81 |
+
%67 = insertelement <2 x float> %66, float %54, i64 1, !dbg !37
|
82 |
+
%68 = fadd <2 x float> %67, %65, !dbg !37
|
83 |
+
%69 = extractelement <2 x float> %68, i64 0, !dbg !38
|
84 |
+
%70 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %69, float 1.000000e+00) #6, !dbg !38
|
85 |
+
%71 = extractelement <2 x float> %68, i64 1, !dbg !38
|
86 |
+
%72 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %71, float 1.000000e+00) #6, !dbg !38
|
87 |
+
%73 = insertelement <2 x float> poison, float %70, i64 0, !dbg !42
|
88 |
+
%74 = insertelement <2 x float> %73, float %72, i64 1, !dbg !42
|
89 |
+
%75 = fadd <2 x float> %74, zeroinitializer, !dbg !42
|
90 |
+
%76 = fsub <2 x float> %68, %75, !dbg !43
|
91 |
+
%77 = fmul <2 x float> %68, %76, !dbg !44
|
92 |
+
%78 = fadd <2 x float> %77, zeroinitializer, !dbg !45
|
93 |
+
%79 = or i32 %14, 128, !dbg !46
|
94 |
+
%80 = or i32 %79, %27, !dbg !25
|
95 |
+
%81 = sext i32 %80 to i64, !dbg !26
|
96 |
+
%82 = getelementptr float, ptr addrspace(1) %2, i64 %81, !dbg !26
|
97 |
+
%83 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %82, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !27
|
98 |
+
%84 = extractvalue { i32, i32 } %83, 0, !dbg !27
|
99 |
+
%85 = extractvalue { i32, i32 } %83, 1, !dbg !27
|
100 |
+
%86 = insertelement <2 x i32> poison, i32 %84, i64 0, !dbg !27
|
101 |
+
%87 = insertelement <2 x i32> %86, i32 %85, i64 1, !dbg !27
|
102 |
+
%88 = bitcast <2 x i32> %87 to <2 x float>, !dbg !27
|
103 |
+
%89 = or i32 %79, %28, !dbg !28
|
104 |
+
%90 = sext i32 %89 to i64, !dbg !29
|
105 |
+
%91 = getelementptr i16, ptr addrspace(1) %3, i64 %90, !dbg !29
|
106 |
+
%92 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %91, i1 true, i32 0, i1 true) #6, !dbg !30
|
107 |
+
%93 = trunc i32 %92 to i16, !dbg !30
|
108 |
+
%extelt.offset2.1 = lshr i32 %92, 16, !dbg !30
|
109 |
+
%94 = trunc i32 %extelt.offset2.1 to i16, !dbg !30
|
110 |
+
%95 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %93) #6, !dbg !31
|
111 |
+
%96 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %94) #6, !dbg !31
|
112 |
+
br i1 %33, label %97, label %98, !dbg !32
|
113 |
+
|
114 |
+
97: ; preds = %56
|
115 |
+
tail call void @__assertfail(ptr nonnull @assertMessage_0, ptr nonnull @assertFile_0, i32 1892, ptr nonnull @assertFunc_0, i64 1), !dbg !32
|
116 |
+
br label %98, !dbg !32
|
117 |
+
|
118 |
+
98: ; preds = %97, %56
|
119 |
+
%99 = zext nneg i32 %79 to i64, !dbg !33
|
120 |
+
%100 = getelementptr float, ptr addrspace(1) %37, i64 %99, !dbg !34
|
121 |
+
%101 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %100, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !35
|
122 |
+
%102 = extractvalue { i32, i32 } %101, 0, !dbg !35
|
123 |
+
%103 = extractvalue { i32, i32 } %101, 1, !dbg !35
|
124 |
+
%104 = insertelement <2 x i32> poison, i32 %102, i64 0, !dbg !35
|
125 |
+
%105 = insertelement <2 x i32> %104, i32 %103, i64 1, !dbg !35
|
126 |
+
%106 = bitcast <2 x i32> %105 to <2 x float>, !dbg !35
|
127 |
+
%107 = fadd <2 x float> %88, %106, !dbg !36
|
128 |
+
%108 = insertelement <2 x float> poison, float %95, i64 0, !dbg !37
|
129 |
+
%109 = insertelement <2 x float> %108, float %96, i64 1, !dbg !37
|
130 |
+
%110 = fadd <2 x float> %109, %107, !dbg !37
|
131 |
+
%111 = fsub <2 x float> %110, %75, !dbg !47
|
132 |
+
%112 = extractelement <2 x float> %111, i64 0, !dbg !38
|
133 |
+
%113 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %112, float 2.000000e+00) #6, !dbg !38
|
134 |
+
%114 = extractelement <2 x float> %111, i64 1, !dbg !38
|
135 |
+
%115 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %114, float 2.000000e+00) #6, !dbg !38
|
136 |
+
%116 = insertelement <2 x float> poison, float %113, i64 0, !dbg !42
|
137 |
+
%117 = insertelement <2 x float> %116, float %115, i64 1, !dbg !42
|
138 |
+
%118 = fadd <2 x float> %75, %117, !dbg !42
|
139 |
+
%119 = fsub <2 x float> %110, %118, !dbg !43
|
140 |
+
%120 = fmul <2 x float> %111, %119, !dbg !44
|
141 |
+
%121 = fadd <2 x float> %78, %120, !dbg !45
|
142 |
+
%122 = lshr i32 %9, 5, !dbg !10
|
143 |
+
%123 = and i32 %122, 1, !dbg !11
|
144 |
+
%124 = and i32 %9, 127, !dbg !11
|
145 |
+
%125 = zext nneg i32 %124 to i64, !dbg !48
|
146 |
+
%126 = getelementptr float, ptr addrspace(3) @global_smem, i64 %125, !dbg !48
|
147 |
+
store <1 x float> <float 2.000000e+00>, ptr addrspace(3) %126, align 4, !dbg !48
|
148 |
+
%127 = add nuw nsw i32 %124, 130, !dbg !48
|
149 |
+
%128 = zext nneg i32 %127 to i64, !dbg !48
|
150 |
+
%129 = getelementptr float, ptr addrspace(3) @global_smem, i64 %128, !dbg !48
|
151 |
+
store <1 x float> <float 2.000000e+00>, ptr addrspace(3) %129, align 4, !dbg !48
|
152 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !48
|
153 |
+
%130 = mul nuw nsw i32 %12, 130, !dbg !48
|
154 |
+
%131 = add nuw nsw i32 %130, %14, !dbg !48
|
155 |
+
%132 = zext nneg i32 %131 to i64, !dbg !48
|
156 |
+
%133 = getelementptr float, ptr addrspace(3) @global_smem, i64 %132, !dbg !48
|
157 |
+
%134 = load float, ptr addrspace(3) %133, align 8, !dbg !48
|
158 |
+
%135 = getelementptr inbounds <2 x float>, ptr addrspace(3) %133, i64 0, i64 1, !dbg !48
|
159 |
+
%136 = load float, ptr addrspace(3) %135, align 4, !dbg !48
|
160 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !49
|
161 |
+
%137 = extractelement <2 x float> %118, i64 0, !dbg !51
|
162 |
+
%138 = extractelement <2 x float> %118, i64 1, !dbg !55
|
163 |
+
%139 = fsub float %138, %137, !dbg !55
|
164 |
+
%140 = fadd float %134, %136, !dbg !56
|
165 |
+
%141 = fcmp oeq float %140, 0.000000e+00, !dbg !57
|
166 |
+
%142 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %136, float %140) #6, !dbg !58
|
167 |
+
%143 = select i1 %141, float 0.000000e+00, float %142, !dbg !59
|
168 |
+
%144 = fmul float %139, %143, !dbg !60
|
169 |
+
%145 = fadd float %137, %144, !dbg !51
|
170 |
+
%shift = shufflevector <2 x float> %121, <2 x float> poison, <2 x i32> <i32 1, i32 poison>, !dbg !61
|
171 |
+
%146 = fadd <2 x float> %121, %shift, !dbg !61
|
172 |
+
%147 = extractelement <2 x float> %146, i64 0, !dbg !61
|
173 |
+
%148 = fmul float %139, %139, !dbg !62
|
174 |
+
%149 = fmul float %148, %134, !dbg !63
|
175 |
+
%150 = fmul float %149, %143, !dbg !64
|
176 |
+
%151 = fadd float %147, %150, !dbg !65
|
177 |
+
%152 = bitcast float %145 to i32, !dbg !49
|
178 |
+
%153 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %152, i32 16, i32 31), !dbg !49
|
179 |
+
%154 = bitcast i32 %153 to float, !dbg !49
|
180 |
+
%155 = bitcast float %151 to i32, !dbg !49
|
181 |
+
%156 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %155, i32 16, i32 31), !dbg !49
|
182 |
+
%157 = bitcast i32 %156 to float, !dbg !49
|
183 |
+
%158 = bitcast float %140 to i32, !dbg !49
|
184 |
+
%159 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %158, i32 16, i32 31), !dbg !49
|
185 |
+
%160 = bitcast i32 %159 to float, !dbg !49
|
186 |
+
%161 = fsub float %154, %145, !dbg !55
|
187 |
+
%162 = fadd float %140, %160, !dbg !56
|
188 |
+
%163 = fcmp oeq float %162, 0.000000e+00, !dbg !57
|
189 |
+
%164 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %160, float %162) #6, !dbg !58
|
190 |
+
%165 = select i1 %163, float 0.000000e+00, float %164, !dbg !59
|
191 |
+
%166 = fmul float %161, %165, !dbg !60
|
192 |
+
%167 = fadd float %145, %166, !dbg !51
|
193 |
+
%168 = fadd float %151, %157, !dbg !61
|
194 |
+
%169 = fmul float %161, %161, !dbg !62
|
195 |
+
%170 = fmul float %140, %169, !dbg !63
|
196 |
+
%171 = fmul float %170, %165, !dbg !64
|
197 |
+
%172 = fadd float %168, %171, !dbg !65
|
198 |
+
%173 = bitcast float %167 to i32, !dbg !49
|
199 |
+
%174 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %173, i32 8, i32 31), !dbg !49
|
200 |
+
%175 = bitcast i32 %174 to float, !dbg !49
|
201 |
+
%176 = bitcast float %172 to i32, !dbg !49
|
202 |
+
%177 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %176, i32 8, i32 31), !dbg !49
|
203 |
+
%178 = bitcast i32 %177 to float, !dbg !49
|
204 |
+
%179 = bitcast float %162 to i32, !dbg !49
|
205 |
+
%180 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %179, i32 8, i32 31), !dbg !49
|
206 |
+
%181 = bitcast i32 %180 to float, !dbg !49
|
207 |
+
%182 = fsub float %175, %167, !dbg !55
|
208 |
+
%183 = fadd float %162, %181, !dbg !56
|
209 |
+
%184 = fcmp oeq float %183, 0.000000e+00, !dbg !57
|
210 |
+
%185 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %181, float %183) #6, !dbg !58
|
211 |
+
%186 = select i1 %184, float 0.000000e+00, float %185, !dbg !59
|
212 |
+
%187 = fmul float %182, %186, !dbg !60
|
213 |
+
%188 = fadd float %167, %187, !dbg !51
|
214 |
+
%189 = fadd float %172, %178, !dbg !61
|
215 |
+
%190 = fmul float %182, %182, !dbg !62
|
216 |
+
%191 = fmul float %162, %190, !dbg !63
|
217 |
+
%192 = fmul float %186, %191, !dbg !64
|
218 |
+
%193 = fadd float %189, %192, !dbg !65
|
219 |
+
%194 = bitcast float %188 to i32, !dbg !49
|
220 |
+
%195 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %194, i32 4, i32 31), !dbg !49
|
221 |
+
%196 = bitcast i32 %195 to float, !dbg !49
|
222 |
+
%197 = bitcast float %193 to i32, !dbg !49
|
223 |
+
%198 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %197, i32 4, i32 31), !dbg !49
|
224 |
+
%199 = bitcast i32 %198 to float, !dbg !49
|
225 |
+
%200 = bitcast float %183 to i32, !dbg !49
|
226 |
+
%201 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %200, i32 4, i32 31), !dbg !49
|
227 |
+
%202 = bitcast i32 %201 to float, !dbg !49
|
228 |
+
%203 = fsub float %196, %188, !dbg !55
|
229 |
+
%204 = fadd float %183, %202, !dbg !56
|
230 |
+
%205 = fcmp oeq float %204, 0.000000e+00, !dbg !57
|
231 |
+
%206 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %202, float %204) #6, !dbg !58
|
232 |
+
%207 = select i1 %205, float 0.000000e+00, float %206, !dbg !59
|
233 |
+
%208 = fmul float %203, %207, !dbg !60
|
234 |
+
%209 = fadd float %188, %208, !dbg !51
|
235 |
+
%210 = fadd float %193, %199, !dbg !61
|
236 |
+
%211 = fmul float %203, %203, !dbg !62
|
237 |
+
%212 = fmul float %183, %211, !dbg !63
|
238 |
+
%213 = fmul float %207, %212, !dbg !64
|
239 |
+
%214 = fadd float %210, %213, !dbg !65
|
240 |
+
%215 = bitcast float %209 to i32, !dbg !49
|
241 |
+
%216 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %215, i32 2, i32 31), !dbg !49
|
242 |
+
%217 = bitcast i32 %216 to float, !dbg !49
|
243 |
+
%218 = bitcast float %214 to i32, !dbg !49
|
244 |
+
%219 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %218, i32 2, i32 31), !dbg !49
|
245 |
+
%220 = bitcast i32 %219 to float, !dbg !49
|
246 |
+
%221 = bitcast float %204 to i32, !dbg !49
|
247 |
+
%222 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %221, i32 2, i32 31), !dbg !49
|
248 |
+
%223 = bitcast i32 %222 to float, !dbg !49
|
249 |
+
%224 = fsub float %217, %209, !dbg !55
|
250 |
+
%225 = fadd float %204, %223, !dbg !56
|
251 |
+
%226 = fcmp oeq float %225, 0.000000e+00, !dbg !57
|
252 |
+
%227 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %223, float %225) #6, !dbg !58
|
253 |
+
%228 = select i1 %226, float 0.000000e+00, float %227, !dbg !59
|
254 |
+
%229 = fmul float %224, %228, !dbg !60
|
255 |
+
%230 = fadd float %209, %229, !dbg !51
|
256 |
+
%231 = fadd float %214, %220, !dbg !61
|
257 |
+
%232 = fmul float %224, %224, !dbg !62
|
258 |
+
%233 = fmul float %204, %232, !dbg !63
|
259 |
+
%234 = fmul float %228, %233, !dbg !64
|
260 |
+
%235 = fadd float %231, %234, !dbg !65
|
261 |
+
%236 = bitcast float %230 to i32, !dbg !49
|
262 |
+
%237 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %236, i32 1, i32 31), !dbg !49
|
263 |
+
%238 = bitcast i32 %237 to float, !dbg !49
|
264 |
+
%239 = bitcast float %235 to i32, !dbg !49
|
265 |
+
%240 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %239, i32 1, i32 31), !dbg !49
|
266 |
+
%241 = bitcast i32 %240 to float, !dbg !49
|
267 |
+
%242 = bitcast float %225 to i32, !dbg !49
|
268 |
+
%243 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %242, i32 1, i32 31), !dbg !49
|
269 |
+
%244 = bitcast i32 %243 to float, !dbg !49
|
270 |
+
%245 = fsub float %238, %230, !dbg !55
|
271 |
+
%246 = fadd float %225, %244, !dbg !56
|
272 |
+
%247 = fcmp oeq float %246, 0.000000e+00, !dbg !57
|
273 |
+
%248 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %244, float %246) #6, !dbg !58
|
274 |
+
%249 = select i1 %247, float 0.000000e+00, float %248, !dbg !59
|
275 |
+
%250 = fmul float %245, %249, !dbg !60
|
276 |
+
%251 = fadd float %230, %250, !dbg !51
|
277 |
+
%252 = fadd float %235, %241, !dbg !61
|
278 |
+
%253 = fmul float %245, %245, !dbg !62
|
279 |
+
%254 = fmul float %225, %253, !dbg !63
|
280 |
+
%255 = fmul float %249, %254, !dbg !64
|
281 |
+
%256 = fadd float %252, %255, !dbg !65
|
282 |
+
%257 = icmp eq i32 %10, 0, !dbg !49
|
283 |
+
%258 = shl nuw nsw i32 %12, 1, !dbg !49
|
284 |
+
%259 = or i32 %258, %123, !dbg !49
|
285 |
+
%260 = zext nneg i32 %259 to i64, !dbg !49
|
286 |
+
%261 = getelementptr float, ptr addrspace(3) @global_smem, i64 %260, !dbg !49
|
287 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %261, float %251, i1 %257) #6, !dbg !49
|
288 |
+
%262 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 16), i64 %260, !dbg !49
|
289 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %262, float %256, i1 %257) #6, !dbg !49
|
290 |
+
%263 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 32), i64 %260, !dbg !49
|
291 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %263, float %246, i1 %257) #6, !dbg !49
|
292 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !49
|
293 |
+
%264 = icmp slt i32 %9, 4, !dbg !49
|
294 |
+
%265 = sext i32 %9 to i64, !dbg !49
|
295 |
+
%266 = getelementptr float, ptr addrspace(3) @global_smem, i64 %265, !dbg !49
|
296 |
+
%267 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %266, i1 %264) #6, !dbg !49
|
297 |
+
%268 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 16), i64 %265, !dbg !49
|
298 |
+
%269 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %268, i1 %264) #6, !dbg !49
|
299 |
+
%270 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 32), i64 %265, !dbg !49
|
300 |
+
%271 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %270, i1 %264) #6, !dbg !49
|
301 |
+
%272 = bitcast float %267 to i32, !dbg !49
|
302 |
+
%273 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %272, i32 1, i32 31), !dbg !49
|
303 |
+
%274 = bitcast i32 %273 to float, !dbg !49
|
304 |
+
%275 = bitcast float %269 to i32, !dbg !49
|
305 |
+
%276 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %275, i32 1, i32 31), !dbg !49
|
306 |
+
%277 = bitcast i32 %276 to float, !dbg !49
|
307 |
+
%278 = bitcast float %271 to i32, !dbg !49
|
308 |
+
%279 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %278, i32 1, i32 31), !dbg !49
|
309 |
+
%280 = bitcast i32 %279 to float, !dbg !49
|
310 |
+
%281 = fsub float %274, %267, !dbg !55
|
311 |
+
%282 = fadd float %271, %280, !dbg !56
|
312 |
+
%283 = fcmp oeq float %282, 0.000000e+00, !dbg !57
|
313 |
+
%284 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %280, float %282) #6, !dbg !58
|
314 |
+
%285 = select i1 %283, float 0.000000e+00, float %284, !dbg !59
|
315 |
+
%286 = fmul float %281, %285, !dbg !60
|
316 |
+
%287 = fadd float %267, %286, !dbg !51
|
317 |
+
%288 = fadd float %269, %277, !dbg !61
|
318 |
+
%289 = fmul float %281, %281, !dbg !62
|
319 |
+
%290 = fmul float %271, %289, !dbg !63
|
320 |
+
%291 = fmul float %290, %285, !dbg !64
|
321 |
+
%292 = fadd float %288, %291, !dbg !65
|
322 |
+
%293 = icmp eq i32 %13, 0, !dbg !49
|
323 |
+
%294 = and i1 %264, %293, !dbg !49
|
324 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %266, float %287, i1 %294) #6, !dbg !49
|
325 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %268, float %292, i1 %294) #6, !dbg !49
|
326 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %270, float %282, i1 %294) #6, !dbg !49
|
327 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !49
|
328 |
+
%295 = zext nneg i32 %258 to i64, !dbg !49
|
329 |
+
%296 = getelementptr float, ptr addrspace(3) @global_smem, i64 %295, !dbg !49
|
330 |
+
%297 = load float, ptr addrspace(3) %296, align 4, !dbg !49
|
331 |
+
%298 = getelementptr float, ptr addrspace(3) getelementptr ([0 x i8], ptr addrspace(3) @global_smem, i64 0, i64 16), i64 %295, !dbg !49
|
332 |
+
%299 = load float, ptr addrspace(3) %298, align 4, !dbg !49
|
333 |
+
%300 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %299, float 2.560000e+02) #6, !dbg !66
|
334 |
+
%301 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %299, float 2.560000e+02) #6, !dbg !66
|
335 |
+
%302 = fadd float %300, 0x3EE4F8B580000000, !dbg !67
|
336 |
+
%303 = getelementptr float, ptr addrspace(3) @global_smem, i64 %57
|
337 |
+
%304 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %40, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !68
|
338 |
+
%305 = extractvalue { i32, i32 } %304, 0, !dbg !68
|
339 |
+
%306 = extractvalue { i32, i32 } %304, 1, !dbg !68
|
340 |
+
%307 = bitcast i32 %305 to float, !dbg !68
|
341 |
+
%308 = bitcast i32 %306 to float, !dbg !68
|
342 |
+
%309 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %49, i1 true, i32 0, i1 true) #6, !dbg !69
|
343 |
+
%310 = trunc i32 %309 to i16, !dbg !69
|
344 |
+
%extelt.offset = lshr i32 %309, 16, !dbg !69
|
345 |
+
%311 = trunc i32 %extelt.offset to i16, !dbg !69
|
346 |
+
%312 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %310) #6, !dbg !70
|
347 |
+
%313 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %311) #6, !dbg !70
|
348 |
+
%314 = getelementptr float, ptr addrspace(1) %4, i64 %125, !dbg !71
|
349 |
+
%315 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %314, i1 true, i32 0, i1 true) #6, !dbg !72
|
350 |
+
br i1 %33, label %316, label %317, !dbg !73
|
351 |
+
|
352 |
+
316: ; preds = %98
|
353 |
+
tail call void @__assertfail(ptr nonnull @assertMessage_1, ptr nonnull @assertFile_1, i32 1892, ptr nonnull @assertFunc_1, i64 1), !dbg !73
|
354 |
+
br label %317, !dbg !73
|
355 |
+
|
356 |
+
317: ; preds = %316, %98
|
357 |
+
%318 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_first.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %58, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !74
|
358 |
+
%319 = extractvalue { i32, i32 } %318, 0, !dbg !74
|
359 |
+
%320 = extractvalue { i32, i32 } %318, 1, !dbg !74
|
360 |
+
%321 = bitcast i32 %319 to float, !dbg !74
|
361 |
+
%322 = bitcast i32 %320 to float, !dbg !74
|
362 |
+
%323 = fadd float %307, %321, !dbg !75
|
363 |
+
%324 = fadd float %308, %322, !dbg !75
|
364 |
+
%325 = fadd float %312, %323, !dbg !76
|
365 |
+
%326 = fadd float %313, %324, !dbg !76
|
366 |
+
%327 = fsub float %325, %297, !dbg !77
|
367 |
+
%328 = fsub float %326, %297, !dbg !77
|
368 |
+
%329 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !78
|
369 |
+
%.not.i = icmp eq i32 %329, 0, !dbg !78
|
370 |
+
br i1 %.not.i, label %332, label %330, !dbg !78
|
371 |
+
|
372 |
+
330: ; preds = %317
|
373 |
+
%331 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %302), !dbg !78
|
374 |
+
br label %__nv_rsqrtf.exit, !dbg !78
|
375 |
+
|
376 |
+
332: ; preds = %317
|
377 |
+
%333 = tail call float @llvm.nvvm.rsqrt.approx.f(float %302), !dbg !78
|
378 |
+
br label %__nv_rsqrtf.exit, !dbg !78
|
379 |
+
|
380 |
+
__nv_rsqrtf.exit: ; preds = %330, %332
|
381 |
+
%.0.i = phi float [ %331, %330 ], [ %333, %332 ], !dbg !78
|
382 |
+
%334 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !78
|
383 |
+
%335 = fmul float %327, %.0.i, !dbg !79
|
384 |
+
%336 = fmul float %328, %.0.i, !dbg !79
|
385 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !80
|
386 |
+
store i32 %315, ptr addrspace(3) %126, align 4, !dbg !80
|
387 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !80
|
388 |
+
%337 = load float, ptr addrspace(3) %303, align 8, !dbg !80
|
389 |
+
%338 = getelementptr inbounds <2 x float>, ptr addrspace(3) %303, i64 0, i64 1, !dbg !80
|
390 |
+
%339 = load float, ptr addrspace(3) %338, align 4, !dbg !80
|
391 |
+
%340 = fmul float %335, %337, !dbg !80
|
392 |
+
%341 = fmul float %336, %339, !dbg !80
|
393 |
+
%342 = getelementptr i16, ptr addrspace(1) %5, i64 %48, !dbg !81
|
394 |
+
%343 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %340) #6, !dbg !82
|
395 |
+
%344 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %341) #6, !dbg !82
|
396 |
+
%345 = insertelement <2 x i16> undef, i16 %343, i64 0, !dbg !82
|
397 |
+
%346 = insertelement <2 x i16> %345, i16 %344, i64 1, !dbg !82
|
398 |
+
%347 = bitcast <2 x i16> %346 to i32, !dbg !82
|
399 |
+
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %347, ptr addrspace(1) %342, i1 true) #6, !dbg !82
|
400 |
+
%348 = or i32 %124, 128, !dbg !83
|
401 |
+
%349 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_last.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %82, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !68
|
402 |
+
%350 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %91, i1 true, i32 0, i1 true) #6, !dbg !69
|
403 |
+
%351 = trunc i32 %350 to i16, !dbg !69
|
404 |
+
%extelt.offset.1 = lshr i32 %350, 16, !dbg !69
|
405 |
+
%352 = trunc i32 %extelt.offset.1 to i16, !dbg !69
|
406 |
+
%353 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %351) #6, !dbg !70
|
407 |
+
%354 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %352) #6, !dbg !70
|
408 |
+
%355 = zext nneg i32 %348 to i64, !dbg !71
|
409 |
+
%356 = getelementptr float, ptr addrspace(1) %4, i64 %355, !dbg !71
|
410 |
+
%357 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u32 $0, $3;", "=r,l,b,r,b"(ptr addrspace(1) %356, i1 true, i32 0, i1 true) #6, !dbg !72
|
411 |
+
br i1 %33, label %358, label %359, !dbg !73
|
412 |
+
|
413 |
+
358: ; preds = %__nv_rsqrtf.exit
|
414 |
+
tail call void @__assertfail(ptr nonnull @assertMessage_1, ptr nonnull @assertFile_1, i32 1892, ptr nonnull @assertFunc_1, i64 1), !dbg !73
|
415 |
+
br label %359, !dbg !73
|
416 |
+
|
417 |
+
359: ; preds = %358, %__nv_rsqrtf.exit
|
418 |
+
%360 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.L1::evict_first.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %100, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !74
|
419 |
+
%361 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !78
|
420 |
+
%.not.i.1 = icmp eq i32 %361, 0, !dbg !78
|
421 |
+
br i1 %.not.i.1, label %364, label %362, !dbg !78
|
422 |
+
|
423 |
+
362: ; preds = %359
|
424 |
+
%363 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %302), !dbg !78
|
425 |
+
br label %__nv_rsqrtf.exit.1, !dbg !78
|
426 |
+
|
427 |
+
364: ; preds = %359
|
428 |
+
%365 = tail call float @llvm.nvvm.rsqrt.approx.f(float %302), !dbg !78
|
429 |
+
br label %__nv_rsqrtf.exit.1, !dbg !78
|
430 |
+
|
431 |
+
__nv_rsqrtf.exit.1: ; preds = %364, %362
|
432 |
+
%.0.i.1 = phi float [ %363, %362 ], [ %365, %364 ], !dbg !78
|
433 |
+
%366 = extractvalue { i32, i32 } %349, 1, !dbg !68
|
434 |
+
%367 = bitcast i32 %366 to float, !dbg !68
|
435 |
+
%368 = extractvalue { i32, i32 } %360, 1, !dbg !74
|
436 |
+
%369 = bitcast i32 %368 to float, !dbg !74
|
437 |
+
%370 = fadd float %367, %369, !dbg !75
|
438 |
+
%371 = fadd float %354, %370, !dbg !76
|
439 |
+
%372 = fsub float %371, %297, !dbg !77
|
440 |
+
%373 = extractvalue { i32, i32 } %349, 0, !dbg !68
|
441 |
+
%374 = bitcast i32 %373 to float, !dbg !68
|
442 |
+
%375 = extractvalue { i32, i32 } %360, 0, !dbg !74
|
443 |
+
%376 = bitcast i32 %375 to float, !dbg !74
|
444 |
+
%377 = fadd float %374, %376, !dbg !75
|
445 |
+
%378 = fadd float %353, %377, !dbg !76
|
446 |
+
%379 = fsub float %378, %297, !dbg !77
|
447 |
+
%380 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !78
|
448 |
+
%381 = fmul float %379, %.0.i.1, !dbg !79
|
449 |
+
%382 = fmul float %372, %.0.i.1, !dbg !79
|
450 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !80
|
451 |
+
store i32 %357, ptr addrspace(3) %126, align 4, !dbg !80
|
452 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !80
|
453 |
+
%383 = load float, ptr addrspace(3) %303, align 8, !dbg !80
|
454 |
+
%384 = load float, ptr addrspace(3) %338, align 4, !dbg !80
|
455 |
+
%385 = fmul float %381, %383, !dbg !80
|
456 |
+
%386 = fmul float %382, %384, !dbg !80
|
457 |
+
%387 = getelementptr i16, ptr addrspace(1) %5, i64 %90, !dbg !81
|
458 |
+
%388 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %385) #6, !dbg !82
|
459 |
+
%389 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %386) #6, !dbg !82
|
460 |
+
%390 = insertelement <2 x i16> undef, i16 %388, i64 0, !dbg !82
|
461 |
+
%391 = insertelement <2 x i16> %390, i16 %389, i64 1, !dbg !82
|
462 |
+
%392 = bitcast <2 x i16> %391 to i32, !dbg !82
|
463 |
+
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %392, ptr addrspace(1) %387, i1 true) #6, !dbg !82
|
464 |
+
ret void, !dbg !84
|
465 |
+
}
|
466 |
+
|
467 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
468 |
+
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
|
469 |
+
|
470 |
+
; Function Attrs: convergent nocallback nounwind
|
471 |
+
declare void @llvm.nvvm.barrier0() #1
|
472 |
+
|
473 |
+
; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
|
474 |
+
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #2
|
475 |
+
|
476 |
+
; Function Attrs: alwaysinline nounwind
|
477 |
+
define float @__nv_rsqrtf(float %x) local_unnamed_addr #3 {
|
478 |
+
%1 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6
|
479 |
+
%.not = icmp eq i32 %1, 0
|
480 |
+
br i1 %.not, label %4, label %2
|
481 |
+
|
482 |
+
2: ; preds = %0
|
483 |
+
%3 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %x)
|
484 |
+
br label %6
|
485 |
+
|
486 |
+
4: ; preds = %0
|
487 |
+
%5 = tail call float @llvm.nvvm.rsqrt.approx.f(float %x)
|
488 |
+
br label %6
|
489 |
+
|
490 |
+
6: ; preds = %4, %2
|
491 |
+
%.0 = phi float [ %3, %2 ], [ %5, %4 ]
|
492 |
+
ret float %.0
|
493 |
+
}
|
494 |
+
|
495 |
+
declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #4
|
496 |
+
|
497 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
|
498 |
+
declare float @llvm.nvvm.rsqrt.approx.ftz.f(float) #5
|
499 |
+
|
500 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
|
501 |
+
declare float @llvm.nvvm.rsqrt.approx.f(float) #5
|
502 |
+
|
503 |
+
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
504 |
+
attributes #1 = { convergent nocallback nounwind }
|
505 |
+
attributes #2 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
|
506 |
+
attributes #3 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
507 |
+
attributes #4 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
508 |
+
attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
|
509 |
+
attributes #6 = { nounwind }
|
510 |
+
|
511 |
+
!llvm.module.flags = !{!0, !1}
|
512 |
+
!llvm.dbg.cu = !{!2}
|
513 |
+
!nvvm.annotations = !{!4, !5, !5, !4}
|
514 |
+
!llvm.ident = !{!6}
|
515 |
+
|
516 |
+
!0 = !{i32 2, !"Debug Info Version", i32 3}
|
517 |
+
!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
|
518 |
+
!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
|
519 |
+
!3 = !DIFile(filename: "ccig6fki6p4lxrdmgg6eudahiexcvueeol2p4qp532pvve2y463y.py", directory: "/tmp/torchinductor_root/ci")
|
520 |
+
!4 = !{ptr @triton__0d1d2d3d4d5d6de7de, !"kernel", i32 1}
|
521 |
+
!5 = !{ptr @triton__0d1d2d3d4d5d6de7de, !"maxntidx", i32 128}
|
522 |
+
!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
|
523 |
+
!7 = distinct !DISubprogram(name: "triton__0d1d2d3d4d5d6de7de", linkageName: "triton__0d1d2d3d4d5d6de7de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
|
524 |
+
!8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
|
525 |
+
!9 = !{}
|
526 |
+
!10 = !DILocation(line: 22, column: 44, scope: !7)
|
527 |
+
!11 = !DILocation(line: 24, column: 33, scope: !7)
|
528 |
+
!12 = !DILocation(line: 21, column: 28, scope: !7)
|
529 |
+
!13 = !DILocation(line: 21, column: 33, scope: !7)
|
530 |
+
!14 = !DILocation(line: 22, column: 23, scope: !7)
|
531 |
+
!15 = !DILocation(line: 26, column: 30, scope: !7)
|
532 |
+
!16 = !DILocation(line: 26, column: 35, scope: !7)
|
533 |
+
!17 = !DILocation(line: 27, column: 18, scope: !7)
|
534 |
+
!18 = !DILocation(line: 35, column: 44, scope: !7)
|
535 |
+
!19 = !DILocation(line: 36, column: 44, scope: !7)
|
536 |
+
!20 = !DILocation(line: 37, column: 22, scope: !7)
|
537 |
+
!21 = !DILocation(line: 38, column: 22, scope: !7)
|
538 |
+
!22 = !DILocation(line: 39, column: 36, scope: !7)
|
539 |
+
!23 = !DILocation(line: 40, column: 40, scope: !7)
|
540 |
+
!24 = !DILocation(line: 41, column: 44, scope: !7)
|
541 |
+
!25 = !DILocation(line: 35, column: 40, scope: !7)
|
542 |
+
!26 = !DILocation(line: 35, column: 34, scope: !7)
|
543 |
+
!27 = !DILocation(line: 35, column: 50, scope: !7)
|
544 |
+
!28 = !DILocation(line: 36, column: 40, scope: !7)
|
545 |
+
!29 = !DILocation(line: 36, column: 34, scope: !7)
|
546 |
+
!30 = !DILocation(line: 36, column: 50, scope: !7)
|
547 |
+
!31 = !DILocation(line: 36, column: 101, scope: !7)
|
548 |
+
!32 = !DILocation(line: 40, column: 55, scope: !7)
|
549 |
+
!33 = !DILocation(line: 41, column: 40, scope: !7)
|
550 |
+
!34 = !DILocation(line: 41, column: 34, scope: !7)
|
551 |
+
!35 = !DILocation(line: 41, column: 52, scope: !7)
|
552 |
+
!36 = !DILocation(line: 42, column: 22, scope: !7)
|
553 |
+
!37 = !DILocation(line: 44, column: 22, scope: !7)
|
554 |
+
!38 = !DILocation(line: 98, column: 30, scope: !39, inlinedAt: !41)
|
555 |
+
!39 = distinct !DILexicalBlockFile(scope: !7, file: !40, discriminator: 0)
|
556 |
+
!40 = !DIFile(filename: "triton_helpers.py", directory: "/usr/local/lib/python3.10/dist-packages/torch/_inductor")
|
557 |
+
!41 = !DILocation(line: 47, column: 41, scope: !39)
|
558 |
+
!42 = !DILocation(line: 98, column: 22, scope: !39, inlinedAt: !41)
|
559 |
+
!43 = !DILocation(line: 101, column: 30, scope: !39, inlinedAt: !41)
|
560 |
+
!44 = !DILocation(line: 101, column: 22, scope: !39, inlinedAt: !41)
|
561 |
+
!45 = !DILocation(line: 50, column: 50, scope: !7)
|
562 |
+
!46 = !DILocation(line: 32, column: 27, scope: !7)
|
563 |
+
!47 = !DILocation(line: 96, column: 20, scope: !39, inlinedAt: !41)
|
564 |
+
!48 = !DILocation(line: 31, column: 36, scope: !7)
|
565 |
+
!49 = !DILocation(line: 120, column: 46, scope: !39, inlinedAt: !50)
|
566 |
+
!50 = !DILocation(line: 53, column: 44, scope: !39)
|
567 |
+
!51 = !DILocation(line: 112, column: 17, scope: !52, inlinedAt: !53)
|
568 |
+
!52 = distinct !DILexicalBlockFile(scope: !39, file: !40, discriminator: 0)
|
569 |
+
!53 = !DILocation(line: 120, column: 46, scope: !52, inlinedAt: !54)
|
570 |
+
!54 = !DILocation(line: 53, column: 44, scope: !52)
|
571 |
+
!55 = !DILocation(line: 108, column: 21, scope: !52, inlinedAt: !53)
|
572 |
+
!56 = !DILocation(line: 109, column: 28, scope: !52, inlinedAt: !53)
|
573 |
+
!57 = !DILocation(line: 110, column: 39, scope: !52, inlinedAt: !53)
|
574 |
+
!58 = !DILocation(line: 110, column: 60, scope: !52, inlinedAt: !53)
|
575 |
+
!59 = !DILocation(line: 110, column: 49, scope: !52, inlinedAt: !53)
|
576 |
+
!60 = !DILocation(line: 112, column: 25, scope: !52, inlinedAt: !53)
|
577 |
+
!61 = !DILocation(line: 113, column: 15, scope: !52, inlinedAt: !53)
|
578 |
+
!62 = !DILocation(line: 113, column: 30, scope: !52, inlinedAt: !53)
|
579 |
+
!63 = !DILocation(line: 113, column: 38, scope: !52, inlinedAt: !53)
|
580 |
+
!64 = !DILocation(line: 113, column: 49, scope: !52, inlinedAt: !53)
|
581 |
+
!65 = !DILocation(line: 113, column: 22, scope: !52, inlinedAt: !53)
|
582 |
+
!66 = !DILocation(line: 75, column: 24, scope: !7)
|
583 |
+
!67 = !DILocation(line: 77, column: 24, scope: !7)
|
584 |
+
!68 = !DILocation(line: 62, column: 51, scope: !7)
|
585 |
+
!69 = !DILocation(line: 63, column: 51, scope: !7)
|
586 |
+
!70 = !DILocation(line: 63, column: 103, scope: !7)
|
587 |
+
!71 = !DILocation(line: 64, column: 35, scope: !7)
|
588 |
+
!72 = !DILocation(line: 64, column: 40, scope: !7)
|
589 |
+
!73 = !DILocation(line: 68, column: 57, scope: !7)
|
590 |
+
!74 = !DILocation(line: 69, column: 54, scope: !7)
|
591 |
+
!75 = !DILocation(line: 70, column: 24, scope: !7)
|
592 |
+
!76 = !DILocation(line: 72, column: 24, scope: !7)
|
593 |
+
!77 = !DILocation(line: 73, column: 24, scope: !7)
|
594 |
+
!78 = !DILocation(line: 78, column: 30, scope: !7)
|
595 |
+
!79 = !DILocation(line: 79, column: 24, scope: !7)
|
596 |
+
!80 = !DILocation(line: 80, column: 24, scope: !7)
|
597 |
+
!81 = !DILocation(line: 82, column: 29, scope: !7)
|
598 |
+
!82 = !DILocation(line: 82, column: 52, scope: !7)
|
599 |
+
!83 = !DILocation(line: 59, column: 27, scope: !7)
|
600 |
+
!84 = !DILocation(line: 58, column: 4, scope: !7)
|
.triton/dump/76fb48b96c75cb8e388c291a18ef9b02/triton_.ttir
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
module {
|
2 |
+
tt.func public @triton__0d1d2d3d4d5d6de7de(%arg0: !tt.ptr<i64, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg4: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg5: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg6: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}, %arg7: i32 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
|
3 |
+
%cst = arith.constant dense<0.000000e+00> : tensor<2x128xbf16>
|
4 |
+
%cst_0 = arith.constant 0.000000e+00 : f32
|
5 |
+
%cst_1 = arith.constant dense<1.000000e+00> : tensor<2x128xf32>
|
6 |
+
%c256_i32 = arith.constant 256 : i32
|
7 |
+
%c128_i32 = arith.constant 128 : i32
|
8 |
+
%c0_i32 = arith.constant 0 : i32
|
9 |
+
%cst_2 = arith.constant dense<256> : tensor<2x1xi64>
|
10 |
+
%cst_3 = arith.constant dense<0> : tensor<2x1xi64>
|
11 |
+
%cst_4 = arith.constant dense<50257> : tensor<2x1xi64>
|
12 |
+
%cst_5 = arith.constant dense<9.99999974E-6> : tensor<2x1xf32>
|
13 |
+
%cst_6 = arith.constant dense<2.560000e+02> : tensor<2x1xf32>
|
14 |
+
%cst_7 = arith.constant dense<0.000000e+00> : tensor<1x128xf32>
|
15 |
+
%cst_8 = arith.constant dense<0.000000e+00> : tensor<2x128xf32>
|
16 |
+
%cst_9 = arith.constant dense<256> : tensor<2x1xi32>
|
17 |
+
%cst_10 = arith.constant dense<256> : tensor<1x128xi32>
|
18 |
+
%cst_11 = arith.constant dense<512> : tensor<2x1xi32>
|
19 |
+
%c2_i32 = arith.constant 2 : i32
|
20 |
+
%0 = tt.get_program_id x : i32
|
21 |
+
%1 = arith.muli %0, %c2_i32 : i32
|
22 |
+
%2 = tt.make_range {end = 2 : i32, start = 0 : i32} : tensor<2xi32>
|
23 |
+
%3 = tt.expand_dims %2 {axis = 1 : i32} : (tensor<2xi32>) -> tensor<2x1xi32>
|
24 |
+
%4 = tt.splat %1 : (i32) -> tensor<2x1xi32>
|
25 |
+
%5 = arith.addi %4, %3 : tensor<2x1xi32>
|
26 |
+
%6 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
|
27 |
+
%7 = tt.expand_dims %6 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32>
|
28 |
+
%8 = tt.splat %arg0 : (!tt.ptr<i64, 1>) -> tensor<2x1x!tt.ptr<i64, 1>>
|
29 |
+
%9 = tt.addptr %8, %5 : tensor<2x1x!tt.ptr<i64, 1>>, tensor<2x1xi32>
|
30 |
+
%10 = tt.load %9 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x1xi64>
|
31 |
+
%11 = arith.remsi %5, %cst_11 : tensor<2x1xi32>
|
32 |
+
%12 = arith.muli %11, %cst_9 : tensor<2x1xi32>
|
33 |
+
%13 = tt.broadcast %12 : (tensor<2x1xi32>) -> tensor<2x128xi32>
|
34 |
+
%14 = tt.splat %arg2 : (!tt.ptr<f32, 1>) -> tensor<2x128x!tt.ptr<f32, 1>>
|
35 |
+
%15 = arith.muli %5, %cst_9 : tensor<2x1xi32>
|
36 |
+
%16 = tt.broadcast %15 : (tensor<2x1xi32>) -> tensor<2x128xi32>
|
37 |
+
%17 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<2x128x!tt.ptr<bf16, 1>>
|
38 |
+
%18 = arith.addi %10, %cst_4 : tensor<2x1xi64>
|
39 |
+
%19 = arith.cmpi slt, %10, %cst_3 : tensor<2x1xi64>
|
40 |
+
%20 = arith.select %19, %18, %10 : tensor<2x1xi1>, tensor<2x1xi64>
|
41 |
+
%21 = arith.cmpi sge, %20, %cst_3 : tensor<2x1xi64>
|
42 |
+
%22 = arith.cmpi slt, %20, %cst_4 : tensor<2x1xi64>
|
43 |
+
%23 = arith.andi %21, %22 : tensor<2x1xi1>
|
44 |
+
%24 = arith.muli %20, %cst_2 : tensor<2x1xi64>
|
45 |
+
%25 = tt.broadcast %24 : (tensor<2x1xi64>) -> tensor<2x128xi64>
|
46 |
+
%26 = tt.splat %arg1 : (!tt.ptr<f32, 1>) -> tensor<2x128x!tt.ptr<f32, 1>>
|
47 |
+
%27:3 = scf.for %arg8 = %c0_i32 to %c256_i32 step %c128_i32 iter_args(%arg9 = %cst_8, %arg10 = %cst_8, %arg11 = %cst_8) -> (tensor<2x128xf32>, tensor<2x128xf32>, tensor<2x128xf32>) : i32 {
|
48 |
+
%51 = tt.splat %arg8 : (i32) -> tensor<1x128xi32>
|
49 |
+
%52 = arith.addi %51, %7 : tensor<1x128xi32>
|
50 |
+
%53 = arith.cmpi slt, %52, %cst_10 : tensor<1x128xi32>
|
51 |
+
%54 = tt.broadcast %52 : (tensor<1x128xi32>) -> tensor<2x128xi32>
|
52 |
+
%55 = arith.addi %54, %13 : tensor<2x128xi32>
|
53 |
+
%56 = tt.addptr %14, %55 : tensor<2x128x!tt.ptr<f32, 1>>, tensor<2x128xi32>
|
54 |
+
%57 = tt.broadcast %53 : (tensor<1x128xi1>) -> tensor<2x128xi1>
|
55 |
+
%58 = tt.load %56, %57, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x128xf32>
|
56 |
+
%59 = arith.addi %54, %16 : tensor<2x128xi32>
|
57 |
+
%60 = tt.addptr %17, %59 : tensor<2x128x!tt.ptr<bf16, 1>>, tensor<2x128xi32>
|
58 |
+
%61 = tt.load %60, %57, %cst {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x128xbf16>
|
59 |
+
%62 = arith.extf %61 : tensor<2x128xbf16> to tensor<2x128xf32>
|
60 |
+
tt.assert %23, "index out of bounds: 0 <= tmp3 < 50257", "/usr/local/lib/python3.10/dist-packages/torch/_inductor/codecache.py", "<module>", 1892 : tensor<2x1xi1>
|
61 |
+
%63 = arith.extsi %52 : tensor<1x128xi32> to tensor<1x128xi64>
|
62 |
+
%64 = tt.broadcast %63 : (tensor<1x128xi64>) -> tensor<2x128xi64>
|
63 |
+
%65 = arith.addi %64, %25 : tensor<2x128xi64>
|
64 |
+
%66 = tt.addptr %26, %65 : tensor<2x128x!tt.ptr<f32, 1>>, tensor<2x128xi64>
|
65 |
+
%67 = tt.load %66, %57, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x128xf32>
|
66 |
+
%68 = arith.addf %67, %58 : tensor<2x128xf32>
|
67 |
+
%69 = arith.addf %68, %62 : tensor<2x128xf32>
|
68 |
+
%70 = arith.subf %69, %arg9 : tensor<2x128xf32>
|
69 |
+
%71 = arith.addf %arg11, %cst_1 : tensor<2x128xf32>
|
70 |
+
%72 = arith.divf %70, %71 : tensor<2x128xf32>
|
71 |
+
%73 = arith.addf %arg9, %72 : tensor<2x128xf32>
|
72 |
+
%74 = arith.subf %69, %73 : tensor<2x128xf32>
|
73 |
+
%75 = arith.mulf %70, %74 : tensor<2x128xf32>
|
74 |
+
%76 = arith.addf %arg10, %75 : tensor<2x128xf32>
|
75 |
+
%77 = arith.select %57, %73, %arg9 : tensor<2x128xi1>, tensor<2x128xf32>
|
76 |
+
%78 = arith.select %57, %76, %arg10 : tensor<2x128xi1>, tensor<2x128xf32>
|
77 |
+
%79 = arith.select %57, %71, %arg11 : tensor<2x128xi1>, tensor<2x128xf32>
|
78 |
+
scf.yield %77, %78, %79 : tensor<2x128xf32>, tensor<2x128xf32>, tensor<2x128xf32>
|
79 |
+
}
|
80 |
+
%28:3 = "tt.reduce"(%27#0, %27#1, %27#2) <{axis = 1 : i32}> ({
|
81 |
+
^bb0(%arg8: f32, %arg9: f32, %arg10: f32, %arg11: f32, %arg12: f32, %arg13: f32):
|
82 |
+
%51 = arith.subf %arg11, %arg8 : f32
|
83 |
+
%52 = arith.addf %arg10, %arg13 : f32
|
84 |
+
%53 = arith.cmpf oeq, %52, %cst_0 : f32
|
85 |
+
%54 = arith.divf %arg13, %52 : f32
|
86 |
+
%55 = arith.select %53, %cst_0, %54 : f32
|
87 |
+
%56 = arith.mulf %51, %55 : f32
|
88 |
+
%57 = arith.addf %arg8, %56 : f32
|
89 |
+
%58 = arith.addf %arg9, %arg12 : f32
|
90 |
+
%59 = arith.mulf %51, %51 : f32
|
91 |
+
%60 = arith.mulf %59, %arg10 : f32
|
92 |
+
%61 = arith.mulf %60, %55 : f32
|
93 |
+
%62 = arith.addf %58, %61 : f32
|
94 |
+
tt.reduce.return %57, %62, %52 : f32, f32, f32
|
95 |
+
}) : (tensor<2x128xf32>, tensor<2x128xf32>, tensor<2x128xf32>) -> (tensor<2xf32>, tensor<2xf32>, tensor<2xf32>)
|
96 |
+
%29 = tt.expand_dims %28#0 {axis = 1 : i32} : (tensor<2xf32>) -> tensor<2x1xf32>
|
97 |
+
%30 = tt.expand_dims %28#1 {axis = 1 : i32} : (tensor<2xf32>) -> tensor<2x1xf32>
|
98 |
+
%31 = arith.muli %11, %cst_9 : tensor<2x1xi32>
|
99 |
+
%32 = tt.broadcast %31 : (tensor<2x1xi32>) -> tensor<2x128xi32>
|
100 |
+
%33 = tt.splat %arg2 : (!tt.ptr<f32, 1>) -> tensor<2x128x!tt.ptr<f32, 1>>
|
101 |
+
%34 = arith.muli %5, %cst_9 : tensor<2x1xi32>
|
102 |
+
%35 = tt.broadcast %34 : (tensor<2x1xi32>) -> tensor<2x128xi32>
|
103 |
+
%36 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<2x128x!tt.ptr<bf16, 1>>
|
104 |
+
%37 = tt.splat %arg4 : (!tt.ptr<f32, 1>) -> tensor<1x128x!tt.ptr<f32, 1>>
|
105 |
+
%38 = arith.addi %10, %cst_4 : tensor<2x1xi64>
|
106 |
+
%39 = arith.cmpi slt, %10, %cst_3 : tensor<2x1xi64>
|
107 |
+
%40 = arith.select %39, %38, %10 : tensor<2x1xi1>, tensor<2x1xi64>
|
108 |
+
%41 = arith.cmpi sge, %40, %cst_3 : tensor<2x1xi64>
|
109 |
+
%42 = arith.cmpi slt, %40, %cst_4 : tensor<2x1xi64>
|
110 |
+
%43 = arith.andi %41, %42 : tensor<2x1xi1>
|
111 |
+
%44 = arith.muli %40, %cst_2 : tensor<2x1xi64>
|
112 |
+
%45 = tt.broadcast %44 : (tensor<2x1xi64>) -> tensor<2x128xi64>
|
113 |
+
%46 = tt.splat %arg1 : (!tt.ptr<f32, 1>) -> tensor<2x128x!tt.ptr<f32, 1>>
|
114 |
+
%47 = tt.broadcast %29 : (tensor<2x1xf32>) -> tensor<2x128xf32>
|
115 |
+
%48 = arith.divf %30, %cst_6 : tensor<2x1xf32>
|
116 |
+
%49 = arith.addf %48, %cst_5 : tensor<2x1xf32>
|
117 |
+
%50 = tt.splat %arg5 : (!tt.ptr<bf16, 1>) -> tensor<2x128x!tt.ptr<bf16, 1>>
|
118 |
+
scf.for %arg8 = %c0_i32 to %c256_i32 step %c128_i32 : i32 {
|
119 |
+
%51 = tt.splat %arg8 : (i32) -> tensor<1x128xi32>
|
120 |
+
%52 = arith.addi %51, %7 : tensor<1x128xi32>
|
121 |
+
%53 = arith.cmpi slt, %52, %cst_10 : tensor<1x128xi32>
|
122 |
+
%54 = tt.broadcast %52 : (tensor<1x128xi32>) -> tensor<2x128xi32>
|
123 |
+
%55 = arith.addi %54, %32 : tensor<2x128xi32>
|
124 |
+
%56 = tt.addptr %33, %55 : tensor<2x128x!tt.ptr<f32, 1>>, tensor<2x128xi32>
|
125 |
+
%57 = tt.broadcast %53 : (tensor<1x128xi1>) -> tensor<2x128xi1>
|
126 |
+
%58 = tt.load %56, %57, %cst_8 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<2x128xf32>
|
127 |
+
%59 = arith.addi %54, %35 : tensor<2x128xi32>
|
128 |
+
%60 = tt.addptr %36, %59 : tensor<2x128x!tt.ptr<bf16, 1>>, tensor<2x128xi32>
|
129 |
+
%61 = tt.load %60, %57, %cst {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<2x128xbf16>
|
130 |
+
%62 = arith.extf %61 : tensor<2x128xbf16> to tensor<2x128xf32>
|
131 |
+
%63 = tt.addptr %37, %52 : tensor<1x128x!tt.ptr<f32, 1>>, tensor<1x128xi32>
|
132 |
+
%64 = tt.load %63, %53, %cst_7 {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x128xf32>
|
133 |
+
tt.assert %43, "index out of bounds: 0 <= tmp16 < 50257", "/usr/local/lib/python3.10/dist-packages/torch/_inductor/codecache.py", "<module>", 1892 : tensor<2x1xi1>
|
134 |
+
%65 = arith.extsi %52 : tensor<1x128xi32> to tensor<1x128xi64>
|
135 |
+
%66 = tt.broadcast %65 : (tensor<1x128xi64>) -> tensor<2x128xi64>
|
136 |
+
%67 = arith.addi %66, %45 : tensor<2x128xi64>
|
137 |
+
%68 = tt.addptr %46, %67 : tensor<2x128x!tt.ptr<f32, 1>>, tensor<2x128xi64>
|
138 |
+
%69 = tt.load %68, %57, %cst_8 {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<2x128xf32>
|
139 |
+
%70 = arith.addf %69, %58 : tensor<2x128xf32>
|
140 |
+
%71 = arith.addf %70, %62 : tensor<2x128xf32>
|
141 |
+
%72 = arith.subf %71, %47 : tensor<2x128xf32>
|
142 |
+
%73 = tt.extern_elementwise %49 {libname = "libdevice", libpath = "/usr/local/lib/python3.10/dist-packages/triton/language/../third_party/cuda/lib/libdevice.10.bc", pure = true, symbol = "__nv_rsqrtf"} : (tensor<2x1xf32>) -> tensor<2x1xf32>
|
143 |
+
%74 = tt.broadcast %73 : (tensor<2x1xf32>) -> tensor<2x128xf32>
|
144 |
+
%75 = arith.mulf %72, %74 : tensor<2x128xf32>
|
145 |
+
%76 = tt.broadcast %64 : (tensor<1x128xf32>) -> tensor<2x128xf32>
|
146 |
+
%77 = arith.mulf %75, %76 : tensor<2x128xf32>
|
147 |
+
%78 = tt.addptr %50, %59 : tensor<2x128x!tt.ptr<bf16, 1>>, tensor<2x128xi32>
|
148 |
+
%79 = arith.truncf %77 : tensor<2x128xf32> to tensor<2x128xbf16>
|
149 |
+
tt.store %78, %79, %57 {cache = 1 : i32, evict = 1 : i32} : tensor<2x128xbf16>
|
150 |
+
}
|
151 |
+
tt.return
|
152 |
+
}
|
153 |
+
}
|
.triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.cubin
ADDED
Binary file (39.4 kB). View file
|
|
.triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.llir
ADDED
@@ -0,0 +1,745 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
; ModuleID = 'LLVMDialectModule'
|
2 |
+
source_filename = "LLVMDialectModule"
|
3 |
+
|
4 |
+
@global_smem = external addrspace(3) global [0 x i8]
|
5 |
+
@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
|
6 |
+
|
7 |
+
define void @triton__0d1d2d3d4de5(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, i64 %4, i64 %5) local_unnamed_addr !dbg !7 {
|
8 |
+
%7 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
|
9 |
+
%8 = lshr i32 %7, 5, !dbg !10
|
10 |
+
%urem = and i32 %7, 255, !dbg !10
|
11 |
+
%9 = or i32 %urem, 256, !dbg !10
|
12 |
+
%10 = or i32 %urem, 512, !dbg !10
|
13 |
+
%11 = or i32 %urem, 768, !dbg !10
|
14 |
+
%12 = or i32 %urem, 1024, !dbg !10
|
15 |
+
%13 = or i32 %urem, 1280, !dbg !10
|
16 |
+
%14 = or i32 %urem, 1536, !dbg !10
|
17 |
+
%15 = or i32 %urem, 1792, !dbg !10
|
18 |
+
%16 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #5, !dbg !11
|
19 |
+
%17 = sext i32 %16 to i64, !dbg !12
|
20 |
+
%18 = insertelement <8 x i32> poison, i32 %urem, i64 0
|
21 |
+
%19 = insertelement <8 x i32> %18, i32 %9, i64 1
|
22 |
+
%20 = insertelement <8 x i32> %19, i32 %10, i64 2
|
23 |
+
%21 = insertelement <8 x i32> %20, i32 %11, i64 3
|
24 |
+
%22 = insertelement <8 x i32> %21, i32 %12, i64 4
|
25 |
+
%23 = insertelement <8 x i32> %22, i32 %13, i64 5
|
26 |
+
%24 = insertelement <8 x i32> %23, i32 %14, i64 6
|
27 |
+
%25 = insertelement <8 x i32> %24, i32 %15, i64 7
|
28 |
+
%26 = zext <8 x i32> %25 to <8 x i64>
|
29 |
+
%27 = mul nsw i64 %17, 50257, !dbg !13
|
30 |
+
%invariant.gep = getelementptr i16, ptr addrspace(1) %0, i64 %27, !dbg !14
|
31 |
+
br label %28, !dbg !14
|
32 |
+
|
33 |
+
28: ; preds = %6, %28
|
34 |
+
%29 = phi i32 [ 0, %6 ], [ %81, %28 ]
|
35 |
+
%30 = phi <8 x float> [ <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>, %6 ], [ %80, %28 ]
|
36 |
+
%31 = zext nneg i32 %29 to i64, !dbg !15
|
37 |
+
%32 = fcmp ord <8 x float> %30, zeroinitializer, !dbg !16
|
38 |
+
%33 = insertelement <8 x i64> poison, i64 %31, i64 0, !dbg !15
|
39 |
+
%34 = shufflevector <8 x i64> %33, <8 x i64> poison, <8 x i32> zeroinitializer, !dbg !15
|
40 |
+
%35 = or <8 x i64> %34, %26, !dbg !15
|
41 |
+
%36 = icmp ult <8 x i64> %35, <i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257>, !dbg !20
|
42 |
+
%37 = extractelement <8 x i64> %35, i64 0, !dbg !21
|
43 |
+
%gep = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %37, !dbg !21
|
44 |
+
%38 = extractelement <8 x i64> %35, i64 1, !dbg !21
|
45 |
+
%gep21 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %38, !dbg !21
|
46 |
+
%39 = extractelement <8 x i64> %35, i64 2, !dbg !21
|
47 |
+
%gep23 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %39, !dbg !21
|
48 |
+
%40 = extractelement <8 x i64> %35, i64 3, !dbg !21
|
49 |
+
%gep25 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %40, !dbg !21
|
50 |
+
%41 = extractelement <8 x i64> %35, i64 4, !dbg !21
|
51 |
+
%gep27 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %41, !dbg !21
|
52 |
+
%42 = extractelement <8 x i64> %35, i64 5, !dbg !21
|
53 |
+
%gep29 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %42, !dbg !21
|
54 |
+
%43 = extractelement <8 x i64> %35, i64 6, !dbg !21
|
55 |
+
%gep31 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %43, !dbg !21
|
56 |
+
%44 = extractelement <8 x i64> %35, i64 7, !dbg !21
|
57 |
+
%gep33 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %44, !dbg !21
|
58 |
+
%45 = extractelement <8 x i1> %36, i64 0, !dbg !22
|
59 |
+
%46 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep, i1 %45, i16 0, i1 %45) #5, !dbg !22
|
60 |
+
%47 = extractelement <8 x i1> %36, i64 1, !dbg !22
|
61 |
+
%48 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep21, i1 %47, i16 0, i1 %47) #5, !dbg !22
|
62 |
+
%49 = extractelement <8 x i1> %36, i64 2, !dbg !22
|
63 |
+
%50 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep23, i1 %49, i16 0, i1 %49) #5, !dbg !22
|
64 |
+
%51 = extractelement <8 x i1> %36, i64 3, !dbg !22
|
65 |
+
%52 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep25, i1 %51, i16 0, i1 %51) #5, !dbg !22
|
66 |
+
%53 = extractelement <8 x i1> %36, i64 4, !dbg !22
|
67 |
+
%54 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep27, i1 %53, i16 0, i1 %53) #5, !dbg !22
|
68 |
+
%55 = extractelement <8 x i1> %36, i64 5, !dbg !22
|
69 |
+
%56 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep29, i1 %55, i16 0, i1 %55) #5, !dbg !22
|
70 |
+
%57 = extractelement <8 x i1> %36, i64 6, !dbg !22
|
71 |
+
%58 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep31, i1 %57, i16 0, i1 %57) #5, !dbg !22
|
72 |
+
%59 = extractelement <8 x i1> %36, i64 7, !dbg !22
|
73 |
+
%60 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep33, i1 %59, i16 0, i1 %59) #5, !dbg !22
|
74 |
+
%61 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %46) #5, !dbg !23
|
75 |
+
%62 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %48) #5, !dbg !23
|
76 |
+
%63 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %50) #5, !dbg !23
|
77 |
+
%64 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %52) #5, !dbg !23
|
78 |
+
%65 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %54) #5, !dbg !23
|
79 |
+
%66 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %56) #5, !dbg !23
|
80 |
+
%67 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %58) #5, !dbg !23
|
81 |
+
%68 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %60) #5, !dbg !23
|
82 |
+
%69 = insertelement <8 x float> poison, float %61, i64 0, !dbg !24
|
83 |
+
%70 = insertelement <8 x float> %69, float %62, i64 1, !dbg !24
|
84 |
+
%71 = insertelement <8 x float> %70, float %63, i64 2, !dbg !24
|
85 |
+
%72 = insertelement <8 x float> %71, float %64, i64 3, !dbg !24
|
86 |
+
%73 = insertelement <8 x float> %72, float %65, i64 4, !dbg !24
|
87 |
+
%74 = insertelement <8 x float> %73, float %66, i64 5, !dbg !24
|
88 |
+
%75 = insertelement <8 x float> %74, float %67, i64 6, !dbg !24
|
89 |
+
%76 = insertelement <8 x float> %75, float %68, i64 7, !dbg !24
|
90 |
+
%77 = fcmp ule <8 x float> %30, %76, !dbg !24
|
91 |
+
%78 = and <8 x i1> %32, %77, !dbg !25
|
92 |
+
%79 = and <8 x i1> %36, %78, !dbg !26
|
93 |
+
%80 = select <8 x i1> %79, <8 x float> %76, <8 x float> %30, !dbg !26
|
94 |
+
%81 = add nuw nsw i32 %29, 2048, !dbg !14
|
95 |
+
%82 = icmp ult i32 %29, 48209, !dbg !14
|
96 |
+
br i1 %82, label %28, label %83, !dbg !14
|
97 |
+
|
98 |
+
83: ; preds = %28
|
99 |
+
%84 = and i32 %7, 31, !dbg !10
|
100 |
+
%85 = and i32 %8, 7, !dbg !10
|
101 |
+
%86 = extractelement <8 x float> %80, i64 0, !dbg !27
|
102 |
+
%87 = extractelement <8 x float> %80, i64 1, !dbg !27
|
103 |
+
%88 = fcmp ogt float %86, %87, !dbg !27
|
104 |
+
%89 = fcmp uno float %86, 0.000000e+00, !dbg !31
|
105 |
+
%90 = or i1 %88, %89, !dbg !32
|
106 |
+
%91 = select i1 %90, float %86, float %87, !dbg !33
|
107 |
+
%92 = extractelement <8 x float> %80, i64 2, !dbg !27
|
108 |
+
%93 = fcmp ogt float %91, %92, !dbg !27
|
109 |
+
%94 = fcmp uno float %91, 0.000000e+00, !dbg !31
|
110 |
+
%95 = or i1 %93, %94, !dbg !32
|
111 |
+
%96 = select i1 %95, float %91, float %92, !dbg !33
|
112 |
+
%97 = extractelement <8 x float> %80, i64 3, !dbg !27
|
113 |
+
%98 = fcmp ogt float %96, %97, !dbg !27
|
114 |
+
%99 = fcmp uno float %96, 0.000000e+00, !dbg !31
|
115 |
+
%100 = or i1 %98, %99, !dbg !32
|
116 |
+
%101 = select i1 %100, float %96, float %97, !dbg !33
|
117 |
+
%102 = extractelement <8 x float> %80, i64 4, !dbg !27
|
118 |
+
%103 = fcmp ogt float %101, %102, !dbg !27
|
119 |
+
%104 = fcmp uno float %101, 0.000000e+00, !dbg !31
|
120 |
+
%105 = or i1 %103, %104, !dbg !32
|
121 |
+
%106 = select i1 %105, float %101, float %102, !dbg !33
|
122 |
+
%107 = extractelement <8 x float> %80, i64 5, !dbg !27
|
123 |
+
%108 = fcmp ogt float %106, %107, !dbg !27
|
124 |
+
%109 = fcmp uno float %106, 0.000000e+00, !dbg !31
|
125 |
+
%110 = or i1 %108, %109, !dbg !32
|
126 |
+
%111 = select i1 %110, float %106, float %107, !dbg !33
|
127 |
+
%112 = extractelement <8 x float> %80, i64 6, !dbg !27
|
128 |
+
%113 = fcmp ogt float %111, %112, !dbg !27
|
129 |
+
%114 = fcmp uno float %111, 0.000000e+00, !dbg !31
|
130 |
+
%115 = or i1 %113, %114, !dbg !32
|
131 |
+
%116 = select i1 %115, float %111, float %112, !dbg !33
|
132 |
+
%117 = extractelement <8 x float> %80, i64 7, !dbg !27
|
133 |
+
%118 = fcmp ogt float %116, %117, !dbg !27
|
134 |
+
%119 = fcmp uno float %116, 0.000000e+00, !dbg !31
|
135 |
+
%120 = or i1 %118, %119, !dbg !32
|
136 |
+
%121 = select i1 %120, float %116, float %117, !dbg !33
|
137 |
+
%122 = bitcast float %121 to i32, !dbg !34
|
138 |
+
%123 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %122, i32 16, i32 31), !dbg !34
|
139 |
+
%124 = bitcast i32 %123 to float, !dbg !34
|
140 |
+
%125 = fcmp ogt float %121, %124, !dbg !27
|
141 |
+
%126 = fcmp uno float %121, 0.000000e+00, !dbg !31
|
142 |
+
%127 = or i1 %126, %125, !dbg !32
|
143 |
+
%128 = select i1 %127, float %121, float %124, !dbg !33
|
144 |
+
%129 = bitcast float %128 to i32, !dbg !34
|
145 |
+
%130 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %129, i32 8, i32 31), !dbg !34
|
146 |
+
%131 = bitcast i32 %130 to float, !dbg !34
|
147 |
+
%132 = fcmp ogt float %128, %131, !dbg !27
|
148 |
+
%133 = fcmp uno float %128, 0.000000e+00, !dbg !31
|
149 |
+
%134 = or i1 %132, %133, !dbg !32
|
150 |
+
%135 = select i1 %134, float %128, float %131, !dbg !33
|
151 |
+
%136 = bitcast float %135 to i32, !dbg !34
|
152 |
+
%137 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %136, i32 4, i32 31), !dbg !34
|
153 |
+
%138 = bitcast i32 %137 to float, !dbg !34
|
154 |
+
%139 = fcmp ogt float %135, %138, !dbg !27
|
155 |
+
%140 = fcmp uno float %135, 0.000000e+00, !dbg !31
|
156 |
+
%141 = or i1 %139, %140, !dbg !32
|
157 |
+
%142 = select i1 %141, float %135, float %138, !dbg !33
|
158 |
+
%143 = bitcast float %142 to i32, !dbg !34
|
159 |
+
%144 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %143, i32 2, i32 31), !dbg !34
|
160 |
+
%145 = bitcast i32 %144 to float, !dbg !34
|
161 |
+
%146 = fcmp ogt float %142, %145, !dbg !27
|
162 |
+
%147 = fcmp uno float %142, 0.000000e+00, !dbg !31
|
163 |
+
%148 = or i1 %146, %147, !dbg !32
|
164 |
+
%149 = select i1 %148, float %142, float %145, !dbg !33
|
165 |
+
%150 = bitcast float %149 to i32, !dbg !34
|
166 |
+
%151 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %150, i32 1, i32 31), !dbg !34
|
167 |
+
%152 = bitcast i32 %151 to float, !dbg !34
|
168 |
+
%153 = fcmp ogt float %149, %152, !dbg !27
|
169 |
+
%154 = fcmp uno float %149, 0.000000e+00, !dbg !31
|
170 |
+
%155 = or i1 %153, %154, !dbg !32
|
171 |
+
%156 = select i1 %155, float %149, float %152, !dbg !33
|
172 |
+
%157 = icmp eq i32 %84, 0, !dbg !34
|
173 |
+
%158 = zext nneg i32 %85 to i64, !dbg !34
|
174 |
+
%159 = getelementptr float, ptr addrspace(3) @global_smem, i64 %158, !dbg !34
|
175 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %159, float %156, i1 %157) #5, !dbg !34
|
176 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !34
|
177 |
+
%160 = icmp slt i32 %7, 8, !dbg !34
|
178 |
+
%161 = sext i32 %7 to i64, !dbg !34
|
179 |
+
%162 = getelementptr float, ptr addrspace(3) @global_smem, i64 %161, !dbg !34
|
180 |
+
%163 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %162, i1 %160) #5, !dbg !34
|
181 |
+
%164 = bitcast float %163 to i32, !dbg !34
|
182 |
+
%165 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %164, i32 4, i32 31), !dbg !34
|
183 |
+
%166 = bitcast i32 %165 to float, !dbg !34
|
184 |
+
%167 = fcmp ogt float %163, %166, !dbg !27
|
185 |
+
%168 = fcmp uno float %163, 0.000000e+00, !dbg !31
|
186 |
+
%169 = or i1 %168, %167, !dbg !32
|
187 |
+
%170 = select i1 %169, float %163, float %166, !dbg !33
|
188 |
+
%171 = bitcast float %170 to i32, !dbg !34
|
189 |
+
%172 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %171, i32 2, i32 31), !dbg !34
|
190 |
+
%173 = bitcast i32 %172 to float, !dbg !34
|
191 |
+
%174 = fcmp ogt float %170, %173, !dbg !27
|
192 |
+
%175 = fcmp uno float %170, 0.000000e+00, !dbg !31
|
193 |
+
%176 = or i1 %174, %175, !dbg !32
|
194 |
+
%177 = select i1 %176, float %170, float %173, !dbg !33
|
195 |
+
%178 = bitcast float %177 to i32, !dbg !34
|
196 |
+
%179 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %178, i32 1, i32 31), !dbg !34
|
197 |
+
%180 = bitcast i32 %179 to float, !dbg !34
|
198 |
+
%181 = fcmp ogt float %177, %180, !dbg !27
|
199 |
+
%182 = fcmp uno float %177, 0.000000e+00, !dbg !31
|
200 |
+
%183 = or i1 %181, %182, !dbg !32
|
201 |
+
%184 = select i1 %183, float %177, float %180, !dbg !33
|
202 |
+
%185 = and i32 %7, 7, !dbg !34
|
203 |
+
%186 = icmp eq i32 %185, 0, !dbg !34
|
204 |
+
%187 = and i1 %160, %186, !dbg !34
|
205 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %162, float %184, i1 %187) #5, !dbg !34
|
206 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !34
|
207 |
+
%188 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !34
|
208 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !36
|
209 |
+
%189 = insertelement <1 x float> undef, float %188, i64 0, !dbg !36
|
210 |
+
store <1 x float> %189, ptr addrspace(3) @global_smem, align 4, !dbg !36
|
211 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !36
|
212 |
+
%190 = load i32, ptr addrspace(3) @global_smem, align 4, !dbg !36
|
213 |
+
%191 = getelementptr float, ptr addrspace(1) %1, i64 %17, !dbg !37
|
214 |
+
%192 = icmp eq i32 %urem, 0, !dbg !38
|
215 |
+
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %190, ptr addrspace(1) %191, i1 %192) #5, !dbg !38
|
216 |
+
br label %193, !dbg !39
|
217 |
+
|
218 |
+
193: ; preds = %83, %193
|
219 |
+
%194 = phi i32 [ 0, %83 ], [ %267, %193 ]
|
220 |
+
%195 = phi <8 x float> [ zeroinitializer, %83 ], [ %266, %193 ]
|
221 |
+
%196 = zext nneg i32 %194 to i64, !dbg !40
|
222 |
+
%197 = insertelement <8 x i64> poison, i64 %196, i64 0, !dbg !40
|
223 |
+
%198 = shufflevector <8 x i64> %197, <8 x i64> poison, <8 x i32> zeroinitializer, !dbg !40
|
224 |
+
%199 = or <8 x i64> %198, %26, !dbg !40
|
225 |
+
%200 = icmp ult <8 x i64> %199, <i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257, i64 50257>, !dbg !41
|
226 |
+
%201 = extractelement <8 x i64> %199, i64 0, !dbg !42
|
227 |
+
%gep35 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %201, !dbg !42
|
228 |
+
%202 = extractelement <8 x i64> %199, i64 1, !dbg !42
|
229 |
+
%gep37 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %202, !dbg !42
|
230 |
+
%203 = extractelement <8 x i64> %199, i64 2, !dbg !42
|
231 |
+
%gep39 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %203, !dbg !42
|
232 |
+
%204 = extractelement <8 x i64> %199, i64 3, !dbg !42
|
233 |
+
%gep41 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %204, !dbg !42
|
234 |
+
%205 = extractelement <8 x i64> %199, i64 4, !dbg !42
|
235 |
+
%gep43 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %205, !dbg !42
|
236 |
+
%206 = extractelement <8 x i64> %199, i64 5, !dbg !42
|
237 |
+
%gep45 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %206, !dbg !42
|
238 |
+
%207 = extractelement <8 x i64> %199, i64 6, !dbg !42
|
239 |
+
%gep47 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %207, !dbg !42
|
240 |
+
%208 = extractelement <8 x i64> %199, i64 7, !dbg !42
|
241 |
+
%gep49 = getelementptr i16, ptr addrspace(1) %invariant.gep, i64 %208, !dbg !42
|
242 |
+
%209 = extractelement <8 x i1> %200, i64 0, !dbg !43
|
243 |
+
%210 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep35, i1 %209, i16 0, i1 %209) #5, !dbg !43
|
244 |
+
%211 = extractelement <8 x i1> %200, i64 1, !dbg !43
|
245 |
+
%212 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep37, i1 %211, i16 0, i1 %211) #5, !dbg !43
|
246 |
+
%213 = extractelement <8 x i1> %200, i64 2, !dbg !43
|
247 |
+
%214 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep39, i1 %213, i16 0, i1 %213) #5, !dbg !43
|
248 |
+
%215 = extractelement <8 x i1> %200, i64 3, !dbg !43
|
249 |
+
%216 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep41, i1 %215, i16 0, i1 %215) #5, !dbg !43
|
250 |
+
%217 = extractelement <8 x i1> %200, i64 4, !dbg !43
|
251 |
+
%218 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep43, i1 %217, i16 0, i1 %217) #5, !dbg !43
|
252 |
+
%219 = extractelement <8 x i1> %200, i64 5, !dbg !43
|
253 |
+
%220 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep45, i1 %219, i16 0, i1 %219) #5, !dbg !43
|
254 |
+
%221 = extractelement <8 x i1> %200, i64 6, !dbg !43
|
255 |
+
%222 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep47, i1 %221, i16 0, i1 %221) #5, !dbg !43
|
256 |
+
%223 = extractelement <8 x i1> %200, i64 7, !dbg !43
|
257 |
+
%224 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %gep49, i1 %223, i16 0, i1 %223) #5, !dbg !43
|
258 |
+
%225 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %210) #5, !dbg !44
|
259 |
+
%226 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %212) #5, !dbg !44
|
260 |
+
%227 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %214) #5, !dbg !44
|
261 |
+
%228 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %216) #5, !dbg !44
|
262 |
+
%229 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %218) #5, !dbg !44
|
263 |
+
%230 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %220) #5, !dbg !44
|
264 |
+
%231 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %222) #5, !dbg !44
|
265 |
+
%232 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %224) #5, !dbg !44
|
266 |
+
%233 = fsub float %225, %188, !dbg !45
|
267 |
+
%234 = fsub float %226, %188, !dbg !45
|
268 |
+
%235 = fsub float %227, %188, !dbg !45
|
269 |
+
%236 = fsub float %228, %188, !dbg !45
|
270 |
+
%237 = fsub float %229, %188, !dbg !45
|
271 |
+
%238 = fsub float %230, %188, !dbg !45
|
272 |
+
%239 = fsub float %231, %188, !dbg !45
|
273 |
+
%240 = fsub float %232, %188, !dbg !45
|
274 |
+
%241 = fmul float %233, 0x3FF7154760000000, !dbg !46
|
275 |
+
%242 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %241) #5, !dbg !46
|
276 |
+
%243 = fmul float %234, 0x3FF7154760000000, !dbg !46
|
277 |
+
%244 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %243) #5, !dbg !46
|
278 |
+
%245 = fmul float %235, 0x3FF7154760000000, !dbg !46
|
279 |
+
%246 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %245) #5, !dbg !46
|
280 |
+
%247 = fmul float %236, 0x3FF7154760000000, !dbg !46
|
281 |
+
%248 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %247) #5, !dbg !46
|
282 |
+
%249 = fmul float %237, 0x3FF7154760000000, !dbg !46
|
283 |
+
%250 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %249) #5, !dbg !46
|
284 |
+
%251 = fmul float %238, 0x3FF7154760000000, !dbg !46
|
285 |
+
%252 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %251) #5, !dbg !46
|
286 |
+
%253 = fmul float %239, 0x3FF7154760000000, !dbg !46
|
287 |
+
%254 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %253) #5, !dbg !46
|
288 |
+
%255 = fmul float %240, 0x3FF7154760000000, !dbg !46
|
289 |
+
%256 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %255) #5, !dbg !46
|
290 |
+
%257 = insertelement <8 x float> poison, float %242, i64 0, !dbg !47
|
291 |
+
%258 = insertelement <8 x float> %257, float %244, i64 1, !dbg !47
|
292 |
+
%259 = insertelement <8 x float> %258, float %246, i64 2, !dbg !47
|
293 |
+
%260 = insertelement <8 x float> %259, float %248, i64 3, !dbg !47
|
294 |
+
%261 = insertelement <8 x float> %260, float %250, i64 4, !dbg !47
|
295 |
+
%262 = insertelement <8 x float> %261, float %252, i64 5, !dbg !47
|
296 |
+
%263 = insertelement <8 x float> %262, float %254, i64 6, !dbg !47
|
297 |
+
%264 = insertelement <8 x float> %263, float %256, i64 7, !dbg !47
|
298 |
+
%265 = select <8 x i1> %200, <8 x float> %264, <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, !dbg !47
|
299 |
+
%266 = fadd <8 x float> %195, %265, !dbg !47
|
300 |
+
%267 = add nuw nsw i32 %194, 2048, !dbg !39
|
301 |
+
%268 = icmp ult i32 %194, 48209, !dbg !39
|
302 |
+
br i1 %268, label %193, label %269, !dbg !39
|
303 |
+
|
304 |
+
269: ; preds = %193
|
305 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !48
|
306 |
+
%shift = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
|
307 |
+
%270 = fadd <8 x float> %266, %shift, !dbg !52
|
308 |
+
%shift95 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
|
309 |
+
%271 = fadd <8 x float> %shift95, %270, !dbg !52
|
310 |
+
%shift96 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
|
311 |
+
%272 = fadd <8 x float> %shift96, %271, !dbg !52
|
312 |
+
%shift97 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
|
313 |
+
%273 = fadd <8 x float> %shift97, %272, !dbg !52
|
314 |
+
%shift98 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
|
315 |
+
%274 = fadd <8 x float> %shift98, %273, !dbg !52
|
316 |
+
%shift99 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
|
317 |
+
%275 = fadd <8 x float> %shift99, %274, !dbg !52
|
318 |
+
%shift100 = shufflevector <8 x float> %266, <8 x float> poison, <8 x i32> <i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, !dbg !52
|
319 |
+
%276 = fadd <8 x float> %shift100, %275, !dbg !52
|
320 |
+
%277 = extractelement <8 x float> %276, i64 0, !dbg !52
|
321 |
+
%278 = bitcast float %277 to i32, !dbg !48
|
322 |
+
%279 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %278, i32 16, i32 31), !dbg !48
|
323 |
+
%280 = bitcast i32 %279 to float, !dbg !48
|
324 |
+
%281 = fadd float %277, %280, !dbg !52
|
325 |
+
%282 = bitcast float %281 to i32, !dbg !48
|
326 |
+
%283 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %282, i32 8, i32 31), !dbg !48
|
327 |
+
%284 = bitcast i32 %283 to float, !dbg !48
|
328 |
+
%285 = fadd float %281, %284, !dbg !52
|
329 |
+
%286 = bitcast float %285 to i32, !dbg !48
|
330 |
+
%287 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %286, i32 4, i32 31), !dbg !48
|
331 |
+
%288 = bitcast i32 %287 to float, !dbg !48
|
332 |
+
%289 = fadd float %285, %288, !dbg !52
|
333 |
+
%290 = bitcast float %289 to i32, !dbg !48
|
334 |
+
%291 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %290, i32 2, i32 31), !dbg !48
|
335 |
+
%292 = bitcast i32 %291 to float, !dbg !48
|
336 |
+
%293 = fadd float %289, %292, !dbg !52
|
337 |
+
%294 = bitcast float %293 to i32, !dbg !48
|
338 |
+
%295 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %294, i32 1, i32 31), !dbg !48
|
339 |
+
%296 = bitcast i32 %295 to float, !dbg !48
|
340 |
+
%297 = fadd float %293, %296, !dbg !52
|
341 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %159, float %297, i1 %157) #5, !dbg !48
|
342 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !48
|
343 |
+
%298 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %162, i1 %160) #5, !dbg !48
|
344 |
+
%299 = bitcast float %298 to i32, !dbg !48
|
345 |
+
%300 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %299, i32 4, i32 31), !dbg !48
|
346 |
+
%301 = bitcast i32 %300 to float, !dbg !48
|
347 |
+
%302 = fadd float %298, %301, !dbg !52
|
348 |
+
%303 = bitcast float %302 to i32, !dbg !48
|
349 |
+
%304 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %303, i32 2, i32 31), !dbg !48
|
350 |
+
%305 = bitcast i32 %304 to float, !dbg !48
|
351 |
+
%306 = fadd float %302, %305, !dbg !52
|
352 |
+
%307 = bitcast float %306 to i32, !dbg !48
|
353 |
+
%308 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %307, i32 1, i32 31), !dbg !48
|
354 |
+
%309 = bitcast i32 %308 to float, !dbg !48
|
355 |
+
%310 = fadd float %306, %309, !dbg !52
|
356 |
+
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %162, float %310, i1 %187) #5, !dbg !48
|
357 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !48
|
358 |
+
%311 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !48
|
359 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !56
|
360 |
+
%312 = insertelement <1 x float> undef, float %311, i64 0, !dbg !56
|
361 |
+
store <1 x float> %312, ptr addrspace(3) @global_smem, align 4, !dbg !56
|
362 |
+
tail call void @llvm.nvvm.barrier0(), !dbg !56
|
363 |
+
%313 = load i32, ptr addrspace(3) @global_smem, align 4, !dbg !56
|
364 |
+
%314 = getelementptr float, ptr addrspace(1) %2, i64 %17, !dbg !57
|
365 |
+
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %313, ptr addrspace(1) %314, i1 %192) #5, !dbg !58
|
366 |
+
%315 = fcmp olt float %311, 0x3810000000000000, !dbg !59
|
367 |
+
%316 = fmul float %311, 0x4160000000000000, !dbg !59
|
368 |
+
%.02.i = select i1 %315, float %316, float %311, !dbg !59
|
369 |
+
%i.i.0.i = select i1 %315, float -2.300000e+01, float 0.000000e+00, !dbg !59
|
370 |
+
%317 = bitcast float %.02.i to i32, !dbg !59
|
371 |
+
%318 = add i32 %317, -1059760811, !dbg !59
|
372 |
+
%319 = and i32 %318, -8388608, !dbg !59
|
373 |
+
%320 = sub i32 %317, %319, !dbg !59
|
374 |
+
%321 = bitcast i32 %320 to float, !dbg !59
|
375 |
+
%322 = sitofp i32 %319 to float, !dbg !59
|
376 |
+
%323 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
377 |
+
%.not.i = icmp eq i32 %323, 0, !dbg !59
|
378 |
+
%324 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %322, float 0x3E80000000000000, float %i.i.0.i) #5, !dbg !59
|
379 |
+
%325 = tail call float @llvm.nvvm.fma.rn.f(float %322, float 0x3E80000000000000, float %i.i.0.i) #5, !dbg !59
|
380 |
+
%.08.i = select i1 %.not.i, float %325, float %324, !dbg !59
|
381 |
+
%326 = fadd float %321, -1.000000e+00, !dbg !59
|
382 |
+
%327 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
383 |
+
%.not1.i = icmp eq i32 %327, 0, !dbg !59
|
384 |
+
%328 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %326, float 0x3FC2073EC0000000) #5, !dbg !59
|
385 |
+
%329 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %326, float 0x3FC2073EC0000000) #5, !dbg !59
|
386 |
+
%.010.i = select i1 %.not1.i, float %329, float %328, !dbg !59
|
387 |
+
%330 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
388 |
+
%.not2.i = icmp eq i32 %330, 0, !dbg !59
|
389 |
+
%331 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i, float %326, float 0xBFBF19B980000000) #5, !dbg !59
|
390 |
+
%332 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i, float %326, float 0xBFBF19B980000000) #5, !dbg !59
|
391 |
+
%.011.i = select i1 %.not2.i, float %332, float %331, !dbg !59
|
392 |
+
%333 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
393 |
+
%.not3.i = icmp eq i32 %333, 0, !dbg !59
|
394 |
+
%334 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i, float %326, float 0x3FC1E52AA0000000) #5, !dbg !59
|
395 |
+
%335 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i, float %326, float 0x3FC1E52AA0000000) #5, !dbg !59
|
396 |
+
%.012.i = select i1 %.not3.i, float %335, float %334, !dbg !59
|
397 |
+
%336 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
398 |
+
%.not4.i = icmp eq i32 %336, 0, !dbg !59
|
399 |
+
%337 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i, float %326, float 0xBFC55B1720000000) #5, !dbg !59
|
400 |
+
%338 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i, float %326, float 0xBFC55B1720000000) #5, !dbg !59
|
401 |
+
%.09.i = select i1 %.not4.i, float %338, float %337, !dbg !59
|
402 |
+
%339 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
403 |
+
%.not5.i = icmp eq i32 %339, 0, !dbg !59
|
404 |
+
%340 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i, float %326, float 0x3FC99DA160000000) #5, !dbg !59
|
405 |
+
%341 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i, float %326, float 0x3FC99DA160000000) #5, !dbg !59
|
406 |
+
%.05.i = select i1 %.not5.i, float %341, float %340, !dbg !59
|
407 |
+
%342 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
408 |
+
%.not6.i = icmp eq i32 %342, 0, !dbg !59
|
409 |
+
%343 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i, float %326, float 0xBFCFFFE440000000) #5, !dbg !59
|
410 |
+
%344 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i, float %326, float 0xBFCFFFE440000000) #5, !dbg !59
|
411 |
+
%.01.i = select i1 %.not6.i, float %344, float %343, !dbg !59
|
412 |
+
%345 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
413 |
+
%.not7.i = icmp eq i32 %345, 0, !dbg !59
|
414 |
+
%346 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i, float %326, float 0x3FD5554F00000000) #5, !dbg !59
|
415 |
+
%347 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i, float %326, float 0x3FD5554F00000000) #5, !dbg !59
|
416 |
+
%.0.i = select i1 %.not7.i, float %347, float %346, !dbg !59
|
417 |
+
%348 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
418 |
+
%.not8.i = icmp eq i32 %348, 0, !dbg !59
|
419 |
+
%349 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i, float %326, float -5.000000e-01) #5, !dbg !59
|
420 |
+
%350 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i, float %326, float -5.000000e-01) #5, !dbg !59
|
421 |
+
%.07.i = select i1 %.not8.i, float %350, float %349, !dbg !59
|
422 |
+
%351 = fmul float %326, %.07.i, !dbg !59
|
423 |
+
%352 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
424 |
+
%.not9.i = icmp eq i32 %352, 0, !dbg !59
|
425 |
+
%353 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %351, float %326, float %326) #5, !dbg !59
|
426 |
+
%354 = tail call float @llvm.nvvm.fma.rn.f(float %351, float %326, float %326) #5, !dbg !59
|
427 |
+
%.06.i = select i1 %.not9.i, float %354, float %353, !dbg !59
|
428 |
+
%355 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
429 |
+
%.not10.i = icmp eq i32 %355, 0, !dbg !59
|
430 |
+
%356 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i, float 0x3FE62E4300000000, float %.06.i) #5, !dbg !59
|
431 |
+
%357 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i, float 0x3FE62E4300000000, float %.06.i) #5, !dbg !59
|
432 |
+
%.04.i = select i1 %.not10.i, float %357, float %356, !dbg !59
|
433 |
+
%358 = icmp ugt i32 %317, 2139095039, !dbg !59
|
434 |
+
br i1 %358, label %__nv_fmaf_rn.exit.i.i, label %__nv_logf.exit, !dbg !59
|
435 |
+
|
436 |
+
__nv_fmaf_rn.exit.i.i: ; preds = %269
|
437 |
+
%359 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !59
|
438 |
+
%.not11.i = icmp eq i32 %359, 0, !dbg !59
|
439 |
+
%360 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !59
|
440 |
+
%361 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !59
|
441 |
+
%.03.i = select i1 %.not11.i, float %361, float %360, !dbg !59
|
442 |
+
br label %__nv_logf.exit, !dbg !59
|
443 |
+
|
444 |
+
__nv_logf.exit: ; preds = %269, %__nv_fmaf_rn.exit.i.i
|
445 |
+
%r.i.0.i = phi float [ %.03.i, %__nv_fmaf_rn.exit.i.i ], [ %.04.i, %269 ], !dbg !59
|
446 |
+
%362 = fcmp oeq float %.02.i, 0.000000e+00, !dbg !59
|
447 |
+
%r.i.1.i = select i1 %362, float 0xFFF0000000000000, float %r.i.0.i, !dbg !59
|
448 |
+
%363 = extractelement <8 x i64> %26, i64 0, !dbg !60
|
449 |
+
%364 = extractelement <8 x i64> %26, i64 1, !dbg !60
|
450 |
+
%365 = extractelement <8 x i64> %26, i64 2, !dbg !60
|
451 |
+
%366 = extractelement <8 x i64> %26, i64 3, !dbg !60
|
452 |
+
%367 = extractelement <8 x i64> %26, i64 4, !dbg !60
|
453 |
+
%368 = extractelement <8 x i64> %26, i64 5, !dbg !60
|
454 |
+
%369 = extractelement <8 x i64> %26, i64 6, !dbg !60
|
455 |
+
%370 = extractelement <8 x i64> %26, i64 7, !dbg !60
|
456 |
+
br label %371, !dbg !61
|
457 |
+
|
458 |
+
371: ; preds = %__nv_logf.exit, %371
|
459 |
+
%372 = phi i32 [ 0, %__nv_logf.exit ], [ %454, %371 ]
|
460 |
+
%373 = zext nneg i32 %372 to i64, !dbg !60
|
461 |
+
%374 = or i64 %363, %373, !dbg !60
|
462 |
+
%375 = or i64 %364, %373, !dbg !60
|
463 |
+
%376 = or i64 %365, %373, !dbg !60
|
464 |
+
%377 = or i64 %366, %373, !dbg !60
|
465 |
+
%378 = or i64 %367, %373, !dbg !60
|
466 |
+
%379 = or i64 %368, %373, !dbg !60
|
467 |
+
%380 = or i64 %369, %373, !dbg !60
|
468 |
+
%381 = or i64 %370, %373, !dbg !60
|
469 |
+
%382 = icmp ult i64 %374, 50257, !dbg !62
|
470 |
+
%383 = icmp ult i64 %375, 50257, !dbg !62
|
471 |
+
%384 = icmp ult i64 %376, 50257, !dbg !62
|
472 |
+
%385 = icmp ult i64 %377, 50257, !dbg !62
|
473 |
+
%386 = icmp ult i64 %378, 50257, !dbg !62
|
474 |
+
%387 = icmp ult i64 %379, 50257, !dbg !62
|
475 |
+
%388 = icmp ult i64 %380, 50257, !dbg !62
|
476 |
+
%389 = icmp ult i64 %381, 50257, !dbg !62
|
477 |
+
%390 = add nsw i64 %374, %27, !dbg !63
|
478 |
+
%391 = add nsw i64 %375, %27, !dbg !63
|
479 |
+
%392 = add nsw i64 %376, %27, !dbg !63
|
480 |
+
%393 = add nsw i64 %377, %27, !dbg !63
|
481 |
+
%394 = add nsw i64 %378, %27, !dbg !63
|
482 |
+
%395 = add nsw i64 %379, %27, !dbg !63
|
483 |
+
%396 = add nsw i64 %380, %27, !dbg !63
|
484 |
+
%397 = add nsw i64 %381, %27, !dbg !63
|
485 |
+
%398 = getelementptr i16, ptr addrspace(1) %0, i64 %390, !dbg !64
|
486 |
+
%399 = getelementptr i16, ptr addrspace(1) %0, i64 %391, !dbg !64
|
487 |
+
%400 = getelementptr i16, ptr addrspace(1) %0, i64 %392, !dbg !64
|
488 |
+
%401 = getelementptr i16, ptr addrspace(1) %0, i64 %393, !dbg !64
|
489 |
+
%402 = getelementptr i16, ptr addrspace(1) %0, i64 %394, !dbg !64
|
490 |
+
%403 = getelementptr i16, ptr addrspace(1) %0, i64 %395, !dbg !64
|
491 |
+
%404 = getelementptr i16, ptr addrspace(1) %0, i64 %396, !dbg !64
|
492 |
+
%405 = getelementptr i16, ptr addrspace(1) %0, i64 %397, !dbg !64
|
493 |
+
%406 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %398, i1 %382, i16 0, i1 %382) #5, !dbg !65
|
494 |
+
%407 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %399, i1 %383, i16 0, i1 %383) #5, !dbg !65
|
495 |
+
%408 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %400, i1 %384, i16 0, i1 %384) #5, !dbg !65
|
496 |
+
%409 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %401, i1 %385, i16 0, i1 %385) #5, !dbg !65
|
497 |
+
%410 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %402, i1 %386, i16 0, i1 %386) #5, !dbg !65
|
498 |
+
%411 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %403, i1 %387, i16 0, i1 %387) #5, !dbg !65
|
499 |
+
%412 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %404, i1 %388, i16 0, i1 %388) #5, !dbg !65
|
500 |
+
%413 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_first.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %405, i1 %389, i16 0, i1 %389) #5, !dbg !65
|
501 |
+
%414 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %406) #5, !dbg !66
|
502 |
+
%415 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %407) #5, !dbg !66
|
503 |
+
%416 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %408) #5, !dbg !66
|
504 |
+
%417 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %409) #5, !dbg !66
|
505 |
+
%418 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %410) #5, !dbg !66
|
506 |
+
%419 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %411) #5, !dbg !66
|
507 |
+
%420 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %412) #5, !dbg !66
|
508 |
+
%421 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %413) #5, !dbg !66
|
509 |
+
%422 = fsub float %414, %188, !dbg !67
|
510 |
+
%423 = fsub float %415, %188, !dbg !67
|
511 |
+
%424 = fsub float %416, %188, !dbg !67
|
512 |
+
%425 = fsub float %417, %188, !dbg !67
|
513 |
+
%426 = fsub float %418, %188, !dbg !67
|
514 |
+
%427 = fsub float %419, %188, !dbg !67
|
515 |
+
%428 = fsub float %420, %188, !dbg !67
|
516 |
+
%429 = fsub float %421, %188, !dbg !67
|
517 |
+
%430 = fsub float %422, %r.i.1.i, !dbg !68
|
518 |
+
%431 = fsub float %423, %r.i.1.i, !dbg !68
|
519 |
+
%432 = fsub float %424, %r.i.1.i, !dbg !68
|
520 |
+
%433 = fsub float %425, %r.i.1.i, !dbg !68
|
521 |
+
%434 = fsub float %426, %r.i.1.i, !dbg !68
|
522 |
+
%435 = fsub float %427, %r.i.1.i, !dbg !68
|
523 |
+
%436 = fsub float %428, %r.i.1.i, !dbg !68
|
524 |
+
%437 = fsub float %429, %r.i.1.i, !dbg !68
|
525 |
+
%438 = getelementptr i16, ptr addrspace(1) %3, i64 %390, !dbg !69
|
526 |
+
%439 = getelementptr i16, ptr addrspace(1) %3, i64 %391, !dbg !69
|
527 |
+
%440 = getelementptr i16, ptr addrspace(1) %3, i64 %392, !dbg !69
|
528 |
+
%441 = getelementptr i16, ptr addrspace(1) %3, i64 %393, !dbg !69
|
529 |
+
%442 = getelementptr i16, ptr addrspace(1) %3, i64 %394, !dbg !69
|
530 |
+
%443 = getelementptr i16, ptr addrspace(1) %3, i64 %395, !dbg !69
|
531 |
+
%444 = getelementptr i16, ptr addrspace(1) %3, i64 %396, !dbg !69
|
532 |
+
%445 = getelementptr i16, ptr addrspace(1) %3, i64 %397, !dbg !69
|
533 |
+
%446 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %430) #5, !dbg !70
|
534 |
+
%447 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %431) #5, !dbg !70
|
535 |
+
%448 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %432) #5, !dbg !70
|
536 |
+
%449 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %433) #5, !dbg !70
|
537 |
+
%450 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %434) #5, !dbg !70
|
538 |
+
%451 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %435) #5, !dbg !70
|
539 |
+
%452 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %436) #5, !dbg !70
|
540 |
+
%453 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %437) #5, !dbg !70
|
541 |
+
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %446, ptr addrspace(1) %438, i1 %382) #5, !dbg !70
|
542 |
+
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %447, ptr addrspace(1) %439, i1 %383) #5, !dbg !70
|
543 |
+
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %448, ptr addrspace(1) %440, i1 %384) #5, !dbg !70
|
544 |
+
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %449, ptr addrspace(1) %441, i1 %385) #5, !dbg !70
|
545 |
+
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %450, ptr addrspace(1) %442, i1 %386) #5, !dbg !70
|
546 |
+
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %451, ptr addrspace(1) %443, i1 %387) #5, !dbg !70
|
547 |
+
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %452, ptr addrspace(1) %444, i1 %388) #5, !dbg !70
|
548 |
+
tail call void asm sideeffect "@$2 st.global.b16 [ $1 + 0 ], { $0 };", "c,l,b"(i16 %453, ptr addrspace(1) %445, i1 %389) #5, !dbg !70
|
549 |
+
%454 = add nuw nsw i32 %372, 2048, !dbg !61
|
550 |
+
%455 = icmp ult i32 %372, 48209, !dbg !61
|
551 |
+
br i1 %455, label %371, label %456, !dbg !61
|
552 |
+
|
553 |
+
456: ; preds = %371
|
554 |
+
ret void, !dbg !71
|
555 |
+
}
|
556 |
+
|
557 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
558 |
+
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
|
559 |
+
|
560 |
+
; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
|
561 |
+
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #1
|
562 |
+
|
563 |
+
; Function Attrs: convergent nocallback nounwind
|
564 |
+
declare void @llvm.nvvm.barrier0() #2
|
565 |
+
|
566 |
+
; Function Attrs: alwaysinline nounwind
|
567 |
+
define float @__nv_logf(float %a) local_unnamed_addr #3 {
|
568 |
+
__nv_fmaf_rn.exit10.i:
|
569 |
+
%0 = fcmp olt float %a, 0x3810000000000000
|
570 |
+
%1 = fmul float %a, 0x4160000000000000
|
571 |
+
%.02 = select i1 %0, float %1, float %a
|
572 |
+
%i.i.0 = select i1 %0, float -2.300000e+01, float 0.000000e+00
|
573 |
+
%2 = bitcast float %.02 to i32
|
574 |
+
%3 = add i32 %2, -1059760811
|
575 |
+
%4 = and i32 %3, -8388608
|
576 |
+
%5 = sub i32 %2, %4
|
577 |
+
%6 = bitcast i32 %5 to float
|
578 |
+
%7 = sitofp i32 %4 to float
|
579 |
+
%8 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
580 |
+
%.not = icmp eq i32 %8, 0
|
581 |
+
%9 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %7, float 0x3E80000000000000, float %i.i.0) #5
|
582 |
+
%10 = tail call float @llvm.nvvm.fma.rn.f(float %7, float 0x3E80000000000000, float %i.i.0) #5
|
583 |
+
%.08 = select i1 %.not, float %10, float %9
|
584 |
+
%11 = fadd float %6, -1.000000e+00
|
585 |
+
%12 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
586 |
+
%.not1 = icmp eq i32 %12, 0
|
587 |
+
%13 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %11, float 0x3FC2073EC0000000) #5
|
588 |
+
%14 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %11, float 0x3FC2073EC0000000) #5
|
589 |
+
%.010 = select i1 %.not1, float %14, float %13
|
590 |
+
%15 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
591 |
+
%.not2 = icmp eq i32 %15, 0
|
592 |
+
%16 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010, float %11, float 0xBFBF19B980000000) #5
|
593 |
+
%17 = tail call float @llvm.nvvm.fma.rn.f(float %.010, float %11, float 0xBFBF19B980000000) #5
|
594 |
+
%.011 = select i1 %.not2, float %17, float %16
|
595 |
+
%18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
596 |
+
%.not3 = icmp eq i32 %18, 0
|
597 |
+
%19 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011, float %11, float 0x3FC1E52AA0000000) #5
|
598 |
+
%20 = tail call float @llvm.nvvm.fma.rn.f(float %.011, float %11, float 0x3FC1E52AA0000000) #5
|
599 |
+
%.012 = select i1 %.not3, float %20, float %19
|
600 |
+
%21 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
601 |
+
%.not4 = icmp eq i32 %21, 0
|
602 |
+
%22 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012, float %11, float 0xBFC55B1720000000) #5
|
603 |
+
%23 = tail call float @llvm.nvvm.fma.rn.f(float %.012, float %11, float 0xBFC55B1720000000) #5
|
604 |
+
%.09 = select i1 %.not4, float %23, float %22
|
605 |
+
%24 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
606 |
+
%.not5 = icmp eq i32 %24, 0
|
607 |
+
%25 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09, float %11, float 0x3FC99DA160000000) #5
|
608 |
+
%26 = tail call float @llvm.nvvm.fma.rn.f(float %.09, float %11, float 0x3FC99DA160000000) #5
|
609 |
+
%.05 = select i1 %.not5, float %26, float %25
|
610 |
+
%27 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
611 |
+
%.not6 = icmp eq i32 %27, 0
|
612 |
+
%28 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05, float %11, float 0xBFCFFFE440000000) #5
|
613 |
+
%29 = tail call float @llvm.nvvm.fma.rn.f(float %.05, float %11, float 0xBFCFFFE440000000) #5
|
614 |
+
%.01 = select i1 %.not6, float %29, float %28
|
615 |
+
%30 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
616 |
+
%.not7 = icmp eq i32 %30, 0
|
617 |
+
%31 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01, float %11, float 0x3FD5554F00000000) #5
|
618 |
+
%32 = tail call float @llvm.nvvm.fma.rn.f(float %.01, float %11, float 0x3FD5554F00000000) #5
|
619 |
+
%.0 = select i1 %.not7, float %32, float %31
|
620 |
+
%33 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
621 |
+
%.not8 = icmp eq i32 %33, 0
|
622 |
+
%34 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0, float %11, float -5.000000e-01) #5
|
623 |
+
%35 = tail call float @llvm.nvvm.fma.rn.f(float %.0, float %11, float -5.000000e-01) #5
|
624 |
+
%.07 = select i1 %.not8, float %35, float %34
|
625 |
+
%36 = fmul float %11, %.07
|
626 |
+
%37 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
627 |
+
%.not9 = icmp eq i32 %37, 0
|
628 |
+
%38 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %36, float %11, float %11) #5
|
629 |
+
%39 = tail call float @llvm.nvvm.fma.rn.f(float %36, float %11, float %11) #5
|
630 |
+
%.06 = select i1 %.not9, float %39, float %38
|
631 |
+
%40 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
632 |
+
%.not10 = icmp eq i32 %40, 0
|
633 |
+
%41 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08, float 0x3FE62E4300000000, float %.06) #5
|
634 |
+
%42 = tail call float @llvm.nvvm.fma.rn.f(float %.08, float 0x3FE62E4300000000, float %.06) #5
|
635 |
+
%.04 = select i1 %.not10, float %42, float %41
|
636 |
+
%43 = icmp ugt i32 %2, 2139095039
|
637 |
+
br i1 %43, label %__nv_fmaf_rn.exit.i, label %__internal_accurate_logf.exit
|
638 |
+
|
639 |
+
__nv_fmaf_rn.exit.i: ; preds = %__nv_fmaf_rn.exit10.i
|
640 |
+
%44 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5
|
641 |
+
%.not11 = icmp eq i32 %44, 0
|
642 |
+
%45 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02, float 0x7FF0000000000000, float 0x7FF0000000000000) #5
|
643 |
+
%46 = tail call float @llvm.nvvm.fma.rn.f(float %.02, float 0x7FF0000000000000, float 0x7FF0000000000000) #5
|
644 |
+
%.03 = select i1 %.not11, float %46, float %45
|
645 |
+
br label %__internal_accurate_logf.exit
|
646 |
+
|
647 |
+
__internal_accurate_logf.exit: ; preds = %__nv_fmaf_rn.exit.i, %__nv_fmaf_rn.exit10.i
|
648 |
+
%r.i.0 = phi float [ %.03, %__nv_fmaf_rn.exit.i ], [ %.04, %__nv_fmaf_rn.exit10.i ]
|
649 |
+
%47 = fcmp oeq float %.02, 0.000000e+00
|
650 |
+
%r.i.1 = select i1 %47, float 0xFFF0000000000000, float %r.i.0
|
651 |
+
ret float %r.i.1
|
652 |
+
}
|
653 |
+
|
654 |
+
declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #4
|
655 |
+
|
656 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
657 |
+
declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float) #0
|
658 |
+
|
659 |
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
660 |
+
declare float @llvm.nvvm.fma.rn.f(float, float, float) #0
|
661 |
+
|
662 |
+
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
663 |
+
attributes #1 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
|
664 |
+
attributes #2 = { convergent nocallback nounwind }
|
665 |
+
attributes #3 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
666 |
+
attributes #4 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
667 |
+
attributes #5 = { nounwind }
|
668 |
+
|
669 |
+
!llvm.module.flags = !{!0, !1}
|
670 |
+
!llvm.dbg.cu = !{!2}
|
671 |
+
!nvvm.annotations = !{!4, !5, !5, !4}
|
672 |
+
!llvm.ident = !{!6}
|
673 |
+
|
674 |
+
!0 = !{i32 2, !"Debug Info Version", i32 3}
|
675 |
+
!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
|
676 |
+
!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
|
677 |
+
!3 = !DIFile(filename: "cgu6pijtlte2d3yicmpedfh2c7wgxsaexd6ichnxwbwh4deqe6ck.py", directory: "/tmp/torchinductor_root/gu")
|
678 |
+
!4 = !{ptr @triton__0d1d2d3d4de5, !"kernel", i32 1}
|
679 |
+
!5 = !{ptr @triton__0d1d2d3d4de5, !"maxntidx", i32 256}
|
680 |
+
!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
|
681 |
+
!7 = distinct !DISubprogram(name: "triton__0d1d2d3d4de5", linkageName: "triton__0d1d2d3d4de5", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
|
682 |
+
!8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
|
683 |
+
!9 = !{}
|
684 |
+
!10 = !DILocation(line: 24, column: 33, scope: !7)
|
685 |
+
!11 = !DILocation(line: 21, column: 28, scope: !7)
|
686 |
+
!12 = !DILocation(line: 21, column: 34, scope: !7)
|
687 |
+
!13 = !DILocation(line: 31, column: 46, scope: !7)
|
688 |
+
!14 = !DILocation(line: 27, column: 36, scope: !7)
|
689 |
+
!15 = !DILocation(line: 28, column: 27, scope: !7)
|
690 |
+
!16 = !DILocation(line: 38, column: 21, scope: !17, inlinedAt: !19)
|
691 |
+
!17 = distinct !DILexicalBlockFile(scope: !7, file: !18, discriminator: 0)
|
692 |
+
!18 = !DIFile(filename: "triton_helpers.py", directory: "/usr/local/lib/python3.10/dist-packages/torch/_inductor")
|
693 |
+
!19 = !DILocation(line: 34, column: 45, scope: !17)
|
694 |
+
!20 = !DILocation(line: 29, column: 25, scope: !7)
|
695 |
+
!21 = !DILocation(line: 31, column: 34, scope: !7)
|
696 |
+
!22 = !DILocation(line: 31, column: 52, scope: !7)
|
697 |
+
!23 = !DILocation(line: 31, column: 103, scope: !7)
|
698 |
+
!24 = !DILocation(line: 36, column: 15, scope: !17, inlinedAt: !19)
|
699 |
+
!25 = !DILocation(line: 38, column: 16, scope: !17, inlinedAt: !19)
|
700 |
+
!26 = !DILocation(line: 0, scope: !7)
|
701 |
+
!27 = !DILocation(line: 36, column: 15, scope: !28, inlinedAt: !29)
|
702 |
+
!28 = distinct !DILexicalBlockFile(scope: !17, file: !18, discriminator: 0)
|
703 |
+
!29 = !DILocation(line: 49, column: 29, scope: !28, inlinedAt: !30)
|
704 |
+
!30 = !DILocation(line: 36, column: 38, scope: !28)
|
705 |
+
!31 = !DILocation(line: 38, column: 21, scope: !28, inlinedAt: !29)
|
706 |
+
!32 = !DILocation(line: 38, column: 16, scope: !28, inlinedAt: !29)
|
707 |
+
!33 = !DILocation(line: 39, column: 29, scope: !28, inlinedAt: !29)
|
708 |
+
!34 = !DILocation(line: 49, column: 29, scope: !17, inlinedAt: !35)
|
709 |
+
!35 = !DILocation(line: 36, column: 38, scope: !17)
|
710 |
+
!36 = !DILocation(line: 36, column: 41, scope: !7)
|
711 |
+
!37 = !DILocation(line: 37, column: 25, scope: !7)
|
712 |
+
!38 = !DILocation(line: 37, column: 36, scope: !7)
|
713 |
+
!39 = !DILocation(line: 39, column: 36, scope: !7)
|
714 |
+
!40 = !DILocation(line: 40, column: 27, scope: !7)
|
715 |
+
!41 = !DILocation(line: 41, column: 25, scope: !7)
|
716 |
+
!42 = !DILocation(line: 43, column: 34, scope: !7)
|
717 |
+
!43 = !DILocation(line: 43, column: 52, scope: !7)
|
718 |
+
!44 = !DILocation(line: 43, column: 103, scope: !7)
|
719 |
+
!45 = !DILocation(line: 45, column: 22, scope: !7)
|
720 |
+
!46 = !DILocation(line: 46, column: 22, scope: !7)
|
721 |
+
!47 = !DILocation(line: 49, column: 40, scope: !7)
|
722 |
+
!48 = !DILocation(line: 243, column: 36, scope: !49, inlinedAt: !51)
|
723 |
+
!49 = distinct !DILexicalBlockFile(scope: !7, file: !50, discriminator: 0)
|
724 |
+
!50 = !DIFile(filename: "standard.py", directory: "/usr/local/lib/python3.10/dist-packages/triton/language")
|
725 |
+
!51 = !DILocation(line: 50, column: 27, scope: !49)
|
726 |
+
!52 = !DILocation(line: 233, column: 15, scope: !53, inlinedAt: !54)
|
727 |
+
!53 = distinct !DILexicalBlockFile(scope: !49, file: !50, discriminator: 0)
|
728 |
+
!54 = !DILocation(line: 243, column: 36, scope: !53, inlinedAt: !55)
|
729 |
+
!55 = !DILocation(line: 50, column: 27, scope: !53)
|
730 |
+
!56 = !DILocation(line: 50, column: 30, scope: !7)
|
731 |
+
!57 = !DILocation(line: 51, column: 25, scope: !7)
|
732 |
+
!58 = !DILocation(line: 51, column: 37, scope: !7)
|
733 |
+
!59 = !DILocation(line: 59, column: 23, scope: !7)
|
734 |
+
!60 = !DILocation(line: 53, column: 27, scope: !7)
|
735 |
+
!61 = !DILocation(line: 52, column: 36, scope: !7)
|
736 |
+
!62 = !DILocation(line: 54, column: 25, scope: !7)
|
737 |
+
!63 = !DILocation(line: 56, column: 41, scope: !7)
|
738 |
+
!64 = !DILocation(line: 56, column: 35, scope: !7)
|
739 |
+
!65 = !DILocation(line: 56, column: 53, scope: !7)
|
740 |
+
!66 = !DILocation(line: 56, column: 105, scope: !7)
|
741 |
+
!67 = !DILocation(line: 58, column: 24, scope: !7)
|
742 |
+
!68 = !DILocation(line: 60, column: 24, scope: !7)
|
743 |
+
!69 = !DILocation(line: 62, column: 29, scope: !7)
|
744 |
+
!70 = !DILocation(line: 62, column: 54, scope: !7)
|
745 |
+
!71 = !DILocation(line: 52, column: 4, scope: !7)
|
.triton/dump/791dcf81763c6dee467e1d2c436fd6cf/triton_.ttir
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
module {
|
2 |
+
tt.func public @triton__0d1d2d3d4de5(%arg0: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg1: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg2: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg3: !tt.ptr<bf16, 1> {tt.divisibility = 16 : i32}, %arg4: i64 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}, %arg5: i64) attributes {noinline = false} {
|
3 |
+
%cst = arith.constant dense<0.000000e+00> : tensor<1x2048xbf16>
|
4 |
+
%c50257_i64 = arith.constant 50257 : i64
|
5 |
+
%cst_0 = arith.constant dense<true> : tensor<1x2048xi1>
|
6 |
+
%c50257_i32 = arith.constant 50257 : i32
|
7 |
+
%c2048_i32 = arith.constant 2048 : i32
|
8 |
+
%c0_i32 = arith.constant 0 : i32
|
9 |
+
%cst_1 = arith.constant dense<50257> : tensor<1x2048xi64>
|
10 |
+
%cst_2 = arith.constant dense<0.000000e+00> : tensor<1x2048xf32>
|
11 |
+
%cst_3 = arith.constant dense<0xFF800000> : tensor<1x2048xf32>
|
12 |
+
%0 = tt.get_program_id x : i32
|
13 |
+
%1 = arith.extsi %0 : i32 to i64
|
14 |
+
%2 = tt.make_range {end = 2048 : i32, start = 0 : i32} : tensor<2048xi32>
|
15 |
+
%3 = tt.expand_dims %2 {axis = 0 : i32} : (tensor<2048xi32>) -> tensor<1x2048xi32>
|
16 |
+
%4 = arith.extsi %3 : tensor<1x2048xi32> to tensor<1x2048xi64>
|
17 |
+
%5 = arith.muli %1, %c50257_i64 : i64
|
18 |
+
%6 = tt.splat %5 : (i64) -> tensor<1x2048xi64>
|
19 |
+
%7 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<1x2048x!tt.ptr<bf16, 1>>
|
20 |
+
%8 = scf.for %arg6 = %c0_i32 to %c50257_i32 step %c2048_i32 iter_args(%arg7 = %cst_3) -> (tensor<1x2048xf32>) : i32 {
|
21 |
+
%29 = arith.extsi %arg6 : i32 to i64
|
22 |
+
%30 = tt.splat %29 : (i64) -> tensor<1x2048xi64>
|
23 |
+
%31 = arith.addi %30, %4 : tensor<1x2048xi64>
|
24 |
+
%32 = arith.cmpi slt, %31, %cst_1 : tensor<1x2048xi64>
|
25 |
+
%33 = arith.addi %31, %6 : tensor<1x2048xi64>
|
26 |
+
%34 = tt.addptr %7, %33 : tensor<1x2048x!tt.ptr<bf16, 1>>, tensor<1x2048xi64>
|
27 |
+
%35 = tt.load %34, %32, %cst {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x2048xbf16>
|
28 |
+
%36 = arith.extf %35 : tensor<1x2048xbf16> to tensor<1x2048xf32>
|
29 |
+
%37 = arith.cmpf ogt, %arg7, %36 : tensor<1x2048xf32>
|
30 |
+
%38 = arith.cmpf une, %arg7, %arg7 : tensor<1x2048xf32>
|
31 |
+
%39 = arith.ori %37, %38 : tensor<1x2048xi1>
|
32 |
+
%40 = arith.xori %39, %cst_0 : tensor<1x2048xi1>
|
33 |
+
%41 = arith.andi %32, %40 : tensor<1x2048xi1>
|
34 |
+
%42 = arith.select %41, %36, %arg7 : tensor<1x2048xi1>, tensor<1x2048xf32>
|
35 |
+
scf.yield %42 : tensor<1x2048xf32>
|
36 |
+
}
|
37 |
+
%9 = "tt.reduce"(%8) <{axis = 1 : i32}> ({
|
38 |
+
^bb0(%arg6: f32, %arg7: f32):
|
39 |
+
%29 = arith.cmpf ogt, %arg6, %arg7 : f32
|
40 |
+
%30 = arith.cmpf une, %arg6, %arg6 : f32
|
41 |
+
%31 = arith.ori %29, %30 : i1
|
42 |
+
%32 = arith.select %31, %arg6, %arg7 : f32
|
43 |
+
tt.reduce.return %32 : f32
|
44 |
+
}) : (tensor<1x2048xf32>) -> tensor<1xf32>
|
45 |
+
%10 = tt.expand_dims %9 {axis = 1 : i32} : (tensor<1xf32>) -> tensor<1x1xf32>
|
46 |
+
%11 = tt.addptr %arg1, %1 : !tt.ptr<f32, 1>, i64
|
47 |
+
%12 = tt.splat %11 : (!tt.ptr<f32, 1>) -> tensor<1x1x!tt.ptr<f32, 1>>
|
48 |
+
tt.store %12, %10 {cache = 1 : i32, evict = 1 : i32} : tensor<1x1xf32>
|
49 |
+
%13 = arith.muli %1, %c50257_i64 : i64
|
50 |
+
%14 = tt.splat %13 : (i64) -> tensor<1x2048xi64>
|
51 |
+
%15 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<1x2048x!tt.ptr<bf16, 1>>
|
52 |
+
%16 = tt.broadcast %10 : (tensor<1x1xf32>) -> tensor<1x2048xf32>
|
53 |
+
%17 = scf.for %arg6 = %c0_i32 to %c50257_i32 step %c2048_i32 iter_args(%arg7 = %cst_2) -> (tensor<1x2048xf32>) : i32 {
|
54 |
+
%29 = arith.extsi %arg6 : i32 to i64
|
55 |
+
%30 = tt.splat %29 : (i64) -> tensor<1x2048xi64>
|
56 |
+
%31 = arith.addi %30, %4 : tensor<1x2048xi64>
|
57 |
+
%32 = arith.cmpi slt, %31, %cst_1 : tensor<1x2048xi64>
|
58 |
+
%33 = arith.addi %31, %14 : tensor<1x2048xi64>
|
59 |
+
%34 = tt.addptr %15, %33 : tensor<1x2048x!tt.ptr<bf16, 1>>, tensor<1x2048xi64>
|
60 |
+
%35 = tt.load %34, %32, %cst {cache = 1 : i32, evict = 3 : i32, isVolatile = false} : tensor<1x2048xbf16>
|
61 |
+
%36 = arith.extf %35 : tensor<1x2048xbf16> to tensor<1x2048xf32>
|
62 |
+
%37 = arith.subf %36, %16 : tensor<1x2048xf32>
|
63 |
+
%38 = math.exp %37 : tensor<1x2048xf32>
|
64 |
+
%39 = arith.addf %arg7, %38 : tensor<1x2048xf32>
|
65 |
+
%40 = arith.select %32, %39, %arg7 : tensor<1x2048xi1>, tensor<1x2048xf32>
|
66 |
+
scf.yield %40 : tensor<1x2048xf32>
|
67 |
+
}
|
68 |
+
%18 = "tt.reduce"(%17) <{axis = 1 : i32}> ({
|
69 |
+
^bb0(%arg6: f32, %arg7: f32):
|
70 |
+
%29 = arith.addf %arg6, %arg7 : f32
|
71 |
+
tt.reduce.return %29 : f32
|
72 |
+
}) : (tensor<1x2048xf32>) -> tensor<1xf32>
|
73 |
+
%19 = tt.expand_dims %18 {axis = 1 : i32} : (tensor<1xf32>) -> tensor<1x1xf32>
|
74 |
+
%20 = tt.addptr %arg2, %1 : !tt.ptr<f32, 1>, i64
|
75 |
+
%21 = tt.splat %20 : (!tt.ptr<f32, 1>) -> tensor<1x1x!tt.ptr<f32, 1>>
|
76 |
+
tt.store %21, %19 {cache = 1 : i32, evict = 1 : i32} : tensor<1x1xf32>
|
77 |
+
%22 = arith.muli %1, %c50257_i64 : i64
|
78 |
+
%23 = tt.splat %22 : (i64) -> tensor<1x2048xi64>
|
79 |
+
%24 = tt.splat %arg0 : (!tt.ptr<bf16, 1>) -> tensor<1x2048x!tt.ptr<bf16, 1>>
|
80 |
+
%25 = tt.broadcast %10 : (tensor<1x1xf32>) -> tensor<1x2048xf32>
|
81 |
+
%26 = math.log %19 : tensor<1x1xf32>
|
82 |
+
%27 = tt.broadcast %26 : (tensor<1x1xf32>) -> tensor<1x2048xf32>
|
83 |
+
%28 = tt.splat %arg3 : (!tt.ptr<bf16, 1>) -> tensor<1x2048x!tt.ptr<bf16, 1>>
|
84 |
+
scf.for %arg6 = %c0_i32 to %c50257_i32 step %c2048_i32 : i32 {
|
85 |
+
%29 = arith.extsi %arg6 : i32 to i64
|
86 |
+
%30 = tt.splat %29 : (i64) -> tensor<1x2048xi64>
|
87 |
+
%31 = arith.addi %30, %4 : tensor<1x2048xi64>
|
88 |
+
%32 = arith.cmpi slt, %31, %cst_1 : tensor<1x2048xi64>
|
89 |
+
%33 = arith.addi %31, %23 : tensor<1x2048xi64>
|
90 |
+
%34 = tt.addptr %24, %33 : tensor<1x2048x!tt.ptr<bf16, 1>>, tensor<1x2048xi64>
|
91 |
+
%35 = tt.load %34, %32, %cst {cache = 1 : i32, evict = 2 : i32, isVolatile = false} : tensor<1x2048xbf16>
|
92 |
+
%36 = arith.extf %35 : tensor<1x2048xbf16> to tensor<1x2048xf32>
|
93 |
+
%37 = arith.subf %36, %25 : tensor<1x2048xf32>
|
94 |
+
%38 = arith.subf %37, %27 : tensor<1x2048xf32>
|
95 |
+
%39 = tt.addptr %28, %33 : tensor<1x2048x!tt.ptr<bf16, 1>>, tensor<1x2048xi64>
|
96 |
+
%40 = arith.truncf %38 : tensor<1x2048xf32> to tensor<1x2048xbf16>
|
97 |
+
tt.store %39, %40, %32 {cache = 1 : i32, evict = 1 : i32} : tensor<1x2048xbf16>
|
98 |
+
}
|
99 |
+
tt.return
|
100 |
+
}
|
101 |
+
}
|
.triton/dump/7dc5bb3e5c2bb99527fff34c6fba7810/triton_.ptx
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Generated by LLVM NVPTX Back-End
|
3 |
+
//
|
4 |
+
|
5 |
+
.version 8.2
|
6 |
+
.target sm_89
|
7 |
+
.address_size 64
|
8 |
+
|
9 |
+
// .globl triton__0d1de
|
10 |
+
|
11 |
+
.visible .entry triton__0d1de(
|
12 |
+
.param .u64 triton__0d1de_param_0,
|
13 |
+
.param .u32 triton__0d1de_param_1
|
14 |
+
)
|
15 |
+
.maxntid 128, 1, 1
|
16 |
+
{
|
17 |
+
.reg .pred %p<2>;
|
18 |
+
.reg .b32 %r<6>;
|
19 |
+
.reg .b64 %rd<5>;
|
20 |
+
.loc 1 18 0
|
21 |
+
$L__func_begin0:
|
22 |
+
.loc 1 18 0
|
23 |
+
|
24 |
+
ld.param.u64 %rd3, [triton__0d1de_param_0];
|
25 |
+
$L__tmp0:
|
26 |
+
.loc 1 21 36
|
27 |
+
mov.u32 %r2, %tid.x;
|
28 |
+
and.b32 %r3, %r2, 127;
|
29 |
+
.loc 1 20 28
|
30 |
+
mov.u32 %r1, %ctaid.x;
|
31 |
+
.loc 1 20 33
|
32 |
+
shl.b32 %r4, %r1, 7;
|
33 |
+
.loc 1 21 23
|
34 |
+
or.b32 %r5, %r4, %r3;
|
35 |
+
.loc 1 22 21
|
36 |
+
setp.lt.s32 %p1, %r5, 512;
|
37 |
+
.loc 1 25 25
|
38 |
+
cvt.s64.s32 %rd1, %r5;
|
39 |
+
mul.wide.s32 %rd4, %r5, 8;
|
40 |
+
add.s64 %rd2, %rd3, %rd4;
|
41 |
+
.loc 1 25 36
|
42 |
+
@%p1 st.global.b64 [ %rd2 + 0 ], { %rd1 };
|
43 |
+
.loc 1 25 4
|
44 |
+
ret;
|
45 |
+
$L__tmp1:
|
46 |
+
$L__func_end0:
|
47 |
+
|
48 |
+
}
|
49 |
+
.file 1 "/tmp/torchinductor_root/wx/cwxxgxdevnyc453z7hh4nxzgmvlhh6suwokktps3dw62btskgxt4.py"
|
50 |
+
.section .debug_abbrev
|
51 |
+
{
|
52 |
+
.b8 1
|
53 |
+
.b8 17
|
54 |
+
.b8 1
|
55 |
+
.b8 37
|
56 |
+
.b8 8
|
57 |
+
.b8 19
|
58 |
+
.b8 5
|
59 |
+
.b8 3
|
60 |
+
.b8 8
|
61 |
+
.b8 16
|
62 |
+
.b8 6
|
63 |
+
.b8 27
|
64 |
+
.b8 8
|
65 |
+
.b8 180
|
66 |
+
.b8 66
|
67 |
+
.b8 12
|
68 |
+
.b8 17
|
69 |
+
.b8 1
|
70 |
+
.b8 18
|
71 |
+
.b8 1
|
72 |
+
.b8 0
|
73 |
+
.b8 0
|
74 |
+
.b8 2
|
75 |
+
.b8 46
|
76 |
+
.b8 0
|
77 |
+
.b8 17
|
78 |
+
.b8 1
|
79 |
+
.b8 18
|
80 |
+
.b8 1
|
81 |
+
.b8 64
|
82 |
+
.b8 10
|
83 |
+
.b8 135
|
84 |
+
.b8 64
|
85 |
+
.b8 8
|
86 |
+
.b8 3
|
87 |
+
.b8 8
|
88 |
+
.b8 58
|
89 |
+
.b8 11
|
90 |
+
.b8 59
|
91 |
+
.b8 11
|
92 |
+
.b8 63
|
93 |
+
.b8 12
|
94 |
+
.b8 0
|
95 |
+
.b8 0
|
96 |
+
.b8 0
|
97 |
+
}
|
98 |
+
.section .debug_info
|
99 |
+
{
|
100 |
+
.b32 172
|
101 |
+
.b8 2
|
102 |
+
.b8 0
|
103 |
+
.b32 .debug_abbrev
|
104 |
+
.b8 8
|
105 |
+
.b8 1
|
106 |
+
.b8 116
|
107 |
+
.b8 114
|
108 |
+
.b8 105
|
109 |
+
.b8 116
|
110 |
+
.b8 111
|
111 |
+
.b8 110
|
112 |
+
.b8 0
|
113 |
+
.b8 2
|
114 |
+
.b8 0
|
115 |
+
.b8 99
|
116 |
+
.b8 119
|
117 |
+
.b8 120
|
118 |
+
.b8 120
|
119 |
+
.b8 103
|
120 |
+
.b8 120
|
121 |
+
.b8 100
|
122 |
+
.b8 101
|
123 |
+
.b8 118
|
124 |
+
.b8 110
|
125 |
+
.b8 121
|
126 |
+
.b8 99
|
127 |
+
.b8 52
|
128 |
+
.b8 53
|
129 |
+
.b8 51
|
130 |
+
.b8 122
|
131 |
+
.b8 55
|
132 |
+
.b8 104
|
133 |
+
.b8 104
|
134 |
+
.b8 52
|
135 |
+
.b8 110
|
136 |
+
.b8 120
|
137 |
+
.b8 122
|
138 |
+
.b8 103
|
139 |
+
.b8 109
|
140 |
+
.b8 118
|
141 |
+
.b8 108
|
142 |
+
.b8 104
|
143 |
+
.b8 104
|
144 |
+
.b8 54
|
145 |
+
.b8 115
|
146 |
+
.b8 117
|
147 |
+
.b8 119
|
148 |
+
.b8 111
|
149 |
+
.b8 107
|
150 |
+
.b8 107
|
151 |
+
.b8 116
|
152 |
+
.b8 112
|
153 |
+
.b8 115
|
154 |
+
.b8 51
|
155 |
+
.b8 100
|
156 |
+
.b8 119
|
157 |
+
.b8 54
|
158 |
+
.b8 50
|
159 |
+
.b8 98
|
160 |
+
.b8 116
|
161 |
+
.b8 115
|
162 |
+
.b8 107
|
163 |
+
.b8 103
|
164 |
+
.b8 120
|
165 |
+
.b8 116
|
166 |
+
.b8 52
|
167 |
+
.b8 46
|
168 |
+
.b8 112
|
169 |
+
.b8 121
|
170 |
+
.b8 0
|
171 |
+
.b32 .debug_line
|
172 |
+
.b8 47
|
173 |
+
.b8 116
|
174 |
+
.b8 109
|
175 |
+
.b8 112
|
176 |
+
.b8 47
|
177 |
+
.b8 116
|
178 |
+
.b8 111
|
179 |
+
.b8 114
|
180 |
+
.b8 99
|
181 |
+
.b8 104
|
182 |
+
.b8 105
|
183 |
+
.b8 110
|
184 |
+
.b8 100
|
185 |
+
.b8 117
|
186 |
+
.b8 99
|
187 |
+
.b8 116
|
188 |
+
.b8 111
|
189 |
+
.b8 114
|
190 |
+
.b8 95
|
191 |
+
.b8 114
|
192 |
+
.b8 111
|
193 |
+
.b8 111
|
194 |
+
.b8 116
|
195 |
+
.b8 47
|
196 |
+
.b8 119
|
197 |
+
.b8 120
|
198 |
+
.b8 0
|
199 |
+
.b8 1
|
200 |
+
.b64 $L__func_begin0
|
201 |
+
.b64 $L__func_end0
|
202 |
+
.b8 2
|
203 |
+
.b64 $L__func_begin0
|
204 |
+
.b64 $L__func_end0
|
205 |
+
.b8 1
|
206 |
+
.b8 156
|
207 |
+
.b8 116
|
208 |
+
.b8 114
|
209 |
+
.b8 105
|
210 |
+
.b8 116
|
211 |
+
.b8 111
|
212 |
+
.b8 110
|
213 |
+
.b8 95
|
214 |
+
.b8 95
|
215 |
+
.b8 48
|
216 |
+
.b8 100
|
217 |
+
.b8 49
|
218 |
+
.b8 100
|
219 |
+
.b8 101
|
220 |
+
.b8 0
|
221 |
+
.b8 116
|
222 |
+
.b8 114
|
223 |
+
.b8 105
|
224 |
+
.b8 116
|
225 |
+
.b8 111
|
226 |
+
.b8 110
|
227 |
+
.b8 95
|
228 |
+
.b8 95
|
229 |
+
.b8 48
|
230 |
+
.b8 100
|
231 |
+
.b8 49
|
232 |
+
.b8 100
|
233 |
+
.b8 101
|
234 |
+
.b8 0
|
235 |
+
.b8 1
|
236 |
+
.b8 18
|
237 |
+
.b8 1
|
238 |
+
.b8 0
|
239 |
+
}
|
240 |
+
.section .debug_pubnames
|
241 |
+
{
|
242 |
+
.b32 $L__pubNames_end0-$L__pubNames_start0
|
243 |
+
$L__pubNames_start0:
|
244 |
+
.b8 2
|
245 |
+
.b8 0
|
246 |
+
.b32 .debug_info
|
247 |
+
.b32 176
|
248 |
+
.b32 125
|
249 |
+
.b8 116
|
250 |
+
.b8 114
|
251 |
+
.b8 105
|
252 |
+
.b8 116
|
253 |
+
.b8 111
|
254 |
+
.b8 110
|
255 |
+
.b8 95
|
256 |
+
.b8 95
|
257 |
+
.b8 48
|
258 |
+
.b8 100
|
259 |
+
.b8 49
|
260 |
+
.b8 100
|
261 |
+
.b8 101
|
262 |
+
.b8 0
|
263 |
+
.b32 0
|
264 |
+
$L__pubNames_end0:
|
265 |
+
}
|
266 |
+
.section .debug_pubtypes
|
267 |
+
{
|
268 |
+
.b32 $L__pubTypes_end0-$L__pubTypes_start0
|
269 |
+
$L__pubTypes_start0:
|
270 |
+
.b8 2
|
271 |
+
.b8 0
|
272 |
+
.b32 .debug_info
|
273 |
+
.b32 176
|
274 |
+
.b32 0
|
275 |
+
$L__pubTypes_end0:
|
276 |
+
}
|
277 |
+
.section .debug_loc { }
|
.triton/dump/884b5df35d2a25fd91308249e7657806/triton_.ttir
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
module {
|
2 |
+
tt.func public @triton__0d1de(%arg0: !tt.ptr<f32, 1> {tt.divisibility = 16 : i32}, %arg1: i64 {tt.divisibility = 16 : i32, tt.max_divisibility = 16 : i32}) attributes {noinline = false} {
|
3 |
+
%c1024_i64 = arith.constant 1024 : i64
|
4 |
+
%cst = arith.constant dense<0.000000e+00> : tensor<1024xf32>
|
5 |
+
%0 = tt.get_program_id x : i32
|
6 |
+
%1 = arith.extsi %0 : i32 to i64
|
7 |
+
%2 = arith.muli %1, %c1024_i64 : i64
|
8 |
+
%3 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32>
|
9 |
+
%4 = arith.extsi %3 : tensor<1024xi32> to tensor<1024xi64>
|
10 |
+
%5 = tt.splat %2 : (i64) -> tensor<1024xi64>
|
11 |
+
%6 = arith.addi %5, %4 : tensor<1024xi64>
|
12 |
+
%7 = tt.splat %arg0 : (!tt.ptr<f32, 1>) -> tensor<1024x!tt.ptr<f32, 1>>
|
13 |
+
%8 = tt.addptr %7, %6 : tensor<1024x!tt.ptr<f32, 1>>, tensor<1024xi64>
|
14 |
+
tt.store %8, %cst {cache = 1 : i32, evict = 1 : i32} : tensor<1024xf32>
|
15 |
+
tt.return
|
16 |
+
}
|
17 |
+
}
|
.triton/dump/93ab21d512b10f4271e68c2f0ae3393c/triton_.cubin
ADDED
Binary file (5.54 kB). View file
|
|
.triton/dump/9a2fb05196b13393bea452d08e9aaca8/triton_.cubin
ADDED
Binary file (4.9 kB). View file
|
|
.triton/dump/9f68cc707cb8f8bff3232abf59cbd9ec/triton_.ptx
ADDED
@@ -0,0 +1,886 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Generated by LLVM NVPTX Back-End
|
3 |
+
//
|
4 |
+
|
5 |
+
.version 8.2
|
6 |
+
.target sm_89
|
7 |
+
.address_size 64
|
8 |
+
|
9 |
+
// .globl triton__0d1d2d3d4d5de6de
|
10 |
+
.extern .func __assertfail
|
11 |
+
(
|
12 |
+
.param .b64 __assertfail_param_0,
|
13 |
+
.param .b64 __assertfail_param_1,
|
14 |
+
.param .b32 __assertfail_param_2,
|
15 |
+
.param .b64 __assertfail_param_3,
|
16 |
+
.param .b64 __assertfail_param_4
|
17 |
+
)
|
18 |
+
;
|
19 |
+
.global .align 1 .b8 assertFunc_1[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100};
|
20 |
+
.global .align 1 .b8 assertFile_1[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62};
|
21 |
+
.global .align 1 .b8 assertMessage_1[39] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 49, 51, 32, 60, 32, 53, 48, 50, 53, 55};
|
22 |
+
.global .align 1 .b8 assertFunc_0[25] = {95, 99, 97, 108, 108, 95, 119, 105, 116, 104, 95, 102, 114, 97, 109, 101, 115, 95, 114, 101, 109, 111, 118, 101, 100};
|
23 |
+
.global .align 1 .b8 assertFile_0[38] = {60, 102, 114, 111, 122, 101, 110, 32, 105, 109, 112, 111, 114, 116, 108, 105, 98, 46, 95, 98, 111, 111, 116, 115, 116, 114, 97, 112, 95, 101, 120, 116, 101, 114, 110, 97, 108, 62};
|
24 |
+
.global .align 1 .b8 assertMessage_0[38] = {105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111, 117, 110, 100, 115, 58, 32, 48, 32, 60, 61, 32, 116, 109, 112, 51, 32, 60, 32, 53, 48, 50, 53, 55};
|
25 |
+
.extern .shared .align 1 .b8 global_smem[];
|
26 |
+
.global .align 1 .b8 _$_str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};
|
27 |
+
|
28 |
+
.visible .entry triton__0d1d2d3d4d5de6de(
|
29 |
+
.param .u64 triton__0d1d2d3d4d5de6de_param_0,
|
30 |
+
.param .u64 triton__0d1d2d3d4d5de6de_param_1,
|
31 |
+
.param .u64 triton__0d1d2d3d4d5de6de_param_2,
|
32 |
+
.param .u64 triton__0d1d2d3d4d5de6de_param_3,
|
33 |
+
.param .u64 triton__0d1d2d3d4d5de6de_param_4,
|
34 |
+
.param .u32 triton__0d1d2d3d4d5de6de_param_5,
|
35 |
+
.param .u32 triton__0d1d2d3d4d5de6de_param_6
|
36 |
+
)
|
37 |
+
.maxntid 128, 1, 1
|
38 |
+
{
|
39 |
+
.reg .pred %p<42>;
|
40 |
+
.reg .b16 %rs<5>;
|
41 |
+
.reg .b32 %r<140>;
|
42 |
+
.reg .f32 %f<148>;
|
43 |
+
.reg .b64 %rd<67>;
|
44 |
+
.loc 1 18 0
|
45 |
+
$L__func_begin0:
|
46 |
+
.loc 1 18 0
|
47 |
+
|
48 |
+
ld.param.u64 %rd17, [triton__0d1d2d3d4d5de6de_param_4];
|
49 |
+
ld.param.u64 %rd16, [triton__0d1d2d3d4d5de6de_param_3];
|
50 |
+
ld.param.u64 %rd28, [triton__0d1d2d3d4d5de6de_param_0];
|
51 |
+
ld.param.u64 %rd29, [triton__0d1d2d3d4d5de6de_param_1];
|
52 |
+
$L__tmp0:
|
53 |
+
.loc 1 22 44
|
54 |
+
mov.u32 %r16, %tid.x;
|
55 |
+
and.b32 %r1, %r16, 31;
|
56 |
+
ld.param.u64 %rd30, [triton__0d1d2d3d4d5de6de_param_2];
|
57 |
+
bfe.u32 %r2, %r16, 5, 2;
|
58 |
+
bfe.u32 %r3, %r16, 1, 4;
|
59 |
+
shl.b32 %r17, %r2, 4;
|
60 |
+
or.b32 %r4, %r17, %r3;
|
61 |
+
and.b32 %r18, %r16, 63;
|
62 |
+
.loc 1 24 33
|
63 |
+
shl.b32 %r19, %r16, 2;
|
64 |
+
and.b32 %r5, %r19, 4;
|
65 |
+
and.b32 %r6, %r16, 7;
|
66 |
+
.loc 1 31 36
|
67 |
+
shl.b32 %r7, %r2, 2;
|
68 |
+
.loc 1 21 28
|
69 |
+
mov.u32 %r14, %ctaid.x;
|
70 |
+
.loc 1 21 33
|
71 |
+
shl.b32 %r20, %r14, 6;
|
72 |
+
.loc 1 22 23
|
73 |
+
or.b32 %r21, %r20, %r4;
|
74 |
+
or.b32 %r22, %r20, %r18;
|
75 |
+
.loc 1 26 30
|
76 |
+
mul.wide.s32 %rd31, %r21, 8;
|
77 |
+
add.s64 %rd19, %rd28, %rd31;
|
78 |
+
mul.wide.s32 %rd32, %r22, 8;
|
79 |
+
add.s64 %rd27, %rd28, %rd32;
|
80 |
+
mov.pred %p1, -1;
|
81 |
+
.loc 1 26 35
|
82 |
+
mov.u64 %rd18, 0x0;
|
83 |
+
@%p1 ld.global.L1::evict_last.b64 { %rd18 }, [ %rd19 + 0 ];
|
84 |
+
mov.u64 %rd20, 0x0;
|
85 |
+
@%p1 ld.global.L1::evict_last.b64 { %rd20 }, [ %rd19 + 0 ];
|
86 |
+
mov.u64 %rd22, 0x0;
|
87 |
+
@%p1 ld.global.L1::evict_last.b64 { %rd22 }, [ %rd19 + 0 ];
|
88 |
+
mov.u64 %rd24, 0x0;
|
89 |
+
@%p1 ld.global.L1::evict_last.b64 { %rd24 }, [ %rd19 + 0 ];
|
90 |
+
mov.u64 %rd26, 0x0;
|
91 |
+
@%p1 ld.global.L1::evict_last.b64 { %rd26 }, [ %rd27 + 0 ];
|
92 |
+
.loc 1 27 18
|
93 |
+
bfe.s32 %r23, %r14, 25, 1;
|
94 |
+
shr.u32 %r24, %r23, 23;
|
95 |
+
add.s32 %r25, %r21, %r24;
|
96 |
+
and.b32 %r26, %r25, 16776704;
|
97 |
+
sub.s32 %r27, %r21, %r26;
|
98 |
+
.loc 1 35 44
|
99 |
+
shl.b32 %r28, %r27, 8;
|
100 |
+
.loc 1 36 22
|
101 |
+
add.s64 %rd33, %rd26, 50257;
|
102 |
+
.loc 1 37 22
|
103 |
+
setp.lt.s64 %p6, %rd18, 0;
|
104 |
+
setp.lt.s64 %p7, %rd26, 0;
|
105 |
+
.loc 1 38 36
|
106 |
+
selp.b64 %rd1, %rd33, %rd26, %p7;
|
107 |
+
.loc 1 40 44
|
108 |
+
shl.b64 %rd34, %rd18, 8;
|
109 |
+
add.s64 %rd35, %rd34, 12865792;
|
110 |
+
selp.b64 %rd36, %rd35, %rd34, %p6;
|
111 |
+
.loc 1 31 36
|
112 |
+
and.b32 %r29, %r16, 1;
|
113 |
+
mul.wide.u32 %rd2, %r29, 16;
|
114 |
+
shl.b64 %rd37, %rd36, 2;
|
115 |
+
or.b64 %rd38, %rd2, %rd37;
|
116 |
+
add.s64 %rd66, %rd29, %rd38;
|
117 |
+
or.b32 %r30, %r28, %r5;
|
118 |
+
mul.wide.s32 %rd39, %r30, 4;
|
119 |
+
add.s64 %rd64, %rd30, %rd39;
|
120 |
+
mov.f32 %f132, 0f00000000;
|
121 |
+
mov.b32 %r138, -8;
|
122 |
+
mov.u64 %rd62, %rd64;
|
123 |
+
mov.u64 %rd63, %rd66;
|
124 |
+
mov.f32 %f133, %f132;
|
125 |
+
mov.f32 %f134, %f132;
|
126 |
+
mov.f32 %f135, %f132;
|
127 |
+
mov.f32 %f136, %f132;
|
128 |
+
mov.f32 %f137, %f132;
|
129 |
+
mov.f32 %f138, %f132;
|
130 |
+
mov.f32 %f139, %f132;
|
131 |
+
mov.f32 %f140, %f132;
|
132 |
+
mov.f32 %f141, %f132;
|
133 |
+
mov.f32 %f142, %f132;
|
134 |
+
mov.f32 %f143, %f132;
|
135 |
+
mov.f32 %f144, %f132;
|
136 |
+
mov.f32 %f145, %f132;
|
137 |
+
mov.f32 %f146, %f132;
|
138 |
+
mov.f32 %f147, %f132;
|
139 |
+
bra.uni $L__BB0_1;
|
140 |
+
$L__BB0_3:
|
141 |
+
.loc 1 0 0
|
142 |
+
mov.b32 %f17, %r31;
|
143 |
+
mov.b32 %f18, %r32;
|
144 |
+
mov.b32 %f19, %r33;
|
145 |
+
mov.b32 %f20, %r34;
|
146 |
+
.loc 1 40 52
|
147 |
+
mov.u32 %r40, 0x0;
|
148 |
+
mov.u32 %r41, 0x0;
|
149 |
+
mov.u32 %r42, 0x0;
|
150 |
+
mov.u32 %r43, 0x0;
|
151 |
+
@%p1 ld.global.L1::evict_last.v4.b32 { %r40, %r41, %r42, %r43 }, [ %rd63 + 0 ];
|
152 |
+
@!%p1 mov.u32 %r40, %r124;
|
153 |
+
@!%p1 mov.u32 %r41, %r124;
|
154 |
+
@!%p1 mov.u32 %r42, %r124;
|
155 |
+
@!%p1 mov.u32 %r43, %r124;
|
156 |
+
mov.b32 %f48, %r40;
|
157 |
+
mov.b32 %f49, %r41;
|
158 |
+
mov.b32 %f50, %r42;
|
159 |
+
mov.b32 %f51, %r43;
|
160 |
+
.loc 1 41 22
|
161 |
+
add.f32 %f52, %f17, %f48;
|
162 |
+
add.f32 %f53, %f18, %f49;
|
163 |
+
add.f32 %f54, %f19, %f50;
|
164 |
+
add.f32 %f55, %f20, %f51;
|
165 |
+
$L__tmp1:
|
166 |
+
.loc 2 96 20
|
167 |
+
sub.f32 %f56, %f52, %f144;
|
168 |
+
sub.f32 %f57, %f53, %f145;
|
169 |
+
sub.f32 %f58, %f54, %f146;
|
170 |
+
sub.f32 %f59, %f55, %f147;
|
171 |
+
.loc 2 97 26
|
172 |
+
add.f32 %f132, %f132, 0f3F800000;
|
173 |
+
add.f32 %f133, %f133, 0f3F800000;
|
174 |
+
add.f32 %f134, %f134, 0f3F800000;
|
175 |
+
add.f32 %f135, %f135, 0f3F800000;
|
176 |
+
add.f32 %f136, %f136, 0f3F800000;
|
177 |
+
add.f32 %f137, %f137, 0f3F800000;
|
178 |
+
add.f32 %f138, %f138, 0f3F800000;
|
179 |
+
add.f32 %f139, %f139, 0f3F800000;
|
180 |
+
.loc 2 98 30
|
181 |
+
mov.b32 %r49, %f56;
|
182 |
+
mov.b32 %r50, %f132;
|
183 |
+
div.full.f32 %r48, %r49, %r50;
|
184 |
+
mov.b32 %f60, %r48;
|
185 |
+
mov.b32 %r52, %f57;
|
186 |
+
mov.b32 %r53, %f133;
|
187 |
+
div.full.f32 %r51, %r52, %r53;
|
188 |
+
mov.b32 %f61, %r51;
|
189 |
+
mov.b32 %r55, %f58;
|
190 |
+
mov.b32 %r56, %f134;
|
191 |
+
div.full.f32 %r54, %r55, %r56;
|
192 |
+
mov.b32 %f62, %r54;
|
193 |
+
mov.b32 %r58, %f59;
|
194 |
+
mov.b32 %r59, %f135;
|
195 |
+
div.full.f32 %r57, %r58, %r59;
|
196 |
+
mov.b32 %f63, %r57;
|
197 |
+
.loc 2 98 22
|
198 |
+
add.f32 %f144, %f144, %f60;
|
199 |
+
add.f32 %f145, %f145, %f61;
|
200 |
+
add.f32 %f146, %f146, %f62;
|
201 |
+
add.f32 %f147, %f147, %f63;
|
202 |
+
.loc 2 101 30
|
203 |
+
sub.f32 %f64, %f52, %f144;
|
204 |
+
sub.f32 %f65, %f53, %f145;
|
205 |
+
sub.f32 %f66, %f54, %f146;
|
206 |
+
sub.f32 %f67, %f55, %f147;
|
207 |
+
$L__tmp2:
|
208 |
+
.loc 1 47 48
|
209 |
+
fma.rn.f32 %f140, %f56, %f64, %f140;
|
210 |
+
fma.rn.f32 %f141, %f57, %f65, %f141;
|
211 |
+
fma.rn.f32 %f142, %f58, %f66, %f142;
|
212 |
+
fma.rn.f32 %f143, %f59, %f67, %f143;
|
213 |
+
.loc 1 31 36
|
214 |
+
add.s32 %r138, %r138, 8;
|
215 |
+
add.s64 %rd63, %rd63, 32;
|
216 |
+
add.s64 %rd62, %rd62, 32;
|
217 |
+
setp.lt.u32 %p19, %r138, 248;
|
218 |
+
@%p19 bra $L__BB0_1;
|
219 |
+
bra.uni $L__BB0_4;
|
220 |
+
$L__BB0_1:
|
221 |
+
.loc 1 39 40
|
222 |
+
setp.lt.u64 %p13, %rd1, 50257;
|
223 |
+
mov.b32 %r124, 0;
|
224 |
+
.loc 1 35 50
|
225 |
+
mov.u32 %r31, 0x0;
|
226 |
+
mov.u32 %r32, 0x0;
|
227 |
+
mov.u32 %r33, 0x0;
|
228 |
+
mov.u32 %r34, 0x0;
|
229 |
+
@%p1 ld.global.L1::evict_last.v4.b32 { %r31, %r32, %r33, %r34 }, [ %rd62 + 0 ];
|
230 |
+
@!%p1 mov.u32 %r31, %r124;
|
231 |
+
@!%p1 mov.u32 %r32, %r124;
|
232 |
+
@!%p1 mov.u32 %r33, %r124;
|
233 |
+
@!%p1 mov.u32 %r34, %r124;
|
234 |
+
mov.b32 %r137, 883;
|
235 |
+
mov.u64 %rd61, 1;
|
236 |
+
.loc 1 39 55
|
237 |
+
@%p13 bra $L__BB0_3;
|
238 |
+
mov.u64 %rd41, assertMessage_0;
|
239 |
+
cvta.global.u64 %rd42, %rd41;
|
240 |
+
mov.u64 %rd43, assertFile_0;
|
241 |
+
cvta.global.u64 %rd44, %rd43;
|
242 |
+
mov.u64 %rd45, assertFunc_0;
|
243 |
+
cvta.global.u64 %rd46, %rd45;
|
244 |
+
{ // callseq 2, 0
|
245 |
+
.reg .b32 temp_param_reg;
|
246 |
+
.param .b64 param0;
|
247 |
+
st.param.b64 [param0+0], %rd42;
|
248 |
+
.param .b64 param1;
|
249 |
+
st.param.b64 [param1+0], %rd44;
|
250 |
+
.param .b32 param2;
|
251 |
+
st.param.b32 [param2+0], %r137;
|
252 |
+
.param .b64 param3;
|
253 |
+
st.param.b64 [param3+0], %rd46;
|
254 |
+
.param .b64 param4;
|
255 |
+
st.param.b64 [param4+0], %rd61;
|
256 |
+
call.uni
|
257 |
+
__assertfail,
|
258 |
+
(
|
259 |
+
param0,
|
260 |
+
param1,
|
261 |
+
param2,
|
262 |
+
param3,
|
263 |
+
param4
|
264 |
+
);
|
265 |
+
} // callseq 2
|
266 |
+
bra.uni $L__BB0_3;
|
267 |
+
$L__BB0_4:
|
268 |
+
.loc 1 31 36
|
269 |
+
shr.u32 %r85, %r1, 3;
|
270 |
+
or.b32 %r86, %r7, %r85;
|
271 |
+
mad.lo.s32 %r87, %r86, 12, %r6;
|
272 |
+
shl.b32 %r88, %r87, 2;
|
273 |
+
mov.u32 %r89, global_smem;
|
274 |
+
add.s32 %r90, %r89, %r88;
|
275 |
+
st.shared.f32 [%r90], %f136;
|
276 |
+
st.shared.f32 [%r90+768], %f137;
|
277 |
+
st.shared.f32 [%r90+1536], %f138;
|
278 |
+
st.shared.f32 [%r90+2304], %f139;
|
279 |
+
bar.sync 0;
|
280 |
+
mad.lo.s32 %r91, %r4, 12, %r5;
|
281 |
+
shl.b32 %r92, %r91, 2;
|
282 |
+
add.s32 %r93, %r89, %r92;
|
283 |
+
ld.shared.v4.f32 {%f68, %f69, %f70, %f71}, [%r93];
|
284 |
+
$L__tmp3:
|
285 |
+
.loc 2 108 21
|
286 |
+
sub.f32 %f72, %f145, %f144;
|
287 |
+
.loc 2 109 28
|
288 |
+
add.f32 %f73, %f68, %f69;
|
289 |
+
.loc 2 110 39
|
290 |
+
setp.eq.f32 %p20, %f73, 0f00000000;
|
291 |
+
.loc 2 110 60
|
292 |
+
mov.b32 %r61, %f69;
|
293 |
+
mov.b32 %r62, %f73;
|
294 |
+
div.full.f32 %r60, %r61, %r62;
|
295 |
+
mov.b32 %f74, %r60;
|
296 |
+
.loc 2 110 49
|
297 |
+
selp.f32 %f75, 0f00000000, %f74, %p20;
|
298 |
+
.loc 2 112 17
|
299 |
+
fma.rn.f32 %f76, %f72, %f75, %f144;
|
300 |
+
.loc 2 113 15
|
301 |
+
add.f32 %f77, %f140, %f141;
|
302 |
+
.loc 2 113 30
|
303 |
+
mul.f32 %f78, %f72, %f72;
|
304 |
+
.loc 2 113 38
|
305 |
+
mul.f32 %f79, %f78, %f68;
|
306 |
+
.loc 2 113 22
|
307 |
+
fma.rn.f32 %f80, %f79, %f75, %f77;
|
308 |
+
.loc 2 108 21
|
309 |
+
sub.f32 %f81, %f146, %f76;
|
310 |
+
.loc 2 109 28
|
311 |
+
add.f32 %f82, %f70, %f73;
|
312 |
+
.loc 2 110 39
|
313 |
+
setp.eq.f32 %p21, %f82, 0f00000000;
|
314 |
+
.loc 2 110 60
|
315 |
+
mov.b32 %r65, %f82;
|
316 |
+
mov.b32 %r64, %f70;
|
317 |
+
div.full.f32 %r63, %r64, %r65;
|
318 |
+
mov.b32 %f83, %r63;
|
319 |
+
.loc 2 110 49
|
320 |
+
selp.f32 %f84, 0f00000000, %f83, %p21;
|
321 |
+
.loc 2 112 17
|
322 |
+
fma.rn.f32 %f85, %f84, %f81, %f76;
|
323 |
+
.loc 2 113 15
|
324 |
+
add.f32 %f86, %f142, %f80;
|
325 |
+
.loc 2 113 30
|
326 |
+
mul.f32 %f87, %f81, %f81;
|
327 |
+
.loc 2 113 38
|
328 |
+
mul.f32 %f88, %f73, %f87;
|
329 |
+
.loc 2 113 22
|
330 |
+
fma.rn.f32 %f89, %f84, %f88, %f86;
|
331 |
+
.loc 2 108 21
|
332 |
+
sub.f32 %f90, %f147, %f85;
|
333 |
+
.loc 2 109 28
|
334 |
+
add.f32 %f91, %f71, %f82;
|
335 |
+
.loc 2 110 39
|
336 |
+
setp.eq.f32 %p22, %f91, 0f00000000;
|
337 |
+
.loc 2 110 60
|
338 |
+
mov.b32 %r68, %f91;
|
339 |
+
mov.b32 %r67, %f71;
|
340 |
+
div.full.f32 %r66, %r67, %r68;
|
341 |
+
mov.b32 %f92, %r66;
|
342 |
+
.loc 2 110 49
|
343 |
+
selp.f32 %f93, 0f00000000, %f92, %p22;
|
344 |
+
.loc 2 112 17
|
345 |
+
fma.rn.f32 %f94, %f93, %f90, %f85;
|
346 |
+
.loc 2 113 15
|
347 |
+
add.f32 %f95, %f143, %f89;
|
348 |
+
.loc 2 113 30
|
349 |
+
mul.f32 %f96, %f90, %f90;
|
350 |
+
.loc 2 113 38
|
351 |
+
mul.f32 %f97, %f82, %f96;
|
352 |
+
.loc 2 113 22
|
353 |
+
fma.rn.f32 %f98, %f93, %f97, %f95;
|
354 |
+
$L__tmp4:
|
355 |
+
.loc 2 120 46
|
356 |
+
mov.b32 %r94, %f94;
|
357 |
+
shfl.sync.bfly.b32 %r95, %r94, 1, 31, -1;
|
358 |
+
mov.b32 %f99, %r95;
|
359 |
+
mov.b32 %r96, %f98;
|
360 |
+
shfl.sync.bfly.b32 %r97, %r96, 1, 31, -1;
|
361 |
+
mov.b32 %f100, %r97;
|
362 |
+
shfl.sync.bfly.b32 %r70, %r68, 1, 31, -1;
|
363 |
+
mov.b32 %f101, %r70;
|
364 |
+
$L__tmp5:
|
365 |
+
.loc 2 108 21
|
366 |
+
sub.f32 %f102, %f99, %f94;
|
367 |
+
.loc 2 109 28
|
368 |
+
add.f32 %f103, %f91, %f101;
|
369 |
+
.loc 2 110 39
|
370 |
+
setp.eq.f32 %p23, %f103, 0f00000000;
|
371 |
+
.loc 2 110 60
|
372 |
+
mov.b32 %r71, %f103;
|
373 |
+
div.full.f32 %r69, %r70, %r71;
|
374 |
+
mov.b32 %f104, %r69;
|
375 |
+
.loc 2 110 49
|
376 |
+
selp.f32 %f105, 0f00000000, %f104, %p23;
|
377 |
+
.loc 2 112 17
|
378 |
+
fma.rn.f32 %f37, %f105, %f102, %f94;
|
379 |
+
.loc 2 113 15
|
380 |
+
add.f32 %f106, %f98, %f100;
|
381 |
+
.loc 2 113 30
|
382 |
+
mul.f32 %f107, %f102, %f102;
|
383 |
+
.loc 2 113 38
|
384 |
+
mul.f32 %f108, %f91, %f107;
|
385 |
+
.loc 2 113 22
|
386 |
+
fma.rn.f32 %f109, %f105, %f108, %f106;
|
387 |
+
$L__tmp6:
|
388 |
+
.loc 1 69 23
|
389 |
+
mov.b32 %r73, %f109;
|
390 |
+
mov.b32 %r74, 1132462080;
|
391 |
+
div.full.f32 %r72, %r73, %r74;
|
392 |
+
mov.b32 %f110, %r72;
|
393 |
+
.loc 1 71 24
|
394 |
+
add.f32 %f38, %f110, 0f3727C5AC;
|
395 |
+
.loc 1 55 36
|
396 |
+
shl.b32 %r98, %r14, 14;
|
397 |
+
shl.b32 %r99, %r2, 12;
|
398 |
+
or.b32 %r100, %r98, %r99;
|
399 |
+
shl.b32 %r101, %r3, 8;
|
400 |
+
or.b32 %r102, %r100, %r101;
|
401 |
+
or.b32 %r11, %r102, %r5;
|
402 |
+
add.s64 %rd65, %rd16, %rd2;
|
403 |
+
mov.b32 %r139, -8;
|
404 |
+
rsqrt.approx.ftz.f32 %f123, %f38;
|
405 |
+
bra.uni $L__BB0_5;
|
406 |
+
$L__BB0_7:
|
407 |
+
.loc 1 65 54
|
408 |
+
mov.u32 %r120, 0x0;
|
409 |
+
mov.u32 %r121, 0x0;
|
410 |
+
mov.u32 %r122, 0x0;
|
411 |
+
mov.u32 %r123, 0x0;
|
412 |
+
@%p1 ld.global.L1::evict_first.v4.b32 { %r120, %r121, %r122, %r123 }, [ %rd66 + 0 ];
|
413 |
+
@!%p1 mov.u32 %r120, %r124;
|
414 |
+
@!%p1 mov.u32 %r121, %r124;
|
415 |
+
@!%p1 mov.u32 %r122, %r124;
|
416 |
+
@!%p1 mov.u32 %r123, %r124;
|
417 |
+
mov.b32 %f111, %r120;
|
418 |
+
mov.b32 %f112, %r121;
|
419 |
+
mov.b32 %f113, %r122;
|
420 |
+
mov.b32 %f114, %r123;
|
421 |
+
.loc 1 66 24
|
422 |
+
add.f32 %f115, %f39, %f111;
|
423 |
+
add.f32 %f116, %f40, %f112;
|
424 |
+
add.f32 %f117, %f41, %f113;
|
425 |
+
add.f32 %f118, %f42, %f114;
|
426 |
+
.loc 1 67 24
|
427 |
+
sub.f32 %f119, %f115, %f37;
|
428 |
+
sub.f32 %f120, %f116, %f37;
|
429 |
+
sub.f32 %f121, %f117, %f37;
|
430 |
+
sub.f32 %f122, %f118, %f37;
|
431 |
+
.loc 1 73 24
|
432 |
+
mul.f32 %f124, %f119, %f123;
|
433 |
+
mul.f32 %f125, %f120, %f123;
|
434 |
+
mul.f32 %f126, %f121, %f123;
|
435 |
+
mul.f32 %f127, %f122, %f123;
|
436 |
+
.loc 1 74 24
|
437 |
+
mul.f32 %f128, %f124, %f43;
|
438 |
+
mul.f32 %f129, %f125, %f44;
|
439 |
+
mul.f32 %f130, %f126, %f45;
|
440 |
+
mul.f32 %f131, %f127, %f46;
|
441 |
+
.loc 1 55 36
|
442 |
+
add.s32 %r139, %r139, 8;
|
443 |
+
.loc 1 76 29
|
444 |
+
add.s32 %r134, %r139, %r11;
|
445 |
+
mul.wide.s32 %rd60, %r134, 2;
|
446 |
+
add.s64 %rd59, %rd17, %rd60;
|
447 |
+
.loc 1 76 52
|
448 |
+
mov.b32 %r128, %f128;
|
449 |
+
cvt.rn.bf16.f32 %rs1, %r128;
|
450 |
+
mov.b32 %r129, %f129;
|
451 |
+
cvt.rn.bf16.f32 %rs2, %r129;
|
452 |
+
mov.b32 %r130, %f130;
|
453 |
+
cvt.rn.bf16.f32 %rs3, %r130;
|
454 |
+
mov.b32 %r131, %f131;
|
455 |
+
cvt.rn.bf16.f32 %rs4, %r131;
|
456 |
+
mov.b32 %r135, {%rs1, %rs2};
|
457 |
+
mov.b32 %r136, {%rs3, %rs4};
|
458 |
+
@%p1 st.global.v2.b32 [ %rd59 + 0 ], { %r135, %r136 };
|
459 |
+
.loc 1 55 36
|
460 |
+
add.s64 %rd66, %rd66, 32;
|
461 |
+
add.s64 %rd65, %rd65, 32;
|
462 |
+
add.s64 %rd64, %rd64, 32;
|
463 |
+
setp.lt.u32 %p41, %r139, 248;
|
464 |
+
@%p41 bra $L__BB0_5;
|
465 |
+
bra.uni $L__BB0_8;
|
466 |
+
$L__BB0_5:
|
467 |
+
.loc 1 59 51
|
468 |
+
mov.u32 %r103, 0x0;
|
469 |
+
mov.u32 %r104, 0x0;
|
470 |
+
mov.u32 %r105, 0x0;
|
471 |
+
mov.u32 %r106, 0x0;
|
472 |
+
@%p1 ld.global.L1::evict_last.v4.b32 { %r103, %r104, %r105, %r106 }, [ %rd64 + 0 ];
|
473 |
+
@!%p1 mov.u32 %r103, %r124;
|
474 |
+
@!%p1 mov.u32 %r104, %r124;
|
475 |
+
@!%p1 mov.u32 %r105, %r124;
|
476 |
+
@!%p1 mov.u32 %r106, %r124;
|
477 |
+
mov.b32 %f39, %r103;
|
478 |
+
mov.b32 %f40, %r104;
|
479 |
+
mov.b32 %f41, %r105;
|
480 |
+
mov.b32 %f42, %r106;
|
481 |
+
.loc 1 60 40
|
482 |
+
mov.u32 %r111, 0x0;
|
483 |
+
mov.u32 %r112, 0x0;
|
484 |
+
mov.u32 %r113, 0x0;
|
485 |
+
mov.u32 %r114, 0x0;
|
486 |
+
@%p1 ld.global.L1::evict_last.v4.b32 { %r111, %r112, %r113, %r114 }, [ %rd65 + 0 ];
|
487 |
+
@!%p1 mov.u32 %r111, %r124;
|
488 |
+
@!%p1 mov.u32 %r112, %r124;
|
489 |
+
@!%p1 mov.u32 %r113, %r124;
|
490 |
+
@!%p1 mov.u32 %r114, %r124;
|
491 |
+
mov.b32 %f43, %r111;
|
492 |
+
mov.b32 %f44, %r112;
|
493 |
+
mov.b32 %f45, %r113;
|
494 |
+
mov.b32 %f46, %r114;
|
495 |
+
.loc 1 64 57
|
496 |
+
@%p13 bra $L__BB0_7;
|
497 |
+
mov.u64 %rd51, assertMessage_1;
|
498 |
+
cvta.global.u64 %rd52, %rd51;
|
499 |
+
mov.u64 %rd53, assertFile_1;
|
500 |
+
cvta.global.u64 %rd54, %rd53;
|
501 |
+
mov.u64 %rd55, assertFunc_1;
|
502 |
+
cvta.global.u64 %rd56, %rd55;
|
503 |
+
{ // callseq 3, 0
|
504 |
+
.reg .b32 temp_param_reg;
|
505 |
+
.param .b64 param0;
|
506 |
+
st.param.b64 [param0+0], %rd52;
|
507 |
+
.param .b64 param1;
|
508 |
+
st.param.b64 [param1+0], %rd54;
|
509 |
+
.param .b32 param2;
|
510 |
+
st.param.b32 [param2+0], %r137;
|
511 |
+
.param .b64 param3;
|
512 |
+
st.param.b64 [param3+0], %rd56;
|
513 |
+
.param .b64 param4;
|
514 |
+
st.param.b64 [param4+0], %rd61;
|
515 |
+
call.uni
|
516 |
+
__assertfail,
|
517 |
+
(
|
518 |
+
param0,
|
519 |
+
param1,
|
520 |
+
param2,
|
521 |
+
param3,
|
522 |
+
param4
|
523 |
+
);
|
524 |
+
} // callseq 3
|
525 |
+
bra.uni $L__BB0_7;
|
526 |
+
$L__BB0_8:
|
527 |
+
.loc 1 55 4
|
528 |
+
ret;
|
529 |
+
$L__tmp7:
|
530 |
+
$L__func_end0:
|
531 |
+
|
532 |
+
}
|
533 |
+
// .globl __nv_rsqrtf
|
534 |
+
.visible .func (.param .b32 func_retval0) __nv_rsqrtf(
|
535 |
+
.param .b32 __nv_rsqrtf_param_0
|
536 |
+
)
|
537 |
+
{
|
538 |
+
.reg .f32 %f<3>;
|
539 |
+
$L__func_begin1:
|
540 |
+
|
541 |
+
ld.param.f32 %f1, [__nv_rsqrtf_param_0];
|
542 |
+
rsqrt.approx.ftz.f32 %f2, %f1;
|
543 |
+
st.param.f32 [func_retval0+0], %f2;
|
544 |
+
ret;
|
545 |
+
$L__func_end1:
|
546 |
+
|
547 |
+
}
|
548 |
+
.file 1 "/tmp/torchinductor_root/lh/clhe4a3stvufxafmq3kk5hodazz2efctffte646znjdnv3lqi5oa.py"
|
549 |
+
.file 2 "/usr/local/lib/python3.10/dist-packages/torch/_inductor/triton_helpers.py"
|
550 |
+
.section .debug_abbrev
|
551 |
+
{
|
552 |
+
.b8 1
|
553 |
+
.b8 17
|
554 |
+
.b8 1
|
555 |
+
.b8 37
|
556 |
+
.b8 8
|
557 |
+
.b8 19
|
558 |
+
.b8 5
|
559 |
+
.b8 3
|
560 |
+
.b8 8
|
561 |
+
.b8 16
|
562 |
+
.b8 6
|
563 |
+
.b8 27
|
564 |
+
.b8 8
|
565 |
+
.b8 180
|
566 |
+
.b8 66
|
567 |
+
.b8 12
|
568 |
+
.b8 17
|
569 |
+
.b8 1
|
570 |
+
.b8 18
|
571 |
+
.b8 1
|
572 |
+
.b8 0
|
573 |
+
.b8 0
|
574 |
+
.b8 2
|
575 |
+
.b8 46
|
576 |
+
.b8 0
|
577 |
+
.b8 135
|
578 |
+
.b8 64
|
579 |
+
.b8 8
|
580 |
+
.b8 3
|
581 |
+
.b8 8
|
582 |
+
.b8 58
|
583 |
+
.b8 11
|
584 |
+
.b8 59
|
585 |
+
.b8 11
|
586 |
+
.b8 63
|
587 |
+
.b8 12
|
588 |
+
.b8 32
|
589 |
+
.b8 11
|
590 |
+
.b8 0
|
591 |
+
.b8 0
|
592 |
+
.b8 3
|
593 |
+
.b8 46
|
594 |
+
.b8 1
|
595 |
+
.b8 17
|
596 |
+
.b8 1
|
597 |
+
.b8 18
|
598 |
+
.b8 1
|
599 |
+
.b8 64
|
600 |
+
.b8 10
|
601 |
+
.b8 49
|
602 |
+
.b8 19
|
603 |
+
.b8 0
|
604 |
+
.b8 0
|
605 |
+
.b8 4
|
606 |
+
.b8 29
|
607 |
+
.b8 0
|
608 |
+
.b8 49
|
609 |
+
.b8 19
|
610 |
+
.b8 17
|
611 |
+
.b8 1
|
612 |
+
.b8 18
|
613 |
+
.b8 1
|
614 |
+
.b8 88
|
615 |
+
.b8 11
|
616 |
+
.b8 89
|
617 |
+
.b8 11
|
618 |
+
.b8 87
|
619 |
+
.b8 11
|
620 |
+
.b8 0
|
621 |
+
.b8 0
|
622 |
+
.b8 5
|
623 |
+
.b8 29
|
624 |
+
.b8 1
|
625 |
+
.b8 49
|
626 |
+
.b8 19
|
627 |
+
.b8 17
|
628 |
+
.b8 1
|
629 |
+
.b8 18
|
630 |
+
.b8 1
|
631 |
+
.b8 88
|
632 |
+
.b8 11
|
633 |
+
.b8 89
|
634 |
+
.b8 11
|
635 |
+
.b8 87
|
636 |
+
.b8 11
|
637 |
+
.b8 0
|
638 |
+
.b8 0
|
639 |
+
.b8 0
|
640 |
+
}
|
641 |
+
.section .debug_info
|
642 |
+
{
|
643 |
+
.b32 298
|
644 |
+
.b8 2
|
645 |
+
.b8 0
|
646 |
+
.b32 .debug_abbrev
|
647 |
+
.b8 8
|
648 |
+
.b8 1
|
649 |
+
.b8 116
|
650 |
+
.b8 114
|
651 |
+
.b8 105
|
652 |
+
.b8 116
|
653 |
+
.b8 111
|
654 |
+
.b8 110
|
655 |
+
.b8 0
|
656 |
+
.b8 2
|
657 |
+
.b8 0
|
658 |
+
.b8 99
|
659 |
+
.b8 108
|
660 |
+
.b8 104
|
661 |
+
.b8 101
|
662 |
+
.b8 52
|
663 |
+
.b8 97
|
664 |
+
.b8 51
|
665 |
+
.b8 115
|
666 |
+
.b8 116
|
667 |
+
.b8 118
|
668 |
+
.b8 117
|
669 |
+
.b8 102
|
670 |
+
.b8 120
|
671 |
+
.b8 97
|
672 |
+
.b8 102
|
673 |
+
.b8 109
|
674 |
+
.b8 113
|
675 |
+
.b8 51
|
676 |
+
.b8 107
|
677 |
+
.b8 107
|
678 |
+
.b8 53
|
679 |
+
.b8 104
|
680 |
+
.b8 111
|
681 |
+
.b8 100
|
682 |
+
.b8 97
|
683 |
+
.b8 122
|
684 |
+
.b8 122
|
685 |
+
.b8 50
|
686 |
+
.b8 101
|
687 |
+
.b8 102
|
688 |
+
.b8 99
|
689 |
+
.b8 116
|
690 |
+
.b8 102
|
691 |
+
.b8 102
|
692 |
+
.b8 116
|
693 |
+
.b8 101
|
694 |
+
.b8 54
|
695 |
+
.b8 52
|
696 |
+
.b8 54
|
697 |
+
.b8 122
|
698 |
+
.b8 110
|
699 |
+
.b8 106
|
700 |
+
.b8 100
|
701 |
+
.b8 110
|
702 |
+
.b8 118
|
703 |
+
.b8 51
|
704 |
+
.b8 108
|
705 |
+
.b8 113
|
706 |
+
.b8 105
|
707 |
+
.b8 53
|
708 |
+
.b8 111
|
709 |
+
.b8 97
|
710 |
+
.b8 46
|
711 |
+
.b8 112
|
712 |
+
.b8 121
|
713 |
+
.b8 0
|
714 |
+
.b32 .debug_line
|
715 |
+
.b8 47
|
716 |
+
.b8 116
|
717 |
+
.b8 109
|
718 |
+
.b8 112
|
719 |
+
.b8 47
|
720 |
+
.b8 116
|
721 |
+
.b8 111
|
722 |
+
.b8 114
|
723 |
+
.b8 99
|
724 |
+
.b8 104
|
725 |
+
.b8 105
|
726 |
+
.b8 110
|
727 |
+
.b8 100
|
728 |
+
.b8 117
|
729 |
+
.b8 99
|
730 |
+
.b8 116
|
731 |
+
.b8 111
|
732 |
+
.b8 114
|
733 |
+
.b8 95
|
734 |
+
.b8 114
|
735 |
+
.b8 111
|
736 |
+
.b8 111
|
737 |
+
.b8 116
|
738 |
+
.b8 47
|
739 |
+
.b8 108
|
740 |
+
.b8 104
|
741 |
+
.b8 0
|
742 |
+
.b8 1
|
743 |
+
.b64 $L__func_begin0
|
744 |
+
.b64 $L__func_end0
|
745 |
+
.b8 2
|
746 |
+
.b8 116
|
747 |
+
.b8 114
|
748 |
+
.b8 105
|
749 |
+
.b8 116
|
750 |
+
.b8 111
|
751 |
+
.b8 110
|
752 |
+
.b8 95
|
753 |
+
.b8 95
|
754 |
+
.b8 48
|
755 |
+
.b8 100
|
756 |
+
.b8 49
|
757 |
+
.b8 100
|
758 |
+
.b8 50
|
759 |
+
.b8 100
|
760 |
+
.b8 51
|
761 |
+
.b8 100
|
762 |
+
.b8 52
|
763 |
+
.b8 100
|
764 |
+
.b8 53
|
765 |
+
.b8 100
|
766 |
+
.b8 101
|
767 |
+
.b8 54
|
768 |
+
.b8 100
|
769 |
+
.b8 101
|
770 |
+
.b8 0
|
771 |
+
.b8 116
|
772 |
+
.b8 114
|
773 |
+
.b8 105
|
774 |
+
.b8 116
|
775 |
+
.b8 111
|
776 |
+
.b8 110
|
777 |
+
.b8 95
|
778 |
+
.b8 95
|
779 |
+
.b8 48
|
780 |
+
.b8 100
|
781 |
+
.b8 49
|
782 |
+
.b8 100
|
783 |
+
.b8 50
|
784 |
+
.b8 100
|
785 |
+
.b8 51
|
786 |
+
.b8 100
|
787 |
+
.b8 52
|
788 |
+
.b8 100
|
789 |
+
.b8 53
|
790 |
+
.b8 100
|
791 |
+
.b8 101
|
792 |
+
.b8 54
|
793 |
+
.b8 100
|
794 |
+
.b8 101
|
795 |
+
.b8 0
|
796 |
+
.b8 1
|
797 |
+
.b8 18
|
798 |
+
.b8 1
|
799 |
+
.b8 1
|
800 |
+
.b8 3
|
801 |
+
.b64 $L__func_begin0
|
802 |
+
.b64 $L__func_end0
|
803 |
+
.b8 1
|
804 |
+
.b8 156
|
805 |
+
.b32 125
|
806 |
+
.b8 4
|
807 |
+
.b32 125
|
808 |
+
.b64 $L__tmp1
|
809 |
+
.b64 $L__tmp2
|
810 |
+
.b8 2
|
811 |
+
.b8 44
|
812 |
+
.b8 38
|
813 |
+
.b8 5
|
814 |
+
.b32 125
|
815 |
+
.b64 $L__tmp3
|
816 |
+
.b64 $L__tmp6
|
817 |
+
.b8 2
|
818 |
+
.b8 50
|
819 |
+
.b8 41
|
820 |
+
.b8 4
|
821 |
+
.b32 125
|
822 |
+
.b64 $L__tmp3
|
823 |
+
.b64 $L__tmp6
|
824 |
+
.b8 2
|
825 |
+
.b8 120
|
826 |
+
.b8 46
|
827 |
+
.b8 0
|
828 |
+
.b8 4
|
829 |
+
.b32 125
|
830 |
+
.b64 $L__tmp4
|
831 |
+
.b64 $L__tmp5
|
832 |
+
.b8 2
|
833 |
+
.b8 50
|
834 |
+
.b8 41
|
835 |
+
.b8 0
|
836 |
+
.b8 0
|
837 |
+
}
|
838 |
+
.section .debug_pubnames
|
839 |
+
{
|
840 |
+
.b32 $L__pubNames_end0-$L__pubNames_start0
|
841 |
+
$L__pubNames_start0:
|
842 |
+
.b8 2
|
843 |
+
.b8 0
|
844 |
+
.b32 .debug_info
|
845 |
+
.b32 302
|
846 |
+
.b32 125
|
847 |
+
.b8 116
|
848 |
+
.b8 114
|
849 |
+
.b8 105
|
850 |
+
.b8 116
|
851 |
+
.b8 111
|
852 |
+
.b8 110
|
853 |
+
.b8 95
|
854 |
+
.b8 95
|
855 |
+
.b8 48
|
856 |
+
.b8 100
|
857 |
+
.b8 49
|
858 |
+
.b8 100
|
859 |
+
.b8 50
|
860 |
+
.b8 100
|
861 |
+
.b8 51
|
862 |
+
.b8 100
|
863 |
+
.b8 52
|
864 |
+
.b8 100
|
865 |
+
.b8 53
|
866 |
+
.b8 100
|
867 |
+
.b8 101
|
868 |
+
.b8 54
|
869 |
+
.b8 100
|
870 |
+
.b8 101
|
871 |
+
.b8 0
|
872 |
+
.b32 0
|
873 |
+
$L__pubNames_end0:
|
874 |
+
}
|
875 |
+
.section .debug_pubtypes
|
876 |
+
{
|
877 |
+
.b32 $L__pubTypes_end0-$L__pubTypes_start0
|
878 |
+
$L__pubTypes_start0:
|
879 |
+
.b8 2
|
880 |
+
.b8 0
|
881 |
+
.b32 .debug_info
|
882 |
+
.b32 302
|
883 |
+
.b32 0
|
884 |
+
$L__pubTypes_end0:
|
885 |
+
}
|
886 |
+
.section .debug_loc { }
|